Don't call VPP if the kernel isn't ready.
[profile/ivi/vaapi-intel-driver.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_post_processing.h"
40 #include "i965_render.h"
41
/* True when the device is Ironlake, Gen6 or Gen7. */
#define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) ||     \
                     IS_GEN6((ctx)->intel.device_id) ||         \
                     IS_GEN7((ctx)->intel.device_id))

/* Size of one surface-state slot for the i965 layouts: the larger of the two
 * surface-state structures, each rounded up to a multiple of 32 bytes. */
#define SURFACE_STATE_PADDED_SIZE_0_I965        ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_I965        ALIGN(sizeof(struct i965_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_I965          MAX(SURFACE_STATE_PADDED_SIZE_0_I965, SURFACE_STATE_PADDED_SIZE_1_I965)

/* Same computation for the Gen7 surface-state structures. */
#define SURFACE_STATE_PADDED_SIZE_0_GEN7        ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_GEN7        ALIGN(sizeof(struct gen7_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN7          MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)

/* One slot size is used for every generation: the largest of the above. */
#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
/* Byte offset of surface-state slot `index`.  The argument is parenthesized
 * so that expression arguments (e.g. `base + 1`) are scaled as a whole. */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
/* The binding table starts right after MAX_PP_SURFACES surface-state slots. */
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)
57
/*
 * Shader tables for the "gen5" module list (pp_modules_gen5).
 * Each table is an array of rows of four 32-bit words whose contents are
 * pulled in from the named file under shaders/post_processing/gen5_6/.
 * (Presumably these are pre-built kernel binaries - confirm against the
 * shader build rules.)
 */
58 static const uint32_t pp_null_gen5[][4] = {
59 #include "shaders/post_processing/gen5_6/null.g4b.gen5"
60 };
61
62 static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
63 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
64 };
65
66 static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
67 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
68 };
69
70 static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
71 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
72 };
73
74 static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
75 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
76 };
77
78 static const uint32_t pp_nv12_scaling_gen5[][4] = {
79 #include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
80 };
81
82 static const uint32_t pp_nv12_avs_gen5[][4] = {
83 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
84 };
85
86 static const uint32_t pp_nv12_dndi_gen5[][4] = {
87 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
88 };
89
90 static const uint32_t pp_nv12_dn_gen5[][4] = {
91 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
92 };
93
/*
 * Forward declarations of the per-module initializers referenced by the
 * module tables below.  All of them share one signature: driver context,
 * post-processing context, source surface and rectangle, destination
 * surface and rectangle, and an opaque filter parameter.
 */
94 static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
95                                    const struct i965_surface *src_surface,
96                                    const VARectangle *src_rect,
97                                    struct i965_surface *dst_surface,
98                                    const VARectangle *dst_rect,
99                                    void *filter_param);
100 static VAStatus pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
101                                             const struct i965_surface *src_surface,
102                                             const VARectangle *src_rect,
103                                             struct i965_surface *dst_surface,
104                                             const VARectangle *dst_rect,
105                                             void *filter_param);
106 static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
107                                            const struct i965_surface *src_surface,
108                                            const VARectangle *src_rect,
109                                            struct i965_surface *dst_surface,
110                                            const VARectangle *dst_rect,
111                                            void *filter_param);
112 static VAStatus gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
113                                              const struct i965_surface *src_surface,
114                                              const VARectangle *src_rect,
115                                              struct i965_surface *dst_surface,
116                                              const VARectangle *dst_rect,
117                                              void *filter_param);
118 static VAStatus pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
119                                                 const struct i965_surface *src_surface,
120                                                 const VARectangle *src_rect,
121                                                 struct i965_surface *dst_surface,
122                                                 const VARectangle *dst_rect,
123                                                 void *filter_param);
124 static VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
125                                         const struct i965_surface *src_surface,
126                                         const VARectangle *src_rect,
127                                         struct i965_surface *dst_surface,
128                                         const VARectangle *dst_rect,
129                                         void *filter_param);
130 static VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
131                                       const struct i965_surface *src_surface,
132                                       const VARectangle *src_rect,
133                                       struct i965_surface *dst_surface,
134                                       const VARectangle *dst_rect,
135                                       void *filter_param);
136
137 static struct pp_module pp_modules_gen5[] = {
138     {
139         {
140             "NULL module (for testing)",
141             PP_NULL,
142             pp_null_gen5,
143             sizeof(pp_null_gen5),
144             NULL,
145         },
146
147         pp_null_initialize,
148     },
149
150     {
151         {
152             "NV12_NV12",
153             PP_NV12_LOAD_SAVE_N12,
154             pp_nv12_load_save_nv12_gen5,
155             sizeof(pp_nv12_load_save_nv12_gen5),
156             NULL,
157         },
158
159         pp_plx_load_save_plx_initialize,
160     },
161
162     {
163         {
164             "NV12_PL3",
165             PP_NV12_LOAD_SAVE_PL3,
166             pp_nv12_load_save_pl3_gen5,
167             sizeof(pp_nv12_load_save_pl3_gen5),
168             NULL,
169         },
170
171         pp_plx_load_save_plx_initialize,
172     },
173
174     {
175         {
176             "PL3_NV12",
177             PP_PL3_LOAD_SAVE_N12,
178             pp_pl3_load_save_nv12_gen5,
179             sizeof(pp_pl3_load_save_nv12_gen5),
180             NULL,
181         },
182
183         pp_plx_load_save_plx_initialize,
184     },
185
186     {
187         {
188             "PL3_PL3",
189             PP_PL3_LOAD_SAVE_N12,
190             pp_pl3_load_save_pl3_gen5,
191             sizeof(pp_pl3_load_save_pl3_gen5),
192             NULL,
193         },
194
195         pp_plx_load_save_plx_initialize
196     },
197
198     {
199         {
200             "NV12 Scaling module",
201             PP_NV12_SCALING,
202             pp_nv12_scaling_gen5,
203             sizeof(pp_nv12_scaling_gen5),
204             NULL,
205         },
206
207         pp_nv12_scaling_initialize,
208     },
209
210     {
211         {
212             "NV12 AVS module",
213             PP_NV12_AVS,
214             pp_nv12_avs_gen5,
215             sizeof(pp_nv12_avs_gen5),
216             NULL,
217         },
218
219         pp_nv12_avs_initialize_nlas,
220     },
221
222     {
223         {
224             "NV12 DNDI module",
225             PP_NV12_DNDI,
226             pp_nv12_dndi_gen5,
227             sizeof(pp_nv12_dndi_gen5),
228             NULL,
229         },
230
231         pp_nv12_dndi_initialize,
232     },
233
234     {
235         {
236             "NV12 DN module",
237             PP_NV12_DN,
238             pp_nv12_dn_gen5,
239             sizeof(pp_nv12_dn_gen5),
240             NULL,
241         },
242
243         pp_nv12_dn_initialize,
244     },
245 };
246
/*
 * Shader tables for the "gen6" module list (pp_modules_gen6).
 * Each table is an array of rows of four 32-bit words whose contents are
 * pulled in from the named .g6b file under shaders/post_processing/gen5_6/.
 * Note that pp_nv12_scaling_gen6 and pp_nv12_avs_gen6 include the same file
 * (nv12_avs_nv12.g6b).
 */
247 static const uint32_t pp_null_gen6[][4] = {
248 #include "shaders/post_processing/gen5_6/null.g6b"
249 };
250
251 static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
252 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
253 };
254
255 static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
256 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
257 };
258
259 static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
260 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
261 };
262
263 static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
264 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
265 };
266
267 static const uint32_t pp_nv12_scaling_gen6[][4] = {
268 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
269 };
270
271 static const uint32_t pp_nv12_avs_gen6[][4] = {
272 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
273 };
274
275 static const uint32_t pp_nv12_dndi_gen6[][4] = {
276 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
277 };
278
279 static const uint32_t pp_nv12_dn_gen6[][4] = {
280 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
281 };
282
283 static struct pp_module pp_modules_gen6[] = {
284     {
285         {
286             "NULL module (for testing)",
287             PP_NULL,
288             pp_null_gen6,
289             sizeof(pp_null_gen6),
290             NULL,
291         },
292
293         pp_null_initialize,
294     },
295
296     {
297         {
298             "NV12_NV12",
299             PP_NV12_LOAD_SAVE_N12,
300             pp_nv12_load_save_nv12_gen6,
301             sizeof(pp_nv12_load_save_nv12_gen6),
302             NULL,
303         },
304
305         pp_plx_load_save_plx_initialize,
306     },
307
308     {
309         {
310             "NV12_PL3",
311             PP_NV12_LOAD_SAVE_PL3,
312             pp_nv12_load_save_pl3_gen6,
313             sizeof(pp_nv12_load_save_pl3_gen6),
314             NULL,
315         },
316         
317         pp_plx_load_save_plx_initialize,
318     },
319
320     {
321         {
322             "PL3_NV12",
323             PP_PL3_LOAD_SAVE_N12,
324             pp_pl3_load_save_nv12_gen6,
325             sizeof(pp_pl3_load_save_nv12_gen6),
326             NULL,
327         },
328
329         pp_plx_load_save_plx_initialize,
330     },
331
332     {
333         {
334             "PL3_PL3",
335             PP_PL3_LOAD_SAVE_N12,
336             pp_pl3_load_save_pl3_gen6,
337             sizeof(pp_pl3_load_save_pl3_gen6),
338             NULL,
339         },
340
341         pp_plx_load_save_plx_initialize,
342     },
343
344     {
345         {
346             "NV12 Scaling module",
347             PP_NV12_SCALING,
348             pp_nv12_scaling_gen6,
349             sizeof(pp_nv12_scaling_gen6),
350             NULL,
351         },
352
353         gen6_nv12_scaling_initialize,
354     },
355
356     {
357         {
358             "NV12 AVS module",
359             PP_NV12_AVS,
360             pp_nv12_avs_gen6,
361             sizeof(pp_nv12_avs_gen6),
362             NULL,
363         },
364
365         pp_nv12_avs_initialize_nlas,
366     },
367
368     {
369         {
370             "NV12 DNDI module",
371             PP_NV12_DNDI,
372             pp_nv12_dndi_gen6,
373             sizeof(pp_nv12_dndi_gen6),
374             NULL,
375         },
376
377         pp_nv12_dndi_initialize,
378     },
379
380     {
381         {
382             "NV12 DN module",
383             PP_NV12_DN,
384             pp_nv12_dn_gen6,
385             sizeof(pp_nv12_dn_gen6),
386             NULL,
387         },
388
389         pp_nv12_dn_initialize,
390     },
391 };
392
/*
 * Shader tables for the "gen7" module list (pp_modules_gen7).
 * Each table is an array of rows of four 32-bit words whose contents are
 * pulled in from the named .g7b file under shaders/post_processing/gen7/.
 * pp_null_gen7 and pp_nv12_dn_gen7 have empty initializers, i.e. no shader
 * words are provided for those two modules here.  pp_nv12_scaling_gen7 and
 * pp_nv12_avs_gen7 include the same file (avs.g7b).
 * NOTE(review): an empty brace initializer for an array of unknown size is
 * not ISO C before C23 and relies on a compiler extension - confirm this is
 * acceptable for the supported toolchains.
 */
393 static const uint32_t pp_null_gen7[][4] = {
394 };
395
396 static const uint32_t pp_nv12_load_save_nv12_gen7[][4] = {
397 #include "shaders/post_processing/gen7/pl2_to_pl2.g7b"
398 };
399
400 static const uint32_t pp_nv12_load_save_pl3_gen7[][4] = {
401 #include "shaders/post_processing/gen7/pl2_to_pl3.g7b"
402 };
403
404 static const uint32_t pp_pl3_load_save_nv12_gen7[][4] = {
405 #include "shaders/post_processing/gen7/pl3_to_pl2.g7b"
406 };
407
408 static const uint32_t pp_pl3_load_save_pl3_gen7[][4] = {
409 #include "shaders/post_processing/gen7/pl3_to_pl3.g7b"
410 };
411
412 static const uint32_t pp_nv12_scaling_gen7[][4] = {
413 #include "shaders/post_processing/gen7/avs.g7b"
414 };
415
416 static const uint32_t pp_nv12_avs_gen7[][4] = {
417 #include "shaders/post_processing/gen7/avs.g7b"
418 };
419
420 static const uint32_t pp_nv12_dndi_gen7[][4] = {
421 #include "shaders/post_processing/gen7/dndi.g7b"
422 };
423
424 static const uint32_t pp_nv12_dn_gen7[][4] = {
425 };
426
/*
 * Forward declarations of the gen7-specific initializers referenced by
 * pp_modules_gen7.  They take the same argument list as the other module
 * initializers declared earlier in this file.
 */
427 static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
428                                            const struct i965_surface *src_surface,
429                                            const VARectangle *src_rect,
430                                            struct i965_surface *dst_surface,
431                                            const VARectangle *dst_rect,
432                                            void *filter_param);
433 static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
434                                              const struct i965_surface *src_surface,
435                                              const VARectangle *src_rect,
436                                              struct i965_surface *dst_surface,
437                                              const VARectangle *dst_rect,
438                                              void *filter_param);
439 static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
440                                            const struct i965_surface *src_surface,
441                                            const VARectangle *src_rect,
442                                            struct i965_surface *dst_surface,
443                                            const VARectangle *dst_rect,
444                                            void *filter_param);
445
446 static struct pp_module pp_modules_gen7[] = {
447     {
448         {
449             "NULL module (for testing)",
450             PP_NULL,
451             pp_null_gen7,
452             sizeof(pp_null_gen7),
453             NULL,
454         },
455
456         pp_null_initialize,
457     },
458
459     {
460         {
461             "NV12_NV12",
462             PP_NV12_LOAD_SAVE_N12,
463             pp_nv12_load_save_nv12_gen7,
464             sizeof(pp_nv12_load_save_nv12_gen7),
465             NULL,
466         },
467
468         gen7_pp_plx_avs_initialize,
469     },
470
471     {
472         {
473             "NV12_PL3",
474             PP_NV12_LOAD_SAVE_PL3,
475             pp_nv12_load_save_pl3_gen7,
476             sizeof(pp_nv12_load_save_pl3_gen7),
477             NULL,
478         },
479         
480         gen7_pp_plx_avs_initialize,
481     },
482
483     {
484         {
485             "PL3_NV12",
486             PP_PL3_LOAD_SAVE_N12,
487             pp_pl3_load_save_nv12_gen7,
488             sizeof(pp_pl3_load_save_nv12_gen7),
489             NULL,
490         },
491
492         gen7_pp_plx_avs_initialize,
493     },
494
495     {
496         {
497             "PL3_PL3",
498             PP_PL3_LOAD_SAVE_N12,
499             pp_pl3_load_save_pl3_gen7,
500             sizeof(pp_pl3_load_save_pl3_gen7),
501             NULL,
502         },
503
504         gen7_pp_plx_avs_initialize,
505     },
506
507     {
508         {
509             "NV12 Scaling module",
510             PP_NV12_SCALING,
511             pp_nv12_scaling_gen7,
512             sizeof(pp_nv12_scaling_gen7),
513             NULL,
514         },
515
516         gen7_pp_plx_avs_initialize,
517     },
518
519     {
520         {
521             "NV12 AVS module",
522             PP_NV12_AVS,
523             pp_nv12_avs_gen7,
524             sizeof(pp_nv12_avs_gen7),
525             NULL,
526         },
527
528         gen7_pp_plx_avs_initialize,
529     },
530
531     {
532         {
533             "NV12 DNDI module",
534             PP_NV12_DNDI,
535             pp_nv12_dndi_gen7,
536             sizeof(pp_nv12_dndi_gen7),
537             NULL,
538         },
539
540         gen7_pp_nv12_dndi_initialize,
541     },
542
543     {
544         {
545             "NV12 DN module",
546             PP_NV12_DN,
547             pp_nv12_dn_gen7,
548             sizeof(pp_nv12_dn_gen7),
549             NULL,
550         },
551
552         gen7_pp_nv12_dn_initialize,
553     },
554 };
555
556 static int
557 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
558 {
559     struct i965_driver_data *i965 = i965_driver_data(ctx);
560     int fourcc;
561
562     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
563         struct object_image *obj_image = IMAGE(surface->id);
564         fourcc = obj_image->image.format.fourcc;
565     } else {
566         struct object_surface *obj_surface = SURFACE(surface->id);
567         fourcc = obj_surface->fourcc;
568     }
569
570     return fourcc;
571 }
572
573 static void
574 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
575 {
576     switch (tiling) {
577     case I915_TILING_NONE:
578         ss->ss3.tiled_surface = 0;
579         ss->ss3.tile_walk = 0;
580         break;
581     case I915_TILING_X:
582         ss->ss3.tiled_surface = 1;
583         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
584         break;
585     case I915_TILING_Y:
586         ss->ss3.tiled_surface = 1;
587         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
588         break;
589     }
590 }
591
592 static void
593 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
594 {
595     switch (tiling) {
596     case I915_TILING_NONE:
597         ss->ss2.tiled_surface = 0;
598         ss->ss2.tile_walk = 0;
599         break;
600     case I915_TILING_X:
601         ss->ss2.tiled_surface = 1;
602         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
603         break;
604     case I915_TILING_Y:
605         ss->ss2.tiled_surface = 1;
606         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
607         break;
608     }
609 }
610
611 static void
612 gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
613 {
614     switch (tiling) {
615     case I915_TILING_NONE:
616         ss->ss0.tiled_surface = 0;
617         ss->ss0.tile_walk = 0;
618         break;
619     case I915_TILING_X:
620         ss->ss0.tiled_surface = 1;
621         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
622         break;
623     case I915_TILING_Y:
624         ss->ss0.tiled_surface = 1;
625         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
626         break;
627     }
628 }
629
630 static void
631 gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
632 {
633     switch (tiling) {
634     case I915_TILING_NONE:
635         ss->ss2.tiled_surface = 0;
636         ss->ss2.tile_walk = 0;
637         break;
638     case I915_TILING_X:
639         ss->ss2.tiled_surface = 1;
640         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
641         break;
642     case I915_TILING_Y:
643         ss->ss2.tiled_surface = 1;
644         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
645         break;
646     }
647 }
648
649 static void
650 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
651 {
652     struct i965_interface_descriptor *desc;
653     dri_bo *bo;
654     int pp_index = pp_context->current_pp;
655
656     bo = pp_context->idrt.bo;
657     dri_bo_map(bo, 1);
658     assert(bo->virtual);
659     desc = bo->virtual;
660     memset(desc, 0, sizeof(*desc));
661     desc->desc0.grf_reg_blocks = 10;
662     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
663     desc->desc1.const_urb_entry_read_offset = 0;
664     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
665     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
666     desc->desc2.sampler_count = 0;
667     desc->desc3.binding_table_entry_count = 0;
668     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
669
670     dri_bo_emit_reloc(bo,
671                       I915_GEM_DOMAIN_INSTRUCTION, 0,
672                       desc->desc0.grf_reg_blocks,
673                       offsetof(struct i965_interface_descriptor, desc0),
674                       pp_context->pp_modules[pp_index].kernel.bo);
675
676     dri_bo_emit_reloc(bo,
677                       I915_GEM_DOMAIN_INSTRUCTION, 0,
678                       desc->desc2.sampler_count << 2,
679                       offsetof(struct i965_interface_descriptor, desc2),
680                       pp_context->sampler_state_table.bo);
681
682     dri_bo_unmap(bo);
683     pp_context->idrt.num_interface_descriptors++;
684 }
685
686 static void
687 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
688 {
689     struct i965_vfe_state *vfe_state;
690     dri_bo *bo;
691
692     bo = pp_context->vfe_state.bo;
693     dri_bo_map(bo, 1);
694     assert(bo->virtual);
695     vfe_state = bo->virtual;
696     memset(vfe_state, 0, sizeof(*vfe_state));
697     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
698     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
699     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
700     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
701     vfe_state->vfe1.children_present = 0;
702     vfe_state->vfe2.interface_descriptor_base = 
703         pp_context->idrt.bo->offset >> 4; /* reloc */
704     dri_bo_emit_reloc(bo,
705                       I915_GEM_DOMAIN_INSTRUCTION, 0,
706                       0,
707                       offsetof(struct i965_vfe_state, vfe2),
708                       pp_context->idrt.bo);
709     dri_bo_unmap(bo);
710 }
711
712 static void
713 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
714 {
715     unsigned char *constant_buffer;
716     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
717
718     assert(sizeof(*pp_static_parameter) == 128);
719     dri_bo_map(pp_context->curbe.bo, 1);
720     assert(pp_context->curbe.bo->virtual);
721     constant_buffer = pp_context->curbe.bo->virtual;
722     memcpy(constant_buffer, pp_static_parameter, sizeof(*pp_static_parameter));
723     dri_bo_unmap(pp_context->curbe.bo);
724 }
725
726 static void
727 ironlake_pp_states_setup(VADriverContextP ctx,
728                          struct i965_post_processing_context *pp_context)
729 {
730     ironlake_pp_interface_descriptor_table(pp_context);
731     ironlake_pp_vfe_state(pp_context);
732     ironlake_pp_upload_constants(pp_context);
733 }
734
735 static void
736 ironlake_pp_pipeline_select(VADriverContextP ctx,
737                             struct i965_post_processing_context *pp_context)
738 {
739     struct intel_batchbuffer *batch = pp_context->batch;
740
741     BEGIN_BATCH(batch, 1);
742     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
743     ADVANCE_BATCH(batch);
744 }
745
746 static void
747 ironlake_pp_urb_layout(VADriverContextP ctx,
748                        struct i965_post_processing_context *pp_context)
749 {
750     struct intel_batchbuffer *batch = pp_context->batch;
751     unsigned int vfe_fence, cs_fence;
752
753     vfe_fence = pp_context->urb.cs_start;
754     cs_fence = pp_context->urb.size;
755
756     BEGIN_BATCH(batch, 3);
757     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
758     OUT_BATCH(batch, 0);
759     OUT_BATCH(batch, 
760               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
761               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
762     ADVANCE_BATCH(batch);
763 }
764
765 static void
766 ironlake_pp_state_base_address(VADriverContextP ctx,
767                                struct i965_post_processing_context *pp_context)
768 {
769     struct intel_batchbuffer *batch = pp_context->batch;
770
771     BEGIN_BATCH(batch, 8);
772     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
773     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
774     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
775     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
776     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
777     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
778     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
779     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
780     ADVANCE_BATCH(batch);
781 }
782
783 static void
784 ironlake_pp_state_pointers(VADriverContextP ctx,
785                            struct i965_post_processing_context *pp_context)
786 {
787     struct intel_batchbuffer *batch = pp_context->batch;
788
789     BEGIN_BATCH(batch, 3);
790     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
791     OUT_BATCH(batch, 0);
792     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
793     ADVANCE_BATCH(batch);
794 }
795
796 static void 
797 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
798                           struct i965_post_processing_context *pp_context)
799 {
800     struct intel_batchbuffer *batch = pp_context->batch;
801
802     BEGIN_BATCH(batch, 2);
803     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
804     OUT_BATCH(batch,
805               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
806               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
807     ADVANCE_BATCH(batch);
808 }
809
810 static void
811 ironlake_pp_constant_buffer(VADriverContextP ctx,
812                             struct i965_post_processing_context *pp_context)
813 {
814     struct intel_batchbuffer *batch = pp_context->batch;
815
816     BEGIN_BATCH(batch, 2);
817     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
818     OUT_RELOC(batch, pp_context->curbe.bo,
819               I915_GEM_DOMAIN_INSTRUCTION, 0,
820               pp_context->urb.size_cs_entry - 1);
821     ADVANCE_BATCH(batch);    
822 }
823
824 static void
825 ironlake_pp_object_walker(VADriverContextP ctx,
826                           struct i965_post_processing_context *pp_context)
827 {
828     struct intel_batchbuffer *batch = pp_context->batch;
829     int x, x_steps, y, y_steps;
830     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
831
832     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
833     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
834
835     for (y = 0; y < y_steps; y++) {
836         for (x = 0; x < x_steps; x++) {
837             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
838                 BEGIN_BATCH(batch, 20);
839                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
840                 OUT_BATCH(batch, 0);
841                 OUT_BATCH(batch, 0); /* no indirect data */
842                 OUT_BATCH(batch, 0);
843
844                 /* inline data grf 5-6 */
845                 assert(sizeof(*pp_inline_parameter) == 64);
846                 intel_batchbuffer_data(batch, pp_inline_parameter, sizeof(*pp_inline_parameter));
847
848                 ADVANCE_BATCH(batch);
849             }
850         }
851     }
852 }
853
854 static void
855 ironlake_pp_pipeline_setup(VADriverContextP ctx,
856                            struct i965_post_processing_context *pp_context)
857 {
858     struct intel_batchbuffer *batch = pp_context->batch;
859
860     intel_batchbuffer_start_atomic(batch, 0x1000);
861     intel_batchbuffer_emit_mi_flush(batch);
862     ironlake_pp_pipeline_select(ctx, pp_context);
863     ironlake_pp_state_base_address(ctx, pp_context);
864     ironlake_pp_state_pointers(ctx, pp_context);
865     ironlake_pp_urb_layout(ctx, pp_context);
866     ironlake_pp_cs_urb_layout(ctx, pp_context);
867     ironlake_pp_constant_buffer(ctx, pp_context);
868     ironlake_pp_object_walker(ctx, pp_context);
869     intel_batchbuffer_end_atomic(batch);
870 }
871
872 static void
873 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
874                           dri_bo *surf_bo, unsigned long surf_bo_offset,
875                           int width, int height, int pitch, int format, 
876                           int index, int is_target)
877 {
878     struct i965_surface_state *ss;
879     dri_bo *ss_bo;
880     unsigned int tiling;
881     unsigned int swizzle;
882
883     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
884     ss_bo = pp_context->surface_state_binding_table.bo;
885     assert(ss_bo);
886
887     dri_bo_map(ss_bo, True);
888     assert(ss_bo->virtual);
889     ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
890     memset(ss, 0, sizeof(*ss));
891     ss->ss0.surface_type = I965_SURFACE_2D;
892     ss->ss0.surface_format = format;
893     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
894     ss->ss2.width = width - 1;
895     ss->ss2.height = height - 1;
896     ss->ss3.pitch = pitch - 1;
897     pp_set_surface_tiling(ss, tiling);
898     dri_bo_emit_reloc(ss_bo,
899                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
900                       surf_bo_offset,
901                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
902                       surf_bo);
903     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
904     dri_bo_unmap(ss_bo);
905 }
906
907 static void
908 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
909                            dri_bo *surf_bo, unsigned long surf_bo_offset,
910                            int width, int height, int wpitch,
911                            int xoffset, int yoffset,
912                            int format, int interleave_chroma,
913                            int index)
914 {
915     struct i965_surface_state2 *ss2;
916     dri_bo *ss2_bo;
917     unsigned int tiling;
918     unsigned int swizzle;
919
920     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
921     ss2_bo = pp_context->surface_state_binding_table.bo;
922     assert(ss2_bo);
923
924     dri_bo_map(ss2_bo, True);
925     assert(ss2_bo->virtual);
926     ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
927     memset(ss2, 0, sizeof(*ss2));
928     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
929     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
930     ss2->ss1.width = width - 1;
931     ss2->ss1.height = height - 1;
932     ss2->ss2.pitch = wpitch - 1;
933     ss2->ss2.interleave_chroma = interleave_chroma;
934     ss2->ss2.surface_format = format;
935     ss2->ss3.x_offset_for_cb = xoffset;
936     ss2->ss3.y_offset_for_cb = yoffset;
937     pp_set_surface2_tiling(ss2, tiling);
938     dri_bo_emit_reloc(ss2_bo,
939                       I915_GEM_DOMAIN_RENDER, 0,
940                       surf_bo_offset,
941                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
942                       surf_bo);
943     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
944     dri_bo_unmap(ss2_bo);
945 }
946
947 static void
948 gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
949                           dri_bo *surf_bo, unsigned long surf_bo_offset,
950                           int width, int height, int pitch, int format, 
951                           int index, int is_target)
952 {
953     struct gen7_surface_state *ss;
954     dri_bo *ss_bo;
955     unsigned int tiling;
956     unsigned int swizzle;
957
958     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
959     ss_bo = pp_context->surface_state_binding_table.bo;
960     assert(ss_bo);
961
962     dri_bo_map(ss_bo, True);
963     assert(ss_bo->virtual);
964     ss = (struct gen7_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
965     memset(ss, 0, sizeof(*ss));
966     ss->ss0.surface_type = I965_SURFACE_2D;
967     ss->ss0.surface_format = format;
968     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
969     ss->ss2.width = width - 1;
970     ss->ss2.height = height - 1;
971     ss->ss3.pitch = pitch - 1;
972     gen7_pp_set_surface_tiling(ss, tiling);
973     dri_bo_emit_reloc(ss_bo,
974                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
975                       surf_bo_offset,
976                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
977                       surf_bo);
978     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
979     dri_bo_unmap(ss_bo);
980 }
981
982 static void
983 gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
984                            dri_bo *surf_bo, unsigned long surf_bo_offset,
985                            int width, int height, int wpitch,
986                            int xoffset, int yoffset,
987                            int format, int interleave_chroma,
988                            int index)
989 {
990     struct gen7_surface_state2 *ss2;
991     dri_bo *ss2_bo;
992     unsigned int tiling;
993     unsigned int swizzle;
994
995     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
996     ss2_bo = pp_context->surface_state_binding_table.bo;
997     assert(ss2_bo);
998
999     dri_bo_map(ss2_bo, True);
1000     assert(ss2_bo->virtual);
1001     ss2 = (struct gen7_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1002     memset(ss2, 0, sizeof(*ss2));
1003     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1004     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1005     ss2->ss1.width = width - 1;
1006     ss2->ss1.height = height - 1;
1007     ss2->ss2.pitch = wpitch - 1;
1008     ss2->ss2.interleave_chroma = interleave_chroma;
1009     ss2->ss2.surface_format = format;
1010     ss2->ss3.x_offset_for_cb = xoffset;
1011     ss2->ss3.y_offset_for_cb = yoffset;
1012     gen7_pp_set_surface2_tiling(ss2, tiling);
1013     dri_bo_emit_reloc(ss2_bo,
1014                       I915_GEM_DOMAIN_RENDER, 0,
1015                       surf_bo_offset,
1016                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
1017                       surf_bo);
1018     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1019     dri_bo_unmap(ss2_bo);
1020 }
1021
1022 static void 
1023 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1024                                 const struct i965_surface *surface, 
1025                                 int base_index, int is_target,
1026                                 int *width, int *height, int *pitch, int *offset)
1027 {
1028     struct i965_driver_data *i965 = i965_driver_data(ctx);
1029     struct object_surface *obj_surface;
1030     struct object_image *obj_image;
1031     dri_bo *bo;
1032     int fourcc = pp_get_surface_fourcc(ctx, surface);
1033     const int Y = 0;
1034     const int U = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 2 : 1;
1035     const int V = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 1 : 2;
1036     const int UV = 1;
1037     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1038
1039     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1040         obj_surface = SURFACE(surface->id);
1041         bo = obj_surface->bo;
1042         width[0] = obj_surface->orig_width;
1043         height[0] = obj_surface->orig_height;
1044         pitch[0] = obj_surface->width;
1045         offset[0] = 0;
1046
1047         if (interleaved_uv) {
1048             width[1] = obj_surface->orig_width;
1049             height[1] = obj_surface->orig_height / 2;
1050             pitch[1] = obj_surface->width;
1051             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1052         } else {
1053             width[1] = obj_surface->orig_width / 2;
1054             height[1] = obj_surface->orig_height / 2;
1055             pitch[1] = obj_surface->width / 2;
1056             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1057             width[2] = obj_surface->orig_width / 2;
1058             height[2] = obj_surface->orig_height / 2;
1059             pitch[2] = obj_surface->width / 2;
1060             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
1061         }
1062     } else {
1063         obj_image = IMAGE(surface->id);
1064         bo = obj_image->bo;
1065         width[0] = obj_image->image.width;
1066         height[0] = obj_image->image.height;
1067         pitch[0] = obj_image->image.pitches[0];
1068         offset[0] = obj_image->image.offsets[0];
1069
1070         if (interleaved_uv) {
1071             width[1] = obj_image->image.width;
1072             height[1] = obj_image->image.height / 2;
1073             pitch[1] = obj_image->image.pitches[1];
1074             offset[1] = obj_image->image.offsets[1];
1075         } else {
1076             width[1] = obj_image->image.width / 2;
1077             height[1] = obj_image->image.height / 2;
1078             pitch[1] = obj_image->image.pitches[1];
1079             offset[1] = obj_image->image.offsets[1];
1080             width[2] = obj_image->image.width / 2;
1081             height[2] = obj_image->image.height / 2;
1082             pitch[2] = obj_image->image.pitches[2];
1083             offset[2] = obj_image->image.offsets[2];
1084         }
1085     }
1086
1087     /* Y surface */
1088     i965_pp_set_surface_state(ctx, pp_context,
1089                               bo, offset[Y],
1090                               width[Y] / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
1091                               base_index, is_target);
1092
1093     if (interleaved_uv) {
1094         i965_pp_set_surface_state(ctx, pp_context,
1095                                   bo, offset[UV],
1096                                   width[UV] / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
1097                                   base_index + 1, is_target);
1098     } else {
1099         /* U surface */
1100         i965_pp_set_surface_state(ctx, pp_context,
1101                                   bo, offset[U],
1102                                   width[U] / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
1103                                   base_index + 1, is_target);
1104
1105         /* V surface */
1106         i965_pp_set_surface_state(ctx, pp_context,
1107                                   bo, offset[V],
1108                                   width[V] / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
1109                                   base_index + 2, is_target);
1110     }
1111
1112 }
1113
1114 static void 
1115 gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1116                                      const struct i965_surface *surface, 
1117                                      int base_index, int is_target,
1118                                      int *width, int *height, int *pitch, int *offset)
1119 {
1120     struct i965_driver_data *i965 = i965_driver_data(ctx);
1121     struct object_surface *obj_surface;
1122     struct object_image *obj_image;
1123     dri_bo *bo;
1124     int fourcc = pp_get_surface_fourcc(ctx, surface);
1125     const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1126                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1;
1127     const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1128                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 1 : 2;
1129     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1130
1131     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1132         obj_surface = SURFACE(surface->id);
1133         bo = obj_surface->bo;
1134         width[0] = obj_surface->orig_width;
1135         height[0] = obj_surface->orig_height;
1136         pitch[0] = obj_surface->width;
1137         offset[0] = 0;
1138
1139         width[1] = obj_surface->cb_cr_width;
1140         height[1] = obj_surface->cb_cr_height;
1141         pitch[1] = obj_surface->cb_cr_pitch;
1142         offset[1] = obj_surface->y_cb_offset * obj_surface->width;
1143
1144         width[2] = obj_surface->cb_cr_width;
1145         height[2] = obj_surface->cb_cr_height;
1146         pitch[2] = obj_surface->cb_cr_pitch;
1147         offset[2] = obj_surface->y_cr_offset * obj_surface->width;
1148     } else {
1149         obj_image = IMAGE(surface->id);
1150         bo = obj_image->bo;
1151         width[0] = obj_image->image.width;
1152         height[0] = obj_image->image.height;
1153         pitch[0] = obj_image->image.pitches[0];
1154         offset[0] = obj_image->image.offsets[0];
1155
1156         if (interleaved_uv) {
1157             width[1] = obj_image->image.width;
1158             height[1] = obj_image->image.height / 2;
1159             pitch[1] = obj_image->image.pitches[1];
1160             offset[1] = obj_image->image.offsets[1];
1161         } else {
1162             width[1] = obj_image->image.width / 2;
1163             height[1] = obj_image->image.height / 2;
1164             pitch[1] = obj_image->image.pitches[U];
1165             offset[1] = obj_image->image.offsets[U];
1166             width[2] = obj_image->image.width / 2;
1167             height[2] = obj_image->image.height / 2;
1168             pitch[2] = obj_image->image.pitches[V];
1169             offset[2] = obj_image->image.offsets[V];
1170         }
1171     }
1172
1173     if (is_target) {
1174         gen7_pp_set_surface_state(ctx, pp_context,
1175                                   bo, 0,
1176                                   width[0] / 4, height[0], pitch[0],
1177                                   I965_SURFACEFORMAT_R8_SINT,
1178                                   base_index, 1);
1179
1180         if (interleaved_uv) {
1181             gen7_pp_set_surface_state(ctx, pp_context,
1182                                       bo, offset[1],
1183                                       width[1] / 2, height[1], pitch[1],
1184                                       I965_SURFACEFORMAT_R8G8_SINT,
1185                                       base_index + 1, 1);
1186         } else {
1187             gen7_pp_set_surface_state(ctx, pp_context,
1188                                       bo, offset[1],
1189                                       width[1] / 4, height[1], pitch[1],
1190                                       I965_SURFACEFORMAT_R8_SINT,
1191                                       base_index + 1, 1);
1192             gen7_pp_set_surface_state(ctx, pp_context,
1193                                       bo, offset[2],
1194                                       width[2] / 4, height[2], pitch[2],
1195                                       I965_SURFACEFORMAT_R8_SINT,
1196                                       base_index + 2, 1);
1197         }
1198     } else {
1199         gen7_pp_set_surface2_state(ctx, pp_context,
1200                                    bo, offset[0],
1201                                    width[0], height[0], pitch[0],
1202                                    0, 0,
1203                                    SURFACE_FORMAT_Y8_UNORM, 0,
1204                                    base_index);
1205
1206         if (interleaved_uv) {
1207             gen7_pp_set_surface2_state(ctx, pp_context,
1208                                        bo, offset[1],
1209                                        width[1], height[1], pitch[1],
1210                                        0, 0,
1211                                        SURFACE_FORMAT_R8B8_UNORM, 0,
1212                                        base_index + 1);
1213         } else {
1214             gen7_pp_set_surface2_state(ctx, pp_context,
1215                                        bo, offset[1],
1216                                        width[1], height[1], pitch[1],
1217                                        0, 0,
1218                                        SURFACE_FORMAT_R8_UNORM, 0,
1219                                        base_index + 1);
1220             gen7_pp_set_surface2_state(ctx, pp_context,
1221                                        bo, offset[2],
1222                                        width[2], height[2], pitch[2],
1223                                        0, 0,
1224                                        SURFACE_FORMAT_R8_UNORM, 0,
1225                                        base_index + 2);
1226         }
1227     }
1228 }
1229
/* Horizontal step count for the pp_null_* hooks: always 1, context ignored. */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1235
/* Vertical step count for the pp_null_* hooks: always 1, context ignored. */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1241
/* Block-parameter hook for the pp_null_* set: touches nothing, returns 0. */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;
    (void)x;
    (void)y;

    return 0;
}
1247
1248 static VAStatus
1249 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1250                    const struct i965_surface *src_surface,
1251                    const VARectangle *src_rect,
1252                    struct i965_surface *dst_surface,
1253                    const VARectangle *dst_rect,
1254                    void *filter_param)
1255 {
1256     /* private function & data */
1257     pp_context->pp_x_steps = pp_null_x_steps;
1258     pp_context->pp_y_steps = pp_null_y_steps;
1259     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
1260
1261     dst_surface->flags = src_surface->flags;
1262
1263     return VA_STATUS_SUCCESS;
1264 }
1265
/* Horizontal step count for the load/save hooks: always 1, context ignored. */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1271
1272 static int
1273 pp_load_save_y_steps(void *private_context)
1274 {
1275     struct pp_load_save_context *pp_load_save_context = private_context;
1276
1277     return pp_load_save_context->dest_h / 8;
1278 }
1279
1280 static int
1281 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1282 {
1283     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1284
1285     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
1286     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
1287     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
1288     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
1289
1290     return 0;
1291 }
1292
1293 static VAStatus
1294 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1295                                 const struct i965_surface *src_surface,
1296                                 const VARectangle *src_rect,
1297                                 struct i965_surface *dst_surface,
1298                                 const VARectangle *dst_rect,
1299                                 void *filter_param)
1300 {
1301     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
1302     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1303     int width[3], height[3], pitch[3], offset[3];
1304     const int Y = 0;
1305
1306     /* source surface */
1307     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
1308                                     width, height, pitch, offset);
1309
1310     /* destination surface */
1311     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
1312                                     width, height, pitch, offset);
1313
1314     /* private function & data */
1315     pp_context->pp_x_steps = pp_load_save_x_steps;
1316     pp_context->pp_y_steps = pp_load_save_y_steps;
1317     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
1318     pp_load_save_context->dest_h = ALIGN(height[Y], 16);
1319     pp_load_save_context->dest_w = ALIGN(width[Y], 16);
1320
1321     pp_inline_parameter->grf5.block_count_x = ALIGN(width[Y], 16) / 16;   /* 1 x N */
1322     pp_inline_parameter->grf5.number_blocks = ALIGN(width[Y], 16) / 16;
1323
1324     dst_surface->flags = src_surface->flags;
1325
1326     return VA_STATUS_SUCCESS;
1327 }
1328
/* Horizontal step count for the scaling hooks: always 1, context ignored. */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1334
1335 static int
1336 pp_scaling_y_steps(void *private_context)
1337 {
1338     struct pp_scaling_context *pp_scaling_context = private_context;
1339
1340     return pp_scaling_context->dest_h / 8;
1341 }
1342
1343 static int
1344 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1345 {
1346     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
1347     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1348     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1349     float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1350     float src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
1351
1352     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
1353     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
1354     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
1355     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
1356     
1357     return 0;
1358 }
1359
1360 static VAStatus
1361 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1362                            const struct i965_surface *src_surface,
1363                            const VARectangle *src_rect,
1364                            struct i965_surface *dst_surface,
1365                            const VARectangle *dst_rect,
1366                            void *filter_param)
1367 {
1368     struct i965_driver_data *i965 = i965_driver_data(ctx);
1369     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
1370     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1371     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1372     struct object_surface *obj_surface;
1373     struct i965_sampler_state *sampler_state;
1374     int in_w, in_h, in_wpitch, in_hpitch;
1375     int out_w, out_h, out_wpitch, out_hpitch;
1376
1377     /* source surface */
1378     obj_surface = SURFACE(src_surface->id);
1379     in_w = obj_surface->orig_width;
1380     in_h = obj_surface->orig_height;
1381     in_wpitch = obj_surface->width;
1382     in_hpitch = obj_surface->height;
1383
1384     /* source Y surface index 1 */
1385     i965_pp_set_surface_state(ctx, pp_context,
1386                               obj_surface->bo, 0,
1387                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1388                               1, 0);
1389
1390     /* source UV surface index 2 */
1391     i965_pp_set_surface_state(ctx, pp_context,
1392                               obj_surface->bo, in_wpitch * in_hpitch,
1393                               in_w / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1394                               2, 0);
1395
1396     /* destination surface */
1397     obj_surface = SURFACE(dst_surface->id);
1398     out_w = obj_surface->orig_width;
1399     out_h = obj_surface->orig_height;
1400     out_wpitch = obj_surface->width;
1401     out_hpitch = obj_surface->height;
1402
1403     /* destination Y surface index 7 */
1404     i965_pp_set_surface_state(ctx, pp_context,
1405                               obj_surface->bo, 0,
1406                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1407                               7, 1);
1408
1409     /* destination UV surface index 8 */
1410     i965_pp_set_surface_state(ctx, pp_context,
1411                               obj_surface->bo, out_wpitch * out_hpitch,
1412                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1413                               8, 1);
1414
1415     /* sampler state */
1416     dri_bo_map(pp_context->sampler_state_table.bo, True);
1417     assert(pp_context->sampler_state_table.bo->virtual);
1418     sampler_state = pp_context->sampler_state_table.bo->virtual;
1419
1420     /* SIMD16 Y index 1 */
1421     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
1422     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1423     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1424     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1425     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1426
1427     /* SIMD16 UV index 2 */
1428     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
1429     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1430     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1431     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1432     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1433
1434     dri_bo_unmap(pp_context->sampler_state_table.bo);
1435
1436     /* private function & data */
1437     pp_context->pp_x_steps = pp_scaling_x_steps;
1438     pp_context->pp_y_steps = pp_scaling_y_steps;
1439     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
1440
1441     pp_scaling_context->dest_x = dst_rect->x;
1442     pp_scaling_context->dest_y = dst_rect->y;
1443     pp_scaling_context->dest_w = ALIGN(dst_rect->width, 16);
1444     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 16);
1445     pp_scaling_context->src_normalized_x = (float)src_rect->x / in_w;
1446     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
1447
1448     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
1449
1450     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / dst_rect->width;
1451     pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
1452     pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
1453     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
1454     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
1455
1456     dst_surface->flags = src_surface->flags;
1457
1458     return VA_STATUS_SUCCESS;
1459 }
1460
1461 static int
1462 pp_avs_x_steps(void *private_context)
1463 {
1464     struct pp_avs_context *pp_avs_context = private_context;
1465
1466     return pp_avs_context->dest_w / 16;
1467 }
1468
/* Vertical step count for the pp_avs_* hooks: always 1, context ignored. */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1474
/*
 * Block-parameter callback of the pp_avs_* set: fills the inline parameters
 * for destination block (x, y).
 *
 * Always written at the end: the block's normalized vertical source origin
 * (y scaling step * y * 8 + src_normalized_y) and its destination origin
 * (x * 16 + dest_x, y * 8 + dest_y).
 *
 * The normalized horizontal source origin depends on grf4.r4_2.avs.nlas
 * (presumably "non-linear anamorphic scaling" -- confirm):
 *   - nlas == 0: preset x scaling step * x * 16 + src_normalized_x.
 *   - nlas != 0: the x scaling step and grf6.video_step_delta are
 *     (re)computed here, and for x != 0 the origin is accumulated (+=) on
 *     top of the value left in the inline parameters by the previous call.
 *     The result therefore relies on the blocks being visited with x
 *     increasing from 0 -- TODO confirm against the caller.
 *
 * NOTE(review): tmp_w is computed with an integer division by src_h and is
 * later used as a divisor itself; nothing in this function guards against
 * either value being zero.
 *
 * Always returns 0.
 */
1475 static int
1476 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1477 {
1478     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1479     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1480     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1481     float src_x_steping, src_y_steping, video_step_delta;
    /* source width scaled by dest_h / src_h (integer math), then ALIGNed to 16 */
1482     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
1483
1484     if (pp_static_parameter->grf4.r4_2.avs.nlas == 0) {
        /* plain case: origin is a direct function of x, no state carried over */
1485         src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1486         pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_avs_context->src_normalized_x;
1487     } else if (tmp_w >= pp_avs_context->dest_w) {
        /* tmp_w is at least dest_w: constant step of 1 / tmp_w, no delta */
1488         pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1489         pp_inline_parameter->grf6.video_step_delta = 0;
1490         
1491         if (x == 0) {
            /* first block starts half of the excess (tmp_w - dest_w) in */
1492             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
1493                 pp_avs_context->src_normalized_x;
1494         } else {
            /* advance from the origin left by the previous call */
1495             src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1496             video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1497             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1498                 16 * 15 * video_step_delta / 2;
1499         }
1500     } else {
        /* tmp_w is smaller than dest_w */
1501         int n0, n1, n2, nls_left, nls_right;
1502         int factor_a = 5, factor_b = 4;
1503         float f;
1504
        /*
         * (dest_w - tmp_w) counted in 16-pixel blocks is split into n0 and
         * n1 (n1 takes the remainder); n2 is tmp_w / (16 * factor_a).
         * nls_left / nls_right are the block counts of the left and right
         * regions handled separately below; f is n2 * 16 / tmp_w.
         */
1505         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
1506         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
1507         n2 = tmp_w / (16 * factor_a);
1508         nls_left = n0 + n2;
1509         nls_right = n1 + n2;
1510         f = (float) n2 * 16 / tmp_w;
1511         
1512         if (n0 < 5) {
            /* n0 below 5: constant step of 1 / dest_w, no delta */
1513             pp_inline_parameter->grf6.video_step_delta = 0.0;
1514
1515             if (x == 0) {
1516                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
1517                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1518             } else {
1519                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1520                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1521                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1522                     16 * 15 * video_step_delta / 2;
1523             }
1524         } else {
1525             if (x < nls_left) {
                /* left region: step starts at a and grows by 16 * b per block */
1526                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
1527                 float a = f / (nls_left * 16 * factor_b);
1528                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
1529                 
1530                 pp_inline_parameter->grf6.video_step_delta = b;
1531
1532                 if (x == 0) {
1533                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1534                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a;
1535                 } else {
1536                     src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1537                     video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1538                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1539                         16 * 15 * video_step_delta / 2;
1540                     pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
1541                 }
1542             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
                /*
                 * The origin is advanced with the step/delta of the previous
                 * block before they are reset for this one.
                 */
1543                 /* scale the center linearly */
1544                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1545                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1546                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1547                     16 * 15 * video_step_delta / 2;
1548                 pp_inline_parameter->grf6.video_step_delta = 0.0;
1549                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1550             } else {
                /* right region: delta is -b, step shrinks by 16 * b per block */
1551                 float a = f / (nls_right * 16 * factor_b);
1552                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
1553
1554                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1555                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1556                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1557                     16 * 15 * video_step_delta / 2;
1558                 pp_inline_parameter->grf6.video_step_delta = -b;
1559
                /* first block of the right region sets the step explicitly */
1560                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
1561                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
1562                 else
1563                     pp_inline_parameter->grf5.normalized_video_x_scaling_step -= b * 16;
1564             }
1565         }
1566     }
1567
    /* common to every mode: vertical source origin and destination origin */
1568     src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
1569     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
1570     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
1571     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
1572
1573     return 0;
1574 }
1575
1576 static VAStatus
1577 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1578                        const struct i965_surface *src_surface,
1579                        const VARectangle *src_rect,
1580                        struct i965_surface *dst_surface,
1581                        const VARectangle *dst_rect,
1582                        void *filter_param,
1583                        int nlas)
1584 {
1585     struct i965_driver_data *i965 = i965_driver_data(ctx);
1586     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1587     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1588     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1589     struct object_surface *obj_surface;
1590     struct i965_sampler_8x8 *sampler_8x8;
1591     struct i965_sampler_8x8_state *sampler_8x8_state;
1592     int index;
1593     int in_w, in_h, in_wpitch, in_hpitch;
1594     int out_w, out_h, out_wpitch, out_hpitch;
1595     int i;
1596
1597     /* surface */
1598     obj_surface = SURFACE(src_surface->id);
1599     in_w = obj_surface->orig_width;
1600     in_h = obj_surface->orig_height;
1601     in_wpitch = obj_surface->width;
1602     in_hpitch = obj_surface->height;
1603
1604     /* source Y surface index 1 */
1605     i965_pp_set_surface2_state(ctx, pp_context,
1606                                obj_surface->bo, 0,
1607                                in_w, in_h, in_wpitch,
1608                                0, 0,
1609                                SURFACE_FORMAT_Y8_UNORM, 0,
1610                                1);
1611
1612     /* source UV surface index 2 */
1613     i965_pp_set_surface2_state(ctx, pp_context,
1614                                obj_surface->bo, in_wpitch * in_hpitch,
1615                                in_w / 2, in_h / 2, in_wpitch,
1616                                0, 0,
1617                                SURFACE_FORMAT_R8B8_UNORM, 0,
1618                                2);
1619
1620     /* destination surface */
1621     obj_surface = SURFACE(dst_surface->id);
1622     out_w = obj_surface->orig_width;
1623     out_h = obj_surface->orig_height;
1624     out_wpitch = obj_surface->width;
1625     out_hpitch = obj_surface->height;
1626     assert(out_w <= out_wpitch && out_h <= out_hpitch);
1627
1628     /* destination Y surface index 7 */
1629     i965_pp_set_surface_state(ctx, pp_context,
1630                               obj_surface->bo, 0,
1631                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1632                               7, 1);
1633
1634     /* destination UV surface index 8 */
1635     i965_pp_set_surface_state(ctx, pp_context,
1636                               obj_surface->bo, out_wpitch * out_hpitch,
1637                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1638                               8, 1);
1639
1640     /* sampler 8x8 state */
1641     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
1642     assert(pp_context->sampler_state_table.bo_8x8->virtual);
1643     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1644     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
1645     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1646
1647     for (i = 0; i < 17; i++) {
1648         /* for Y channel, currently ignore */
1649         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x00;
1650         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x00;
1651         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x08;
1652         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x18;
1653         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x18;
1654         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x08;
1655         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x00;
1656         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x00;
1657         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x00;
1658         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x00;
1659         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x10;
1660         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x10;
1661         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x10;
1662         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x10;
1663         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x00;
1664         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x00;
1665         /* for U/V channel, 0.25 */
1666         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
1667         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
1668         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
1669         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
1670         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
1671         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
1672         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
1673         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
1674         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
1675         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
1676         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
1677         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
1678         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
1679         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
1680         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
1681         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
1682     }
1683
1684     sampler_8x8_state->dw136.default_sharpness_level = 0;
1685     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
1686     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1687     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1688     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
1689
1690     /* sampler 8x8 */
1691     dri_bo_map(pp_context->sampler_state_table.bo, True);
1692     assert(pp_context->sampler_state_table.bo->virtual);
1693     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
1694     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
1695
1696     /* sample_8x8 Y index 1 */
1697     index = 1;
1698     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1699     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
1700     sampler_8x8[index].dw0.ief_bypass = 1;
1701     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1702     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1703     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
1704     sampler_8x8[index].dw2.global_noise_estimation = 22;
1705     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1706     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1707     sampler_8x8[index].dw3.strong_edge_weight = 7;
1708     sampler_8x8[index].dw3.regular_weight = 2;
1709     sampler_8x8[index].dw3.non_edge_weight = 0;
1710     sampler_8x8[index].dw3.gain_factor = 40;
1711     sampler_8x8[index].dw4.steepness_boost = 0;
1712     sampler_8x8[index].dw4.steepness_threshold = 0;
1713     sampler_8x8[index].dw4.mr_boost = 0;
1714     sampler_8x8[index].dw4.mr_threshold = 5;
1715     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1716     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1717     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1718     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1719     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1720     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1721     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1722     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1723     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1724     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1725     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1726     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1727     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1728     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1729     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1730     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1731     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1732     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1733     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1734     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1735     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1736     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1737     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1738     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1739     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1740     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1741     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1742     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1743     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1744     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1745     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1746     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1747     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1748     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1749     sampler_8x8[index].dw13.limiter_boost = 0;
1750     sampler_8x8[index].dw13.minimum_limiter = 10;
1751     sampler_8x8[index].dw13.maximum_limiter = 11;
1752     sampler_8x8[index].dw14.clip_limiter = 130;
1753     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1754                       I915_GEM_DOMAIN_RENDER, 
1755                       0,
1756                       0,
1757                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1758                       pp_context->sampler_state_table.bo_8x8);
1759
1760     /* sample_8x8 UV index 2 */
1761     index = 2;
1762     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1763     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
1764     sampler_8x8[index].dw0.ief_bypass = 1;
1765     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1766     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1767     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
1768     sampler_8x8[index].dw2.global_noise_estimation = 22;
1769     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1770     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1771     sampler_8x8[index].dw3.strong_edge_weight = 7;
1772     sampler_8x8[index].dw3.regular_weight = 2;
1773     sampler_8x8[index].dw3.non_edge_weight = 0;
1774     sampler_8x8[index].dw3.gain_factor = 40;
1775     sampler_8x8[index].dw4.steepness_boost = 0;
1776     sampler_8x8[index].dw4.steepness_threshold = 0;
1777     sampler_8x8[index].dw4.mr_boost = 0;
1778     sampler_8x8[index].dw4.mr_threshold = 5;
1779     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1780     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1781     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1782     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1783     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1784     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1785     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1786     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1787     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1788     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1789     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1790     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1791     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1792     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1793     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1794     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1795     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1796     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1797     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1798     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1799     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1800     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1801     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1802     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1803     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1804     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1805     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1806     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1807     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1808     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1809     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1810     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1811     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1812     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1813     sampler_8x8[index].dw13.limiter_boost = 0;
1814     sampler_8x8[index].dw13.minimum_limiter = 10;
1815     sampler_8x8[index].dw13.maximum_limiter = 11;
1816     sampler_8x8[index].dw14.clip_limiter = 130;
1817     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1818                       I915_GEM_DOMAIN_RENDER, 
1819                       0,
1820                       0,
1821                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1822                       pp_context->sampler_state_table.bo_8x8);
1823
1824     dri_bo_unmap(pp_context->sampler_state_table.bo);
1825
1826     /* private function & data */
1827     pp_context->pp_x_steps = pp_avs_x_steps;
1828     pp_context->pp_y_steps = pp_avs_y_steps;
1829     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
1830
1831     pp_avs_context->dest_x = dst_rect->x;
1832     pp_avs_context->dest_y = dst_rect->y;
1833     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
1834     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
1835     pp_avs_context->src_normalized_x = (float)src_rect->x / in_w;
1836     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
1837     pp_avs_context->src_w = src_rect->width;
1838     pp_avs_context->src_h = src_rect->height;
1839
1840     pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
1841     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
1842
1843     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / dst_rect->width;
1844     pp_inline_parameter->grf5.block_count_x = 1;        /* M x 1 */
1845     pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
1846     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
1847     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
1848     pp_inline_parameter->grf6.video_step_delta = 0.0;
1849
1850     dst_surface->flags = src_surface->flags;
1851
1852     return VA_STATUS_SUCCESS;
1853 }
1854
1855 static VAStatus
1856 pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1857                             const struct i965_surface *src_surface,
1858                             const VARectangle *src_rect,
1859                             struct i965_surface *dst_surface,
1860                             const VARectangle *dst_rect,
1861                             void *filter_param)
1862 {
1863     return pp_nv12_avs_initialize(ctx, pp_context,
1864                                   src_surface,
1865                                   src_rect,
1866                                   dst_surface,
1867                                   dst_rect,
1868                                   filter_param,
1869                                   1);
1870 }
1871
1872 static VAStatus
1873 gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1874                              const struct i965_surface *src_surface,
1875                              const VARectangle *src_rect,
1876                              struct i965_surface *dst_surface,
1877                              const VARectangle *dst_rect,
1878                              void *filter_param)
1879 {
1880     return pp_nv12_avs_initialize(ctx, pp_context,
1881                                   src_surface,
1882                                   src_rect,
1883                                   dst_surface,
1884                                   dst_rect,
1885                                   filter_param,
1886                                   0);    
1887 }
1888
1889 static int
1890 gen7_pp_avs_x_steps(void *private_context)
1891 {
1892     struct pp_avs_context *pp_avs_context = private_context;
1893
1894     return pp_avs_context->dest_w / 16;
1895 }
1896
1897 static int
1898 gen7_pp_avs_y_steps(void *private_context)
1899 {
1900     struct pp_avs_context *pp_avs_context = private_context;
1901
1902     return pp_avs_context->dest_h / 16;
1903 }
1904
1905 static int
1906 gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1907 {
1908     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1909     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1910
1911     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
1912     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
1913     pp_inline_parameter->grf7.constant_0 = 0xffffffff;
1914     pp_inline_parameter->grf7.sampler_load_main_video_x_scaling_step = 1.0 / pp_avs_context->src_w;
1915
1916     return 0;
1917 }
1918
1919 static VAStatus
1920 gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1921                            const struct i965_surface *src_surface,
1922                            const VARectangle *src_rect,
1923                            struct i965_surface *dst_surface,
1924                            const VARectangle *dst_rect,
1925                            void *filter_param)
1926 {
1927     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1928     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1929     struct gen7_sampler_8x8 *sampler_8x8;
1930     struct i965_sampler_8x8_state *sampler_8x8_state;
1931     int index, i;
1932     int width[3], height[3], pitch[3], offset[3];
1933
1934     /* source surface */
1935     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
1936                                          width, height, pitch, offset);
1937
1938     /* destination surface */
1939     gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
1940                                          width, height, pitch, offset);
1941
1942     /* sampler 8x8 state */
1943     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
1944     assert(pp_context->sampler_state_table.bo_8x8->virtual);
1945     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1946     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
1947     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1948
1949     for (i = 0; i < 17; i++) {
1950         /* for Y channel, currently ignore */
1951         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0;
1952         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0;
1953         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0;
1954         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x0;
1955         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x0;
1956         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0;
1957         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0;
1958         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0;
1959         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0;
1960         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0;
1961         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0;
1962         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x0;
1963         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x0;
1964         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0;
1965         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0;
1966         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0;
1967         /* for U/V channel, 0.25 */
1968         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
1969         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
1970         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
1971         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
1972         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
1973         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
1974         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
1975         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
1976         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
1977         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
1978         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
1979         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
1980         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
1981         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
1982         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
1983         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
1984     }
1985
1986     sampler_8x8_state->dw136.default_sharpness_level = 0;
1987     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
1988     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1989     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1990     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
1991
1992     /* sampler 8x8 */
1993     dri_bo_map(pp_context->sampler_state_table.bo, True);
1994     assert(pp_context->sampler_state_table.bo->virtual);
1995     assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
1996     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
1997
1998     /* sample_8x8 Y index 4 */
1999     index = 4;
2000     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2001     sampler_8x8[index].dw0.global_noise_estimation = 255;
2002     sampler_8x8[index].dw0.ief_bypass = 1;
2003
2004     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2005
2006     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2007     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2008     sampler_8x8[index].dw2.r5x_coefficient = 9;
2009     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2010     sampler_8x8[index].dw2.r5c_coefficient = 3;
2011
2012     sampler_8x8[index].dw3.r3x_coefficient = 27;
2013     sampler_8x8[index].dw3.r3c_coefficient = 5;
2014     sampler_8x8[index].dw3.gain_factor = 40;
2015     sampler_8x8[index].dw3.non_edge_weight = 1;
2016     sampler_8x8[index].dw3.regular_weight = 2;
2017     sampler_8x8[index].dw3.strong_edge_weight = 7;
2018     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2019
2020     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2021                       I915_GEM_DOMAIN_RENDER, 
2022                       0,
2023                       0,
2024                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2025                       pp_context->sampler_state_table.bo_8x8);
2026
2027     /* sample_8x8 UV index 8 */
2028     index = 8;
2029     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2030     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2031     sampler_8x8[index].dw0.global_noise_estimation = 255;
2032     sampler_8x8[index].dw0.ief_bypass = 1;
2033     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2034     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2035     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2036     sampler_8x8[index].dw2.r5x_coefficient = 9;
2037     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2038     sampler_8x8[index].dw2.r5c_coefficient = 3;
2039     sampler_8x8[index].dw3.r3x_coefficient = 27;
2040     sampler_8x8[index].dw3.r3c_coefficient = 5;
2041     sampler_8x8[index].dw3.gain_factor = 40;
2042     sampler_8x8[index].dw3.non_edge_weight = 1;
2043     sampler_8x8[index].dw3.regular_weight = 2;
2044     sampler_8x8[index].dw3.strong_edge_weight = 7;
2045     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2046
2047     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2048                       I915_GEM_DOMAIN_RENDER, 
2049                       0,
2050                       0,
2051                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2052                       pp_context->sampler_state_table.bo_8x8);
2053
2054     /* sampler_8x8 V, index 12 */
2055     index = 12;
2056     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2057     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2058     sampler_8x8[index].dw0.global_noise_estimation = 255;
2059     sampler_8x8[index].dw0.ief_bypass = 1;
2060     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2061     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2062     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2063     sampler_8x8[index].dw2.r5x_coefficient = 9;
2064     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2065     sampler_8x8[index].dw2.r5c_coefficient = 3;
2066     sampler_8x8[index].dw3.r3x_coefficient = 27;
2067     sampler_8x8[index].dw3.r3c_coefficient = 5;
2068     sampler_8x8[index].dw3.gain_factor = 40;
2069     sampler_8x8[index].dw3.non_edge_weight = 1;
2070     sampler_8x8[index].dw3.regular_weight = 2;
2071     sampler_8x8[index].dw3.strong_edge_weight = 7;
2072     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2073
2074     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2075                       I915_GEM_DOMAIN_RENDER, 
2076                       0,
2077                       0,
2078                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2079                       pp_context->sampler_state_table.bo_8x8);
2080
2081     dri_bo_unmap(pp_context->sampler_state_table.bo);
2082
2083     /* private function & data */
2084     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
2085     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
2086     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
2087
2088     pp_avs_context->dest_x = dst_rect->x;
2089     pp_avs_context->dest_y = dst_rect->y;
2090     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
2091     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
2092     pp_avs_context->src_w = src_rect->width;
2093     pp_avs_context->src_h = src_rect->height;
2094
2095     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
2096     dw = MAX(dw, pp_avs_context->dest_w);
2097
2098     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
2099     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
2100     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) 1.0 / pp_avs_context->dest_h;
2101     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = -(float)pp_avs_context->dest_y / pp_avs_context->dest_h;
2102     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = -(float)pp_avs_context->dest_x / dw;
2103
2104     dst_surface->flags = src_surface->flags;
2105
2106     return VA_STATUS_SUCCESS;
2107 }
2108
/*
 * Number of horizontal steps for the DNDI kernel: always 1.
 * private_context is not read.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2114
2115 static int
2116 pp_dndi_y_steps(void *private_context)
2117 {
2118     struct pp_dndi_context *pp_dndi_context = private_context;
2119
2120     return pp_dndi_context->dest_h / 4;
2121 }
2122
2123 static int
2124 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2125 {
2126     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2127
2128     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2129     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
2130
2131     return 0;
2132 }
2133
2134 static VAStatus
2135 pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2136                         const struct i965_surface *src_surface,
2137                         const VARectangle *src_rect,
2138                         struct i965_surface *dst_surface,
2139                         const VARectangle *dst_rect,
2140                         void *filter_param)
2141 {
2142     struct i965_driver_data *i965 = i965_driver_data(ctx);
2143     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
2144     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2145     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2146     struct object_surface *obj_surface;
2147     struct i965_sampler_dndi *sampler_dndi;
2148     int index;
2149     int w, h;
2150     int orig_w, orig_h;
2151     int dndi_top_first = 1;
2152
2153     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
2154         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
2155
2156     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
2157         dndi_top_first = 1;
2158     else
2159         dndi_top_first = 0;
2160
2161     /* surface */
2162     obj_surface = SURFACE(src_surface->id);
2163     orig_w = obj_surface->orig_width;
2164     orig_h = obj_surface->orig_height;
2165     w = obj_surface->width;
2166     h = obj_surface->height;
2167
2168     if (pp_context->stmm.bo == NULL) {
2169         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2170                                            "STMM surface",
2171                                            w * h,
2172                                            4096);
2173         assert(pp_context->stmm.bo);
2174     }
2175
2176     /* source UV surface index 2 */
2177     i965_pp_set_surface_state(ctx, pp_context,
2178                               obj_surface->bo, w * h,
2179                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2180                               2, 0);
2181
2182     /* source YUV surface index 4 */
2183     i965_pp_set_surface2_state(ctx, pp_context,
2184                                obj_surface->bo, 0,
2185                                orig_w, orig_h, w,
2186                                0, h,
2187                                SURFACE_FORMAT_PLANAR_420_8, 1,
2188                                4);
2189
2190     /* source STMM surface index 20 */
2191     i965_pp_set_surface_state(ctx, pp_context,
2192                               pp_context->stmm.bo, 0,
2193                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2194                               20, 1);
2195
2196     /* destination surface */
2197     obj_surface = SURFACE(dst_surface->id);
2198     orig_w = obj_surface->orig_width;
2199     orig_h = obj_surface->orig_height;
2200     w = obj_surface->width;
2201     h = obj_surface->height;
2202
2203     /* destination Y surface index 7 */
2204     i965_pp_set_surface_state(ctx, pp_context,
2205                               obj_surface->bo, 0,
2206                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2207                               7, 1);
2208
2209     /* destination UV surface index 8 */
2210     i965_pp_set_surface_state(ctx, pp_context,
2211                               obj_surface->bo, w * h,
2212                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2213                               8, 1);
2214     /* sampler dndi */
2215     dri_bo_map(pp_context->sampler_state_table.bo, True);
2216     assert(pp_context->sampler_state_table.bo->virtual);
2217     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2218     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2219
2220     /* sample dndi index 1 */
2221     index = 0;
2222     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2223     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
2224     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2225     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2226
2227     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2228     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 4;
2229     sampler_dndi[index].dw1.stmm_c2 = 1;
2230     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2231     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2232
2233     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
2234     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
2235     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
2236     sampler_dndi[index].dw2.good_neighbor_threshold = 4;                // 0-63
2237
2238     sampler_dndi[index].dw3.maximum_stmm = 128;
2239     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2240     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2241     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2242     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2243
2244     sampler_dndi[index].dw4.sdi_delta = 8;
2245     sampler_dndi[index].dw4.sdi_threshold = 128;
2246     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2247     sampler_dndi[index].dw4.stmm_shift_up = 0;
2248     sampler_dndi[index].dw4.stmm_shift_down = 0;
2249     sampler_dndi[index].dw4.minimum_stmm = 0;
2250
2251     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 8;
2252     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 32;
2253     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 64;
2254     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 32;
2255
2256     sampler_dndi[index].dw6.dn_enable = 1;
2257     sampler_dndi[index].dw6.di_enable = 1;
2258     sampler_dndi[index].dw6.di_partial = 0;
2259     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2260     sampler_dndi[index].dw6.dndi_stream_id = 0;
2261     sampler_dndi[index].dw6.dndi_first_frame = 1;
2262     sampler_dndi[index].dw6.progressive_dn = 0;
2263     sampler_dndi[index].dw6.fmd_tear_threshold = 63;
2264     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2265     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2266
2267     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
2268     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
2269     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2270     sampler_dndi[index].dw7.column_width_minus1 = 0;
2271
2272     dri_bo_unmap(pp_context->sampler_state_table.bo);
2273
2274     /* private function & data */
2275     pp_context->pp_x_steps = pp_dndi_x_steps;
2276     pp_context->pp_y_steps = pp_dndi_y_steps;
2277     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
2278
2279     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
2280     pp_static_parameter->grf1.r1_6.di.top_field_first = dndi_top_first;
2281     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 0;
2282     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 0;
2283
2284     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
2285     pp_inline_parameter->grf5.number_blocks = w / 16;
2286     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
2287     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
2288
2289     pp_dndi_context->dest_w = w;
2290     pp_dndi_context->dest_h = h;
2291
2292     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
2293
2294     return VA_STATUS_SUCCESS;
2295 }
2296
/*
 * Horizontal step count for the denoise (DN) walk.
 *
 * Installed as pp_context->pp_x_steps by pp_nv12_dn_initialize().
 * The private context is not consulted; the answer is always 1.
 */
static int
pp_dn_x_steps(void *private_context)
{
    (void)private_context;      /* intentionally unused */

    return 1;
}
2302
2303 static int
2304 pp_dn_y_steps(void *private_context)
2305 {
2306     struct pp_dn_context *pp_dn_context = private_context;
2307
2308     return pp_dn_context->dest_h / 8;
2309 }
2310
2311 static int
2312 pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2313 {
2314     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2315
2316     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2317     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
2318
2319     return 0;
2320 }
2321
2322 static VAStatus
2323 pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2324                       const struct i965_surface *src_surface,
2325                       const VARectangle *src_rect,
2326                       struct i965_surface *dst_surface,
2327                       const VARectangle *dst_rect,
2328                       void *filter_param)
2329 {
2330     struct i965_driver_data *i965 = i965_driver_data(ctx);
2331     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
2332     struct object_surface *obj_surface;
2333     struct i965_sampler_dndi *sampler_dndi;
2334     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2335     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2336     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
2337     int index;
2338     int w, h;
2339     int orig_w, orig_h;
2340     int dn_strength = 15;
2341     int dndi_top_first = 1;
2342     int dn_progressive = 0;
2343
2344     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
2345         dndi_top_first = 1;
2346         dn_progressive = 1;
2347     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
2348         dndi_top_first = 1;
2349         dn_progressive = 0;
2350     } else {
2351         dndi_top_first = 0;
2352         dn_progressive = 0;
2353     }
2354
2355     if (dn_filter_param) {
2356         float value = dn_filter_param->value;
2357         
2358         if (value > 1.0)
2359             value = 1.0;
2360         
2361         if (value < 0.0)
2362             value = 0.0;
2363
2364         dn_strength = (int)(value * 31.0F);
2365     }
2366
2367     /* surface */
2368     obj_surface = SURFACE(src_surface->id);
2369     orig_w = obj_surface->orig_width;
2370     orig_h = obj_surface->orig_height;
2371     w = obj_surface->width;
2372     h = obj_surface->height;
2373
2374     if (pp_context->stmm.bo == NULL) {
2375         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2376                                            "STMM surface",
2377                                            w * h,
2378                                            4096);
2379         assert(pp_context->stmm.bo);
2380     }
2381
2382     /* source UV surface index 2 */
2383     i965_pp_set_surface_state(ctx, pp_context,
2384                               obj_surface->bo, w * h,
2385                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2386                               2, 0);
2387
2388     /* source YUV surface index 4 */
2389     i965_pp_set_surface2_state(ctx, pp_context,
2390                                obj_surface->bo, 0,
2391                                orig_w, orig_h, w,
2392                                0, h,
2393                                SURFACE_FORMAT_PLANAR_420_8, 1,
2394                                4);
2395
2396     /* source STMM surface index 20 */
2397     i965_pp_set_surface_state(ctx, pp_context,
2398                               pp_context->stmm.bo, 0,
2399                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2400                               20, 1);
2401
2402     /* destination surface */
2403     obj_surface = SURFACE(dst_surface->id);
2404     orig_w = obj_surface->orig_width;
2405     orig_h = obj_surface->orig_height;
2406     w = obj_surface->width;
2407     h = obj_surface->height;
2408
2409     /* destination Y surface index 7 */
2410     i965_pp_set_surface_state(ctx, pp_context,
2411                               obj_surface->bo, 0,
2412                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2413                               7, 1);
2414
2415     /* destination UV surface index 8 */
2416     i965_pp_set_surface_state(ctx, pp_context,
2417                               obj_surface->bo, w * h,
2418                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2419                               8, 1);
2420     /* sampler dn */
2421     dri_bo_map(pp_context->sampler_state_table.bo, True);
2422     assert(pp_context->sampler_state_table.bo->virtual);
2423     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2424     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2425
2426     /* sample dndi index 1 */
2427     index = 0;
2428     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2429     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
2430     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2431     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2432
2433     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2434     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
2435     sampler_dndi[index].dw1.stmm_c2 = 0;
2436     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2437     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2438
2439     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
2440     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
2441     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
2442     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
2443
2444     sampler_dndi[index].dw3.maximum_stmm = 128;
2445     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2446     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2447     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2448     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2449
2450     sampler_dndi[index].dw4.sdi_delta = 8;
2451     sampler_dndi[index].dw4.sdi_threshold = 128;
2452     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2453     sampler_dndi[index].dw4.stmm_shift_up = 0;
2454     sampler_dndi[index].dw4.stmm_shift_down = 0;
2455     sampler_dndi[index].dw4.minimum_stmm = 0;
2456
2457     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
2458     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
2459     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
2460     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
2461
2462     sampler_dndi[index].dw6.dn_enable = 1;
2463     sampler_dndi[index].dw6.di_enable = 0;
2464     sampler_dndi[index].dw6.di_partial = 0;
2465     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2466     sampler_dndi[index].dw6.dndi_stream_id = 1;
2467     sampler_dndi[index].dw6.dndi_first_frame = 1;
2468     sampler_dndi[index].dw6.progressive_dn = dn_progressive;
2469     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
2470     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2471     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2472
2473     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
2474     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
2475     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2476     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
2477
2478     dri_bo_unmap(pp_context->sampler_state_table.bo);
2479
2480     /* private function & data */
2481     pp_context->pp_x_steps = pp_dn_x_steps;
2482     pp_context->pp_y_steps = pp_dn_y_steps;
2483     pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
2484
2485     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
2486     pp_static_parameter->grf1.r1_6.di.top_field_first = 0;
2487     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 64;
2488     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 192;
2489
2490     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
2491     pp_inline_parameter->grf5.number_blocks = w / 16;
2492     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
2493     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
2494
2495     pp_dn_context->dest_w = w;
2496     pp_dn_context->dest_h = h;
2497
2498     dst_surface->flags = src_surface->flags;
2499     
2500     return VA_STATUS_SUCCESS;
2501 }
2502
2503 static int
2504 gen7_pp_dndi_x_steps(void *private_context)
2505 {
2506     struct pp_dndi_context *pp_dndi_context = private_context;
2507
2508     return pp_dndi_context->dest_w / 16;
2509 }
2510
2511 static int
2512 gen7_pp_dndi_y_steps(void *private_context)
2513 {
2514     struct pp_dndi_context *pp_dndi_context = private_context;
2515
2516     return pp_dndi_context->dest_h / 4;
2517 }
2518
2519 static int
2520 gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2521 {
2522     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2523
2524     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16;
2525     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 4;
2526
2527     return 0;
2528 }
2529
2530 static VAStatus
2531 gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2532                              const struct i965_surface *src_surface,
2533                              const VARectangle *src_rect,
2534                              struct i965_surface *dst_surface,
2535                              const VARectangle *dst_rect,
2536                              void *filter_param)
2537 {
2538     struct i965_driver_data *i965 = i965_driver_data(ctx);
2539     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
2540     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2541     struct object_surface *obj_surface;
2542     struct gen7_sampler_dndi *sampler_dndi;
2543     int index;
2544     int w, h;
2545     int orig_w, orig_h;
2546     int dndi_top_first = 1;
2547
2548     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
2549         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
2550
2551     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
2552         dndi_top_first = 1;
2553     else
2554         dndi_top_first = 0;
2555
2556     /* surface */
2557     obj_surface = SURFACE(src_surface->id);
2558     orig_w = obj_surface->orig_width;
2559     orig_h = obj_surface->orig_height;
2560     w = obj_surface->width;
2561     h = obj_surface->height;
2562
2563     if (pp_context->stmm.bo == NULL) {
2564         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2565                                            "STMM surface",
2566                                            w * h,
2567                                            4096);
2568         assert(pp_context->stmm.bo);
2569     }
2570
2571     /* source UV surface index 1 */
2572     gen7_pp_set_surface_state(ctx, pp_context,
2573                               obj_surface->bo, w * h,
2574                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2575                               1, 0);
2576
2577     /* source YUV surface index 3 */
2578     gen7_pp_set_surface2_state(ctx, pp_context,
2579                                obj_surface->bo, 0,
2580                                orig_w, orig_h, w,
2581                                0, h,
2582                                SURFACE_FORMAT_PLANAR_420_8, 1,
2583                                3);
2584
2585     /* source (temporal reference) YUV surface index 4 */
2586     gen7_pp_set_surface2_state(ctx, pp_context,
2587                                obj_surface->bo, 0,
2588                                orig_w, orig_h, w,
2589                                0, h,
2590                                SURFACE_FORMAT_PLANAR_420_8, 1,
2591                                4);
2592
2593     /* STMM / History Statistics input surface, index 5 */
2594     gen7_pp_set_surface_state(ctx, pp_context,
2595                               pp_context->stmm.bo, 0,
2596                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2597                               5, 1);
2598
2599     /* destination surface */
2600     obj_surface = SURFACE(dst_surface->id);
2601     orig_w = obj_surface->orig_width;
2602     orig_h = obj_surface->orig_height;
2603     w = obj_surface->width;
2604     h = obj_surface->height;
2605
2606     /* destination(Previous frame) Y surface index 27 */
2607     gen7_pp_set_surface_state(ctx, pp_context,
2608                               obj_surface->bo, 0,
2609                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2610                               27, 1);
2611
2612     /* destination(Previous frame) UV surface index 28 */
2613     gen7_pp_set_surface_state(ctx, pp_context,
2614                               obj_surface->bo, w * h,
2615                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2616                               28, 1);
2617
2618     /* destination(Current frame) Y surface index 30 */
2619     gen7_pp_set_surface_state(ctx, pp_context,
2620                               obj_surface->bo, 0,
2621                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2622                               30, 1);
2623
2624     /* destination(Current frame) UV surface index 31 */
2625     gen7_pp_set_surface_state(ctx, pp_context,
2626                               obj_surface->bo, w * h,
2627                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2628                               31, 1);
2629
2630     /* STMM output surface, index 33 */
2631     gen7_pp_set_surface_state(ctx, pp_context,
2632                               pp_context->stmm.bo, 0,
2633                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2634                               33, 1);
2635
2636
2637     /* sampler dndi */
2638     dri_bo_map(pp_context->sampler_state_table.bo, True);
2639     assert(pp_context->sampler_state_table.bo->virtual);
2640     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2641     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2642
2643     /* sample dndi index 0 */
2644     index = 0;
2645     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2646     sampler_dndi[index].dw0.dnmh_delt = 8;
2647     sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
2648     sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
2649     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2650     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2651
2652     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2653     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
2654     sampler_dndi[index].dw1.stmm_c2 = 0;
2655     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2656     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2657
2658     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
2659     sampler_dndi[index].dw2.bne_edge_th = 1;
2660     sampler_dndi[index].dw2.smooth_mv_th = 0;
2661     sampler_dndi[index].dw2.sad_tight_th = 5;
2662     sampler_dndi[index].dw2.cat_slope_minus1 = 9;
2663     sampler_dndi[index].dw2.good_neighbor_th = 4;
2664
2665     sampler_dndi[index].dw3.maximum_stmm = 128;
2666     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2667     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2668     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2669     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2670
2671     sampler_dndi[index].dw4.sdi_delta = 8;
2672     sampler_dndi[index].dw4.sdi_threshold = 128;
2673     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2674     sampler_dndi[index].dw4.stmm_shift_up = 0;
2675     sampler_dndi[index].dw4.stmm_shift_down = 0;
2676     sampler_dndi[index].dw4.minimum_stmm = 0;
2677
2678     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
2679     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
2680     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
2681     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
2682
2683     sampler_dndi[index].dw6.dn_enable = 0;
2684     sampler_dndi[index].dw6.di_enable = 1;
2685     sampler_dndi[index].dw6.di_partial = 0;
2686     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2687     sampler_dndi[index].dw6.dndi_stream_id = 1;
2688     sampler_dndi[index].dw6.dndi_first_frame = 1;
2689     sampler_dndi[index].dw6.progressive_dn = 0;
2690     sampler_dndi[index].dw6.mcdi_enable = 0;
2691     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
2692     sampler_dndi[index].dw6.cat_th1 = 0;
2693     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2694     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2695
2696     sampler_dndi[index].dw7.sad_tha = 5;
2697     sampler_dndi[index].dw7.sad_thb = 10;
2698     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
2699     sampler_dndi[index].dw7.mc_pixel_consistency_th = 25;
2700     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
2701     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2702     sampler_dndi[index].dw7.neighborpixel_th = 10;
2703     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
2704
2705     dri_bo_unmap(pp_context->sampler_state_table.bo);
2706
2707     /* private function & data */
2708     pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
2709     pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
2710     pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
2711
2712     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
2713     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
2714     pp_static_parameter->grf1.di_top_field_first = 0;
2715     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
2716
2717     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
2718     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
2719     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
2720
2721     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
2722     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
2723
2724     pp_dndi_context->dest_w = w;
2725     pp_dndi_context->dest_h = h;
2726
2727     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
2728
2729     return VA_STATUS_SUCCESS;
2730 }
2731
/*
 * Horizontal step count for the Gen7 denoise (DN) walk.
 *
 * Installed as pp_context->pp_x_steps by gen7_pp_nv12_dn_initialize().
 * The private context is not consulted; the answer is always 1.
 */
static int
gen7_pp_dn_x_steps(void *private_context)
{
    (void)private_context;      /* intentionally unused */

    return 1;
}
2737
2738 static int
2739 gen7_pp_dn_y_steps(void *private_context)
2740 {
2741     struct pp_dn_context *pp_dn_context = private_context;
2742
2743     return pp_dn_context->dest_h / 4;
2744 }
2745
2746 static int
2747 gen7_pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2748 {
2749     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2750
2751     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2752     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
2753
2754     return 0;
2755 }
2756
2757 static VAStatus
2758 gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2759                            const struct i965_surface *src_surface,
2760                            const VARectangle *src_rect,
2761                            struct i965_surface *dst_surface,
2762                            const VARectangle *dst_rect,
2763                            void *filter_param)
2764 {
2765     struct i965_driver_data *i965 = i965_driver_data(ctx);
2766     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
2767     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2768     struct object_surface *obj_surface;
2769     struct gen7_sampler_dndi *sampler_dn;
2770     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
2771     int index;
2772     int w, h;
2773     int orig_w, orig_h;
2774     int dn_strength = 15;
2775     int dndi_top_first = 1;
2776     int dn_progressive = 0;
2777
2778     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
2779         dndi_top_first = 1;
2780         dn_progressive = 1;
2781     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
2782         dndi_top_first = 1;
2783         dn_progressive = 0;
2784     } else {
2785         dndi_top_first = 0;
2786         dn_progressive = 0;
2787     }
2788
2789     if (dn_filter_param) {
2790         float value = dn_filter_param->value;
2791         
2792         if (value > 1.0)
2793             value = 1.0;
2794         
2795         if (value < 0.0)
2796             value = 0.0;
2797
2798         dn_strength = (int)(value * 31.0F);
2799     }
2800
2801     /* surface */
2802     obj_surface = SURFACE(src_surface->id);
2803     orig_w = obj_surface->orig_width;
2804     orig_h = obj_surface->orig_height;
2805     w = obj_surface->width;
2806     h = obj_surface->height;
2807
2808     if (pp_context->stmm.bo == NULL) {
2809         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2810                                            "STMM surface",
2811                                            w * h,
2812                                            4096);
2813         assert(pp_context->stmm.bo);
2814     }
2815
2816     /* source UV surface index 1 */
2817     gen7_pp_set_surface_state(ctx, pp_context,
2818                               obj_surface->bo, w * h,
2819                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2820                               1, 0);
2821
2822     /* source YUV surface index 3 */
2823     gen7_pp_set_surface2_state(ctx, pp_context,
2824                                obj_surface->bo, 0,
2825                                orig_w, orig_h, w,
2826                                0, h,
2827                                SURFACE_FORMAT_PLANAR_420_8, 1,
2828                                3);
2829
2830     /* source STMM surface index 5 */
2831     gen7_pp_set_surface_state(ctx, pp_context,
2832                               pp_context->stmm.bo, 0,
2833                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2834                               5, 1);
2835
2836     /* destination surface */
2837     obj_surface = SURFACE(dst_surface->id);
2838     orig_w = obj_surface->orig_width;
2839     orig_h = obj_surface->orig_height;
2840     w = obj_surface->width;
2841     h = obj_surface->height;
2842
2843     /* destination Y surface index 7 */
2844     gen7_pp_set_surface_state(ctx, pp_context,
2845                               obj_surface->bo, 0,
2846                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2847                               7, 1);
2848
2849     /* destination UV surface index 8 */
2850     gen7_pp_set_surface_state(ctx, pp_context,
2851                               obj_surface->bo, w * h,
2852                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2853                               8, 1);
2854     /* sampler dn */
2855     dri_bo_map(pp_context->sampler_state_table.bo, True);
2856     assert(pp_context->sampler_state_table.bo->virtual);
2857     assert(sizeof(*sampler_dn) == sizeof(int) * 8);
2858     sampler_dn = pp_context->sampler_state_table.bo->virtual;
2859
2860     /* sample dn index 1 */
2861     index = 0;
2862     sampler_dn[index].dw0.denoise_asd_threshold = 0;
2863     sampler_dn[index].dw0.dnmh_delt = 8;
2864     sampler_dn[index].dw0.vdi_walker_y_stride = 0;
2865     sampler_dn[index].dw0.vdi_walker_frame_sharing_enable = 0;
2866     sampler_dn[index].dw0.denoise_maximum_history = 128;      // 128-240
2867     sampler_dn[index].dw0.denoise_stad_threshold = 0;
2868
2869     sampler_dn[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2870     sampler_dn[index].dw1.denoise_moving_pixel_threshold = 0;
2871     sampler_dn[index].dw1.stmm_c2 = 0;
2872     sampler_dn[index].dw1.low_temporal_difference_threshold = 8;
2873     sampler_dn[index].dw1.temporal_difference_threshold = 16;
2874
2875     sampler_dn[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
2876     sampler_dn[index].dw2.bne_edge_th = 1;
2877     sampler_dn[index].dw2.smooth_mv_th = 0;
2878     sampler_dn[index].dw2.sad_tight_th = 5;
2879     sampler_dn[index].dw2.cat_slope_minus1 = 9;
2880     sampler_dn[index].dw2.good_neighbor_th = 4;
2881
2882     sampler_dn[index].dw3.maximum_stmm = 128;
2883     sampler_dn[index].dw3.multipler_for_vecm = 2;
2884     sampler_dn[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2885     sampler_dn[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2886     sampler_dn[index].dw3.stmm_blending_constant_select = 0;
2887
2888     sampler_dn[index].dw4.sdi_delta = 8;
2889     sampler_dn[index].dw4.sdi_threshold = 128;
2890     sampler_dn[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2891     sampler_dn[index].dw4.stmm_shift_up = 0;
2892     sampler_dn[index].dw4.stmm_shift_down = 0;
2893     sampler_dn[index].dw4.minimum_stmm = 0;
2894
2895     sampler_dn[index].dw5.fmd_temporal_difference_threshold = 0;
2896     sampler_dn[index].dw5.sdi_fallback_mode_2_constant = 0;
2897     sampler_dn[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
2898     sampler_dn[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
2899
2900     sampler_dn[index].dw6.dn_enable = 1;
2901     sampler_dn[index].dw6.di_enable = 0;
2902     sampler_dn[index].dw6.di_partial = 0;
2903     sampler_dn[index].dw6.dndi_top_first = dndi_top_first;
2904     sampler_dn[index].dw6.dndi_stream_id = 1;
2905     sampler_dn[index].dw6.dndi_first_frame = 1;
2906     sampler_dn[index].dw6.progressive_dn = dn_progressive;
2907     sampler_dn[index].dw6.mcdi_enable = 0;
2908     sampler_dn[index].dw6.fmd_tear_threshold = 32;
2909     sampler_dn[index].dw6.cat_th1 = 0;
2910     sampler_dn[index].dw6.fmd2_vertical_difference_threshold = 32;
2911     sampler_dn[index].dw6.fmd1_vertical_difference_threshold = 32;
2912
2913     sampler_dn[index].dw7.sad_tha = 5;
2914     sampler_dn[index].dw7.sad_thb = 10;
2915     sampler_dn[index].dw7.fmd_for_1st_field_of_current_frame = 2;
2916     sampler_dn[index].dw7.mc_pixel_consistency_th = 25;
2917     sampler_dn[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
2918     sampler_dn[index].dw7.vdi_walker_enable = 0;
2919     sampler_dn[index].dw7.neighborpixel_th = 10;
2920     sampler_dn[index].dw7.column_width_minus1 = w / 16;
2921
2922     dri_bo_unmap(pp_context->sampler_state_table.bo);
2923
2924     /* private function & data */
2925     pp_context->pp_x_steps = gen7_pp_dn_x_steps;
2926     pp_context->pp_y_steps = gen7_pp_dn_y_steps;
2927     pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;
2928
2929     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
2930     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
2931     pp_static_parameter->grf1.di_top_field_first = 0;
2932     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
2933
2934     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
2935     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
2936     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
2937
2938     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
2939     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
2940
2941     pp_dn_context->dest_w = w;
2942     pp_dn_context->dest_h = h;
2943
2944     dst_surface->flags = src_surface->flags;
2945
2946     return VA_STATUS_SUCCESS;
2947 }
2948
2949 static VAStatus
2950 ironlake_pp_initialize(
2951     VADriverContextP   ctx,
2952     struct i965_post_processing_context *pp_context,
2953     const struct i965_surface *src_surface,
2954     const VARectangle *src_rect,
2955     struct i965_surface *dst_surface,
2956     const VARectangle *dst_rect,
2957     int                pp_index,
2958     void *filter_param
2959 )
2960 {
2961     VAStatus va_status;
2962     struct i965_driver_data *i965 = i965_driver_data(ctx);
2963     struct pp_module *pp_module;
2964     dri_bo *bo;
2965     int static_param_size, inline_param_size;
2966
2967     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
2968     bo = dri_bo_alloc(i965->intel.bufmgr,
2969                       "surface state & binding table",
2970                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
2971                       4096);
2972     assert(bo);
2973     pp_context->surface_state_binding_table.bo = bo;
2974
2975     dri_bo_unreference(pp_context->curbe.bo);
2976     bo = dri_bo_alloc(i965->intel.bufmgr,
2977                       "constant buffer",
2978                       4096, 
2979                       4096);
2980     assert(bo);
2981     pp_context->curbe.bo = bo;
2982
2983     dri_bo_unreference(pp_context->idrt.bo);
2984     bo = dri_bo_alloc(i965->intel.bufmgr, 
2985                       "interface discriptor", 
2986                       sizeof(struct i965_interface_descriptor), 
2987                       4096);
2988     assert(bo);
2989     pp_context->idrt.bo = bo;
2990     pp_context->idrt.num_interface_descriptors = 0;
2991
2992     dri_bo_unreference(pp_context->sampler_state_table.bo);
2993     bo = dri_bo_alloc(i965->intel.bufmgr, 
2994                       "sampler state table", 
2995                       4096,
2996                       4096);
2997     assert(bo);
2998     dri_bo_map(bo, True);
2999     memset(bo->virtual, 0, bo->size);
3000     dri_bo_unmap(bo);
3001     pp_context->sampler_state_table.bo = bo;
3002
3003     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3004     bo = dri_bo_alloc(i965->intel.bufmgr, 
3005                       "sampler 8x8 state ",
3006                       4096,
3007                       4096);
3008     assert(bo);
3009     pp_context->sampler_state_table.bo_8x8 = bo;
3010
3011     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3012     bo = dri_bo_alloc(i965->intel.bufmgr, 
3013                       "sampler 8x8 state ",
3014                       4096,
3015                       4096);
3016     assert(bo);
3017     pp_context->sampler_state_table.bo_8x8_uv = bo;
3018
3019     dri_bo_unreference(pp_context->vfe_state.bo);
3020     bo = dri_bo_alloc(i965->intel.bufmgr, 
3021                       "vfe state", 
3022                       sizeof(struct i965_vfe_state), 
3023                       4096);
3024     assert(bo);
3025     pp_context->vfe_state.bo = bo;
3026
3027     if (IS_GEN7(i965->intel.device_id)) {
3028         static_param_size = sizeof(struct gen7_pp_static_parameter);
3029         inline_param_size = sizeof(struct gen7_pp_inline_parameter);
3030     } else {
3031         static_param_size = sizeof(struct pp_static_parameter);
3032         inline_param_size = sizeof(struct pp_inline_parameter);
3033     }
3034
3035     memset(pp_context->pp_static_parameter, 0, static_param_size);
3036     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
3037     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
3038     pp_context->current_pp = pp_index;
3039     pp_module = &pp_context->pp_modules[pp_index];
3040     
3041     if (pp_module->initialize)
3042         va_status = pp_module->initialize(ctx, pp_context,
3043                                           src_surface,
3044                                           src_rect,
3045                                           dst_surface,
3046                                           dst_rect,
3047                                           filter_param);
3048     else
3049         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
3050
3051     return va_status;
3052 }
3053
3054 static VAStatus
3055 ironlake_post_processing(
3056     VADriverContextP   ctx,
3057     struct i965_post_processing_context *pp_context,
3058     const struct i965_surface *src_surface,
3059     const VARectangle *src_rect,
3060     struct i965_surface *dst_surface,
3061     const VARectangle *dst_rect,
3062     int                pp_index,
3063     void *filter_param
3064 )
3065 {
3066     VAStatus va_status;
3067
3068     va_status = ironlake_pp_initialize(ctx, pp_context,
3069                                        src_surface,
3070                                        src_rect,
3071                                        dst_surface,
3072                                        dst_rect,
3073                                        pp_index,
3074                                        filter_param);
3075
3076     if (va_status == VA_STATUS_SUCCESS) {
3077         ironlake_pp_states_setup(ctx, pp_context);
3078         ironlake_pp_pipeline_setup(ctx, pp_context);
3079     }
3080
3081     return va_status;
3082 }
3083
3084 static VAStatus
3085 gen6_pp_initialize(
3086     VADriverContextP   ctx,
3087     struct i965_post_processing_context *pp_context,
3088     const struct i965_surface *src_surface,
3089     const VARectangle *src_rect,
3090     struct i965_surface *dst_surface,
3091     const VARectangle *dst_rect,
3092     int                pp_index,
3093     void *filter_param
3094 )
3095 {
3096     VAStatus va_status;
3097     struct i965_driver_data *i965 = i965_driver_data(ctx);
3098     struct pp_module *pp_module;
3099     dri_bo *bo;
3100     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3101     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3102
3103     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3104     bo = dri_bo_alloc(i965->intel.bufmgr,
3105                       "surface state & binding table",
3106                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
3107                       4096);
3108     assert(bo);
3109     pp_context->surface_state_binding_table.bo = bo;
3110
3111     dri_bo_unreference(pp_context->curbe.bo);
3112     bo = dri_bo_alloc(i965->intel.bufmgr,
3113                       "constant buffer",
3114                       4096, 
3115                       4096);
3116     assert(bo);
3117     pp_context->curbe.bo = bo;
3118
3119     dri_bo_unreference(pp_context->idrt.bo);
3120     bo = dri_bo_alloc(i965->intel.bufmgr, 
3121                       "interface discriptor", 
3122                       sizeof(struct gen6_interface_descriptor_data), 
3123                       4096);
3124     assert(bo);
3125     pp_context->idrt.bo = bo;
3126     pp_context->idrt.num_interface_descriptors = 0;
3127
3128     dri_bo_unreference(pp_context->sampler_state_table.bo);
3129     bo = dri_bo_alloc(i965->intel.bufmgr, 
3130                       "sampler state table", 
3131                       4096,
3132                       4096);
3133     assert(bo);
3134     dri_bo_map(bo, True);
3135     memset(bo->virtual, 0, bo->size);
3136     dri_bo_unmap(bo);
3137     pp_context->sampler_state_table.bo = bo;
3138
3139     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3140     bo = dri_bo_alloc(i965->intel.bufmgr, 
3141                       "sampler 8x8 state ",
3142                       4096,
3143                       4096);
3144     assert(bo);
3145     pp_context->sampler_state_table.bo_8x8 = bo;
3146
3147     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3148     bo = dri_bo_alloc(i965->intel.bufmgr, 
3149                       "sampler 8x8 state ",
3150                       4096,
3151                       4096);
3152     assert(bo);
3153     pp_context->sampler_state_table.bo_8x8_uv = bo;
3154
3155     dri_bo_unreference(pp_context->vfe_state.bo);
3156     bo = dri_bo_alloc(i965->intel.bufmgr, 
3157                       "vfe state", 
3158                       sizeof(struct i965_vfe_state), 
3159                       4096);
3160     assert(bo);
3161     pp_context->vfe_state.bo = bo;
3162     
3163     memset(pp_static_parameter, 0, sizeof(*pp_static_parameter));
3164     memset(pp_inline_parameter, 0, sizeof(*pp_inline_parameter));
3165     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
3166     pp_context->current_pp = pp_index;
3167     pp_module = &pp_context->pp_modules[pp_index];
3168     
3169     if (pp_module->initialize)
3170         va_status = pp_module->initialize(ctx, pp_context,
3171                                           src_surface,
3172                                           src_rect,
3173                                           dst_surface,
3174                                           dst_rect,
3175                                           filter_param);
3176     else
3177         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
3178
3179     return va_status;
3180 }
3181
3182 static void
3183 gen6_pp_interface_descriptor_table(VADriverContextP   ctx,
3184                                    struct i965_post_processing_context *pp_context)
3185 {
3186     struct i965_driver_data *i965 = i965_driver_data(ctx);
3187     struct gen6_interface_descriptor_data *desc;
3188     dri_bo *bo;
3189     int pp_index = pp_context->current_pp;
3190
3191     bo = pp_context->idrt.bo;
3192     dri_bo_map(bo, True);
3193     assert(bo->virtual);
3194     desc = bo->virtual;
3195     memset(desc, 0, sizeof(*desc));
3196     desc->desc0.kernel_start_pointer = 
3197         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
3198     desc->desc1.single_program_flow = 1;
3199     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
3200     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
3201     desc->desc2.sampler_state_pointer = 
3202         pp_context->sampler_state_table.bo->offset >> 5;
3203     desc->desc3.binding_table_entry_count = 0;
3204     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
3205     desc->desc4.constant_urb_entry_read_offset = 0;
3206
3207     if (IS_GEN7(i965->intel.device_id))
3208         desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */
3209     else
3210         desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
3211
3212     dri_bo_emit_reloc(bo,
3213                       I915_GEM_DOMAIN_INSTRUCTION, 0,
3214                       0,
3215                       offsetof(struct gen6_interface_descriptor_data, desc0),
3216                       pp_context->pp_modules[pp_index].kernel.bo);
3217
3218     dri_bo_emit_reloc(bo,
3219                       I915_GEM_DOMAIN_INSTRUCTION, 0,
3220                       desc->desc2.sampler_count << 2,
3221                       offsetof(struct gen6_interface_descriptor_data, desc2),
3222                       pp_context->sampler_state_table.bo);
3223
3224     dri_bo_unmap(bo);
3225     pp_context->idrt.num_interface_descriptors++;
3226 }
3227
3228 static void
3229 gen6_pp_upload_constants(VADriverContextP ctx,
3230                          struct i965_post_processing_context *pp_context)
3231 {
3232     struct i965_driver_data *i965 = i965_driver_data(ctx);
3233     unsigned char *constant_buffer;
3234     int param_size;
3235
3236     assert(sizeof(struct pp_static_parameter) == 128);
3237     assert(sizeof(struct gen7_pp_static_parameter) == 192);
3238
3239     if (IS_GEN7(i965->intel.device_id))
3240         param_size = sizeof(struct gen7_pp_static_parameter);
3241     else
3242         param_size = sizeof(struct pp_static_parameter);
3243
3244     dri_bo_map(pp_context->curbe.bo, 1);
3245     assert(pp_context->curbe.bo->virtual);
3246     constant_buffer = pp_context->curbe.bo->virtual;
3247     memcpy(constant_buffer, pp_context->pp_static_parameter, param_size);
3248     dri_bo_unmap(pp_context->curbe.bo);
3249 }
3250
3251 static void
3252 gen6_pp_states_setup(VADriverContextP ctx,
3253                      struct i965_post_processing_context *pp_context)
3254 {
3255     gen6_pp_interface_descriptor_table(ctx, pp_context);
3256     gen6_pp_upload_constants(ctx, pp_context);
3257 }
3258
3259 static void
3260 gen6_pp_pipeline_select(VADriverContextP ctx,
3261                         struct i965_post_processing_context *pp_context)
3262 {
3263     struct intel_batchbuffer *batch = pp_context->batch;
3264
3265     BEGIN_BATCH(batch, 1);
3266     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
3267     ADVANCE_BATCH(batch);
3268 }
3269
3270 static void
3271 gen6_pp_state_base_address(VADriverContextP ctx,
3272                            struct i965_post_processing_context *pp_context)
3273 {
3274     struct intel_batchbuffer *batch = pp_context->batch;
3275
3276     BEGIN_BATCH(batch, 10);
3277     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
3278     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3279     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
3280     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3281     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3282     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3283     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3284     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3285     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3286     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3287     ADVANCE_BATCH(batch);
3288 }
3289
3290 static void
3291 gen6_pp_vfe_state(VADriverContextP ctx,
3292                   struct i965_post_processing_context *pp_context)
3293 {
3294     struct intel_batchbuffer *batch = pp_context->batch;
3295
3296     BEGIN_BATCH(batch, 8);
3297     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
3298     OUT_BATCH(batch, 0);
3299     OUT_BATCH(batch,
3300               (pp_context->urb.num_vfe_entries - 1) << 16 |
3301               pp_context->urb.num_vfe_entries << 8);
3302     OUT_BATCH(batch, 0);
3303     OUT_BATCH(batch,
3304               (pp_context->urb.size_vfe_entry * 2) << 16 |  /* URB Entry Allocation Size, in 256 bits unit */
3305               (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2)); /* CURBE Allocation Size, in 256 bits unit */
3306     OUT_BATCH(batch, 0);
3307     OUT_BATCH(batch, 0);
3308     OUT_BATCH(batch, 0);
3309     ADVANCE_BATCH(batch);
3310 }
3311
3312 static void
3313 gen6_pp_curbe_load(VADriverContextP ctx,
3314                    struct i965_post_processing_context *pp_context)
3315 {
3316     struct intel_batchbuffer *batch = pp_context->batch;
3317
3318     assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32 <= pp_context->curbe.bo->size);
3319
3320     BEGIN_BATCH(batch, 4);
3321     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
3322     OUT_BATCH(batch, 0);
3323     OUT_BATCH(batch,
3324               pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32);
3325     OUT_RELOC(batch, 
3326               pp_context->curbe.bo,
3327               I915_GEM_DOMAIN_INSTRUCTION, 0,
3328               0);
3329     ADVANCE_BATCH(batch);
3330 }
3331
3332 static void
3333 gen6_interface_descriptor_load(VADriverContextP ctx,
3334                                struct i965_post_processing_context *pp_context)
3335 {
3336     struct intel_batchbuffer *batch = pp_context->batch;
3337
3338     BEGIN_BATCH(batch, 4);
3339     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
3340     OUT_BATCH(batch, 0);
3341     OUT_BATCH(batch,
3342               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
3343     OUT_RELOC(batch, 
3344               pp_context->idrt.bo,
3345               I915_GEM_DOMAIN_INSTRUCTION, 0,
3346               0);
3347     ADVANCE_BATCH(batch);
3348 }
3349
3350 static void
3351 gen6_pp_object_walker(VADriverContextP ctx,
3352                       struct i965_post_processing_context *pp_context)
3353 {
3354     struct i965_driver_data *i965 = i965_driver_data(ctx);
3355     struct intel_batchbuffer *batch = pp_context->batch;
3356     int x, x_steps, y, y_steps;
3357     int param_size, command_length_in_dws;
3358     dri_bo *command_buffer;
3359     unsigned int *command_ptr;
3360
3361     if (IS_GEN7(i965->intel.device_id))
3362         param_size = sizeof(struct gen7_pp_inline_parameter);
3363     else
3364         param_size = sizeof(struct pp_inline_parameter);
3365
3366     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
3367     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
3368     command_length_in_dws = 6 + (param_size >> 2);
3369     command_buffer = dri_bo_alloc(i965->intel.bufmgr,
3370                                   "command objects buffer",
3371                                   command_length_in_dws * 4 * x_steps * y_steps + 8,
3372                                   4096);
3373
3374     dri_bo_map(command_buffer, 1);
3375     command_ptr = command_buffer->virtual;
3376
3377     for (y = 0; y < y_steps; y++) {
3378         for (x = 0; x < x_steps; x++) {
3379             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
3380                 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
3381                 *command_ptr++ = 0;
3382                 *command_ptr++ = 0;
3383                 *command_ptr++ = 0;
3384                 *command_ptr++ = 0;
3385                 *command_ptr++ = 0;
3386                 memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
3387                 command_ptr += (param_size >> 2);
3388             }
3389         }
3390     }
3391
3392     if (command_length_in_dws * x_steps * y_steps % 2 == 0)
3393         *command_ptr++ = 0;
3394
3395     *command_ptr = MI_BATCH_BUFFER_END;
3396
3397     dri_bo_unmap(command_buffer);
3398
3399     BEGIN_BATCH(batch, 2);
3400     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
3401     OUT_RELOC(batch, command_buffer, 
3402               I915_GEM_DOMAIN_COMMAND, 0, 
3403               0);
3404     ADVANCE_BATCH(batch);
3405     
3406     dri_bo_unreference(command_buffer);
3407
3408     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
3409      * will cause control to pass back to ring buffer 
3410      */
3411     intel_batchbuffer_end_atomic(batch);
3412     intel_batchbuffer_flush(batch);
3413     intel_batchbuffer_start_atomic(batch, 0x1000);
3414 }
3415
3416 static void
3417 gen6_pp_pipeline_setup(VADriverContextP ctx,
3418                        struct i965_post_processing_context *pp_context)
3419 {
3420     struct intel_batchbuffer *batch = pp_context->batch;
3421
3422     intel_batchbuffer_start_atomic(batch, 0x1000);
3423     intel_batchbuffer_emit_mi_flush(batch);
3424     gen6_pp_pipeline_select(ctx, pp_context);
3425     gen6_pp_state_base_address(ctx, pp_context);
3426     gen6_pp_vfe_state(ctx, pp_context);
3427     gen6_pp_curbe_load(ctx, pp_context);
3428     gen6_interface_descriptor_load(ctx, pp_context);
3429     gen6_pp_object_walker(ctx, pp_context);
3430     intel_batchbuffer_end_atomic(batch);
3431 }
3432
3433 static VAStatus
3434 gen6_post_processing(
3435     VADriverContextP   ctx,
3436     struct i965_post_processing_context *pp_context,
3437     const struct i965_surface *src_surface,
3438     const VARectangle *src_rect,
3439     struct i965_surface *dst_surface,
3440     const VARectangle *dst_rect,
3441     int                pp_index,
3442     void * filter_param
3443 )
3444 {
3445     VAStatus va_status;
3446     
3447     va_status = gen6_pp_initialize(ctx, pp_context,
3448                                    src_surface,
3449                                    src_rect,
3450                                    dst_surface,
3451                                    dst_rect,
3452                                    pp_index,
3453                                    filter_param);
3454
3455     if (va_status == VA_STATUS_SUCCESS) {
3456         gen6_pp_states_setup(ctx, pp_context);
3457         gen6_pp_pipeline_setup(ctx, pp_context);
3458     }
3459
3460     return va_status;
3461 }
3462
3463 static VAStatus
3464 i965_post_processing_internal(
3465     VADriverContextP   ctx,
3466     struct i965_post_processing_context *pp_context,
3467     const struct i965_surface *src_surface,
3468     const VARectangle *src_rect,
3469     struct i965_surface *dst_surface,
3470     const VARectangle *dst_rect,
3471     int                pp_index,
3472     void *filter_param
3473 )
3474 {
3475     struct i965_driver_data *i965 = i965_driver_data(ctx);
3476     VAStatus va_status;
3477
3478     if (IS_GEN6(i965->intel.device_id) ||
3479         IS_GEN7(i965->intel.device_id))
3480         va_status = gen6_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
3481     else
3482         va_status = ironlake_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
3483     
3484     return va_status;
3485 }
3486
3487 VAStatus 
3488 i965_DestroySurfaces(VADriverContextP ctx,
3489                      VASurfaceID *surface_list,
3490                      int num_surfaces);
3491 VAStatus 
3492 i965_CreateSurfaces(VADriverContextP ctx,
3493                     int width,
3494                     int height,
3495                     int format,
3496                     int num_surfaces,
3497                     VASurfaceID *surfaces);
3498
3499 static void 
3500 i965_vpp_clear_surface(VADriverContextP ctx,
3501                        struct i965_post_processing_context *pp_context,
3502                        VASurfaceID surface,
3503                        unsigned int color)
3504 {
3505     struct i965_driver_data *i965 = i965_driver_data(ctx);
3506     struct intel_batchbuffer *batch = pp_context->batch;
3507     struct object_surface *obj_surface = SURFACE(surface);
3508     unsigned int blt_cmd, br13;
3509     unsigned int tiling = 0, swizzle = 0;
3510     int pitch;
3511
3512     /* Currently only support NV12 surface */
3513     if (!obj_surface || obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
3514         return;
3515
3516     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
3517     blt_cmd = XY_COLOR_BLT_CMD;
3518     pitch = obj_surface->width;
3519
3520     if (tiling != I915_TILING_NONE) {
3521         blt_cmd |= XY_COLOR_BLT_DST_TILED;
3522         pitch >>= 2;
3523     }
3524
3525     br13 = 0xf0 << 16;
3526     br13 |= BR13_8;
3527     br13 |= pitch;
3528
3529     if (IS_GEN6(i965->intel.device_id) ||
3530         IS_GEN7(i965->intel.device_id)) {
3531         intel_batchbuffer_start_atomic_blt(batch, 48);
3532         BEGIN_BLT_BATCH(batch, 12);
3533     } else {
3534         intel_batchbuffer_start_atomic(batch, 48);
3535         BEGIN_BATCH(batch, 12);
3536     }
3537
3538     OUT_BATCH(batch, blt_cmd);
3539     OUT_BATCH(batch, br13);
3540     OUT_BATCH(batch,
3541               0 << 16 |
3542               0);
3543     OUT_BATCH(batch,
3544               obj_surface->height << 16 |
3545               obj_surface->width);
3546     OUT_RELOC(batch, obj_surface->bo, 
3547               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
3548               0);
3549     OUT_BATCH(batch, 0x10);
3550
3551     OUT_BATCH(batch, blt_cmd);
3552     OUT_BATCH(batch, br13);
3553     OUT_BATCH(batch,
3554               0 << 16 |
3555               0);
3556     OUT_BATCH(batch,
3557               obj_surface->height / 2 << 16 |
3558               obj_surface->width);
3559     OUT_RELOC(batch, obj_surface->bo, 
3560               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
3561               obj_surface->width * obj_surface->y_cb_offset);
3562     OUT_BATCH(batch, 0x80);
3563
3564     ADVANCE_BATCH(batch);
3565     intel_batchbuffer_end_atomic(batch);
3566 }
3567
3568 VASurfaceID
3569 i965_post_processing(
3570     VADriverContextP   ctx,
3571     VASurfaceID        surface,
3572     const VARectangle *src_rect,
3573     const VARectangle *dst_rect,
3574     unsigned int       flags,
3575     int               *has_done_scaling  
3576 )
3577 {
3578     struct i965_driver_data *i965 = i965_driver_data(ctx);
3579     VASurfaceID in_surface_id = surface;
3580     VASurfaceID out_surface_id = VA_INVALID_ID;
3581     
3582     *has_done_scaling = 0;
3583
3584     if (HAS_PP(i965)) {
3585         struct object_surface *obj_surface;
3586         VAStatus status;
3587         struct i965_surface src_surface;
3588         struct i965_surface dst_surface;
3589
3590         obj_surface = SURFACE(in_surface_id);
3591
3592         /* Currently only support post processing for NV12 surface */
3593         if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
3594             return out_surface_id;
3595
3596         _i965LockMutex(&i965->pp_mutex);
3597
3598         if (flags & I965_PP_FLAG_MCDI) {
3599             status = i965_CreateSurfaces(ctx,
3600                                          obj_surface->orig_width,
3601                                          obj_surface->orig_height,
3602                                          VA_RT_FORMAT_YUV420,
3603                                          1,
3604                                          &out_surface_id);
3605             assert(status == VA_STATUS_SUCCESS);
3606             obj_surface = SURFACE(out_surface_id);
3607             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
3608             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
3609             src_surface.id = in_surface_id;
3610             src_surface.type = I965_SURFACE_TYPE_SURFACE;
3611             src_surface.flags = (flags & I965_PP_FLAG_TOP_FIELD) ? 
3612                 I965_SURFACE_FLAG_TOP_FIELD_FIRST : I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST;
3613             dst_surface.id = out_surface_id;
3614             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
3615             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
3616
3617             i965_post_processing_internal(ctx, i965->pp_context,
3618                                           &src_surface,
3619                                           src_rect,
3620                                           &dst_surface,
3621                                           dst_rect,
3622                                           PP_NV12_DNDI,
3623                                           NULL);
3624         }
3625
3626         if (flags & I965_PP_FLAG_AVS) {
3627             struct i965_render_state *render_state = &i965->render_state;
3628             struct intel_region *dest_region = render_state->draw_region;
3629
3630             if (out_surface_id != VA_INVALID_ID)
3631                 in_surface_id = out_surface_id;
3632
3633             status = i965_CreateSurfaces(ctx,
3634                                          dest_region->width,
3635                                          dest_region->height,
3636                                          VA_RT_FORMAT_YUV420,
3637                                          1,
3638                                          &out_surface_id);
3639             assert(status == VA_STATUS_SUCCESS);
3640             obj_surface = SURFACE(out_surface_id);
3641             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
3642             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
3643             src_surface.id = in_surface_id;
3644             src_surface.type = I965_SURFACE_TYPE_SURFACE;
3645             src_surface.flags = I965_SURFACE_FLAG_FRAME;
3646             dst_surface.id = out_surface_id;
3647             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
3648             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
3649
3650             i965_post_processing_internal(ctx, i965->pp_context,
3651                                           &src_surface,
3652                                           src_rect,
3653                                           &dst_surface,
3654                                           dst_rect,
3655                                           PP_NV12_AVS,
3656                                           NULL);
3657
3658             if (in_surface_id != surface)
3659                 i965_DestroySurfaces(ctx, &in_surface_id, 1);
3660                 
3661             *has_done_scaling = 1;
3662         }
3663
3664         _i965UnlockMutex(&i965->pp_mutex);
3665     }
3666
3667     return out_surface_id;
3668 }       
3669
3670 static VAStatus
3671 i965_image_pl3_processing(VADriverContextP ctx,
3672                           const struct i965_surface *src_surface,
3673                           const VARectangle *src_rect,
3674                           struct i965_surface *dst_surface,
3675                           const VARectangle *dst_rect)
3676 {
3677     struct i965_driver_data *i965 = i965_driver_data(ctx);
3678     struct i965_post_processing_context *pp_context = i965->pp_context;
3679     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
3680
3681     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
3682         i965_post_processing_internal(ctx, i965->pp_context,
3683                                       src_surface,
3684                                       src_rect,
3685                                       dst_surface,
3686                                       dst_rect,
3687                                       PP_PL3_LOAD_SAVE_N12,
3688                                       NULL);
3689     } else {
3690         i965_post_processing_internal(ctx, i965->pp_context,
3691                                       src_surface,
3692                                       src_rect,
3693                                       dst_surface,
3694                                       dst_rect,
3695                                       PP_PL3_LOAD_SAVE_PL3,
3696                                       NULL);
3697     }
3698
3699     intel_batchbuffer_flush(pp_context->batch);
3700
3701     return VA_STATUS_SUCCESS;
3702 }
3703
3704 static VAStatus
3705 i965_image_pl2_processing(VADriverContextP ctx,
3706                           const struct i965_surface *src_surface,
3707                           const VARectangle *src_rect,
3708                           struct i965_surface *dst_surface,
3709                           const VARectangle *dst_rect)
3710 {
3711     struct i965_driver_data *i965 = i965_driver_data(ctx);
3712     struct i965_post_processing_context *pp_context = i965->pp_context;
3713     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
3714
3715     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
3716         i965_post_processing_internal(ctx, i965->pp_context,
3717                                       src_surface,
3718                                       src_rect,
3719                                       dst_surface,
3720                                       dst_rect,
3721                                       PP_NV12_LOAD_SAVE_N12,
3722                                       NULL);
3723     } else {
3724         i965_post_processing_internal(ctx, i965->pp_context,
3725                                       src_surface,
3726                                       src_rect,
3727                                       dst_surface,
3728                                       dst_rect,
3729                                       PP_NV12_LOAD_SAVE_PL3,
3730                                       NULL);
3731     }
3732
3733     intel_batchbuffer_flush(pp_context->batch);
3734
3735     return VA_STATUS_SUCCESS;
3736 }
3737
3738 VAStatus
3739 i965_image_processing(VADriverContextP ctx,
3740                       const struct i965_surface *src_surface,
3741                       const VARectangle *src_rect,
3742                       struct i965_surface *dst_surface,
3743                       const VARectangle *dst_rect)
3744 {
3745     struct i965_driver_data *i965 = i965_driver_data(ctx);
3746     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
3747
3748     if (HAS_PP(i965)) {
3749         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
3750
3751         _i965LockMutex(&i965->pp_mutex);
3752
3753         switch (fourcc) {
3754         case VA_FOURCC('Y', 'V', '1', '2'):
3755         case VA_FOURCC('I', '4', '2', '0'):
3756         case VA_FOURCC('I', 'M', 'C', '1'):
3757         case VA_FOURCC('I', 'M', 'C', '3'):
3758             status = i965_image_pl3_processing(ctx,
3759                                                src_surface,
3760                                                src_rect,
3761                                                dst_surface,
3762                                                dst_rect);
3763             break;
3764
3765         case  VA_FOURCC('N', 'V', '1', '2'):
3766             status = i965_image_pl2_processing(ctx,
3767                                                src_surface,
3768                                                src_rect,
3769                                                dst_surface,
3770                                                dst_rect);
3771             break;
3772
3773         default:
3774             status = VA_STATUS_ERROR_UNIMPLEMENTED;
3775             break;
3776         }
3777         
3778         _i965UnlockMutex(&i965->pp_mutex);
3779     }
3780
3781     return status;
3782 }       
3783
3784 static void
3785 i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
3786 {
3787     int i;
3788
3789     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3790     pp_context->surface_state_binding_table.bo = NULL;
3791
3792     dri_bo_unreference(pp_context->curbe.bo);
3793     pp_context->curbe.bo = NULL;
3794
3795     dri_bo_unreference(pp_context->sampler_state_table.bo);
3796     pp_context->sampler_state_table.bo = NULL;
3797
3798     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3799     pp_context->sampler_state_table.bo_8x8 = NULL;
3800
3801     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3802     pp_context->sampler_state_table.bo_8x8_uv = NULL;
3803
3804     dri_bo_unreference(pp_context->idrt.bo);
3805     pp_context->idrt.bo = NULL;
3806     pp_context->idrt.num_interface_descriptors = 0;
3807
3808     dri_bo_unreference(pp_context->vfe_state.bo);
3809     pp_context->vfe_state.bo = NULL;
3810
3811     dri_bo_unreference(pp_context->stmm.bo);
3812     pp_context->stmm.bo = NULL;
3813
3814     for (i = 0; i < NUM_PP_MODULES; i++) {
3815         struct pp_module *pp_module = &pp_context->pp_modules[i];
3816
3817         dri_bo_unreference(pp_module->kernel.bo);
3818         pp_module->kernel.bo = NULL;
3819     }
3820
3821     free(pp_context->pp_static_parameter);
3822     free(pp_context->pp_inline_parameter);
3823     pp_context->pp_static_parameter = NULL;
3824     pp_context->pp_inline_parameter = NULL;
3825 }
3826
3827 Bool
3828 i965_post_processing_terminate(VADriverContextP ctx)
3829 {
3830     struct i965_driver_data *i965 = i965_driver_data(ctx);
3831     struct i965_post_processing_context *pp_context = i965->pp_context;
3832
3833     if (pp_context) {
3834         i965_post_processing_context_finalize(pp_context);
3835         free(pp_context);
3836     }
3837
3838     i965->pp_context = NULL;
3839
3840     return True;
3841 }
3842
3843 static void
3844 i965_post_processing_context_init(VADriverContextP ctx,
3845                                   struct i965_post_processing_context *pp_context,
3846                                   struct intel_batchbuffer *batch)
3847 {
3848     struct i965_driver_data *i965 = i965_driver_data(ctx);
3849     int i;
3850
3851     pp_context->urb.size = URB_SIZE((&i965->intel));
3852     pp_context->urb.num_vfe_entries = 32;
3853     pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
3854     pp_context->urb.num_cs_entries = 1;
3855     
3856     if (IS_GEN7(i965->intel.device_id))
3857         pp_context->urb.size_cs_entry = 4;      /* in 512 bits unit */
3858     else
3859         pp_context->urb.size_cs_entry = 2;
3860
3861     pp_context->urb.vfe_start = 0;
3862     pp_context->urb.cs_start = pp_context->urb.vfe_start + 
3863         pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
3864     assert(pp_context->urb.cs_start + 
3865            pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
3866
3867     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
3868     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
3869     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
3870
3871     if (IS_GEN7(i965->intel.device_id))
3872         memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
3873     else if (IS_GEN6(i965->intel.device_id))
3874         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
3875     else if (IS_IRONLAKE(i965->intel.device_id))
3876         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
3877
3878     for (i = 0; i < NUM_PP_MODULES; i++) {
3879         struct pp_module *pp_module = &pp_context->pp_modules[i];
3880         dri_bo_unreference(pp_module->kernel.bo);
3881         if (pp_module->kernel.bin && pp_module->kernel.size) {
3882             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
3883                                                 pp_module->kernel.name,
3884                                                 pp_module->kernel.size,
3885                                                 4096);
3886             assert(pp_module->kernel.bo);
3887             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
3888         } else {
3889             pp_module->kernel.bo = NULL;
3890         }
3891     }
3892
3893     /* static & inline parameters */
3894     if (IS_GEN7(i965->intel.device_id)) {
3895         pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
3896         pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
3897     } else {
3898         pp_context->pp_static_parameter = calloc(sizeof(struct pp_static_parameter), 1);
3899         pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
3900     }
3901
3902     pp_context->batch = batch;
3903 }
3904
3905 Bool
3906 i965_post_processing_init(VADriverContextP ctx)
3907 {
3908     struct i965_driver_data *i965 = i965_driver_data(ctx);
3909     struct i965_post_processing_context *pp_context = i965->pp_context;
3910
3911     if (HAS_PP(i965)) {
3912         if (pp_context == NULL) {
3913             pp_context = calloc(1, sizeof(*pp_context));
3914             i965_post_processing_context_init(ctx, pp_context, i965->batch);
3915             i965->pp_context = pp_context;
3916         }
3917     }
3918
3919     return True;
3920 }
3921
/*
 * Maps a filter type (used directly as the array index by
 * i965_proc_picture()) to the post-processing kernel index that
 * implements it.  PP_NULL marks filter types with no kernel assigned in
 * this table; i965_proc_picture() skips those filters.
 *
 * The initializers are positional: their order must match the numeric
 * order of the filter types named in the per-entry comments.
 */
static const int procfilter_to_pp_flag[VAProcFilterCount] = {
    PP_NULL,    /* VAProcFilterNone */
    PP_NV12_DN, /* VAProcFilterNoiseReduction */
    PP_NV12_DNDI, /* VAProcFilterDeinterlacing */
    PP_NULL,    /* VAProcFilterSharpening */
    PP_NULL,    /* VAProcFilterColorBalance */
    PP_NULL,    /* VAProcFilterColorStandard */
};
3930
3931 static const int proc_frame_to_pp_frame[3] = {
3932     I965_SURFACE_FLAG_FRAME,
3933     I965_SURFACE_FLAG_TOP_FIELD_FIRST,
3934     I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST
3935 };
3936
3937 static void 
3938 i965_proc_picture(VADriverContextP ctx, 
3939                   VAProfile profile, 
3940                   union codec_state *codec_state,
3941                   struct hw_context *hw_context)
3942 {
3943     struct i965_driver_data *i965 = i965_driver_data(ctx);
3944     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
3945     struct proc_state *proc_state = &codec_state->proc;
3946     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
3947     struct object_surface *obj_surface;
3948     struct i965_surface src_surface, dst_surface;
3949     VARectangle src_rect, dst_rect;
3950     VAStatus status;
3951     int i;
3952     VASurfaceID tmp_surfaces[VAProcFilterCount + 4];
3953     int num_tmp_surfaces = 0;
3954     unsigned int tiling = 0, swizzle = 0;
3955     int in_width, in_height;
3956
3957     assert(pipeline_param->surface != VA_INVALID_ID);
3958     assert(proc_state->current_render_target != VA_INVALID_ID);
3959
3960     obj_surface = SURFACE(pipeline_param->surface);
3961     in_width = obj_surface->orig_width;
3962     in_height = obj_surface->orig_height;
3963     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
3964
3965     src_surface.id = pipeline_param->surface;
3966     src_surface.type = I965_SURFACE_TYPE_SURFACE;
3967     src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
3968
3969     if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2')) {
3970         VASurfaceID out_surface_id = VA_INVALID_ID;
3971
3972         src_surface.id = pipeline_param->surface;
3973         src_surface.type = I965_SURFACE_TYPE_SURFACE;
3974         src_surface.flags = I965_SURFACE_FLAG_FRAME;
3975         src_rect.x = 0;
3976         src_rect.y = 0;
3977         src_rect.width = in_width;
3978         src_rect.height = in_height;
3979
3980         status = i965_CreateSurfaces(ctx,
3981                                      in_width,
3982                                      in_height,
3983                                      VA_RT_FORMAT_YUV420,
3984                                      1,
3985                                      &out_surface_id);
3986         assert(status == VA_STATUS_SUCCESS);
3987         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
3988         obj_surface = SURFACE(out_surface_id);
3989         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
3990
3991         dst_surface.id = out_surface_id;
3992         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
3993         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
3994         dst_rect.x = 0;
3995         dst_rect.y = 0;
3996         dst_rect.width = in_width;
3997         dst_rect.height = in_height;
3998
3999         status = i965_image_processing(ctx,
4000                                        &src_surface,
4001                                        &src_rect,
4002                                        &dst_surface,
4003                                        &dst_rect);
4004         assert(status == VA_STATUS_SUCCESS);
4005
4006         src_surface.id = out_surface_id;
4007         src_surface.type = I965_SURFACE_TYPE_SURFACE;
4008         src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
4009     }
4010
4011     if (pipeline_param->surface_region) {
4012         src_rect.x = pipeline_param->surface_region->x;
4013         src_rect.y = pipeline_param->surface_region->y;
4014         src_rect.width = pipeline_param->surface_region->width;
4015         src_rect.height = pipeline_param->surface_region->height;
4016     } else {
4017         src_rect.x = 0;
4018         src_rect.y = 0;
4019         src_rect.width = in_width;
4020         src_rect.height = in_height;
4021     }
4022
4023     if (pipeline_param->output_region) {
4024         dst_rect.x = pipeline_param->output_region->x;
4025         dst_rect.y = pipeline_param->output_region->y;
4026         dst_rect.width = pipeline_param->output_region->width;
4027         dst_rect.height = pipeline_param->output_region->height;
4028     } else {
4029         dst_rect.x = 0;
4030         dst_rect.y = 0;
4031         dst_rect.width = in_width;
4032         dst_rect.height = in_height;
4033     }
4034
4035     obj_surface = SURFACE(proc_state->current_render_target);
4036     i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4037     
4038     for (i = 0; i < pipeline_param->num_filters; i++) {
4039         struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]);
4040         VAProcFilterParameterBufferBase *filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer;
4041         VAProcFilterType filter_type = filter_param->type;
4042         VASurfaceID out_surface_id = VA_INVALID_ID;
4043         int kernel_index = procfilter_to_pp_flag[filter_type];
4044
4045         if (kernel_index != PP_NULL &&
4046             proc_context->pp_context.pp_modules[kernel_index].kernel.bo != NULL) {
4047             status = i965_CreateSurfaces(ctx,
4048                                          in_width,
4049                                          in_height,
4050                                          VA_RT_FORMAT_YUV420,
4051                                          1,
4052                                          &out_surface_id);
4053             assert(status == VA_STATUS_SUCCESS);
4054             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
4055             obj_surface = SURFACE(out_surface_id);
4056             i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4057             dst_surface.id = out_surface_id;
4058             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4059             status = i965_post_processing_internal(ctx, &proc_context->pp_context,
4060                                                    &src_surface,
4061                                                    &src_rect,
4062                                                    &dst_surface,
4063                                                    &src_rect,
4064                                                    kernel_index,
4065                                                    filter_param);
4066
4067             if (status == VA_STATUS_SUCCESS) {
4068                 src_surface.id = dst_surface.id;
4069                 src_surface.type = dst_surface.type;
4070                 src_surface.flags = dst_surface.flags;
4071             }
4072         }
4073     }
4074
4075     dst_surface.id = proc_state->current_render_target;
4076     dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4077
4078     if (src_rect.width == dst_rect.width &&
4079         src_rect.height == dst_rect.height) {
4080         i965_post_processing_internal(ctx, &proc_context->pp_context,
4081                                       &src_surface,
4082                                       &src_rect,
4083                                       &dst_surface,
4084                                       &dst_rect,
4085                                       PP_NV12_LOAD_SAVE_N12,
4086                                       NULL);
4087     } else {
4088
4089         i965_post_processing_internal(ctx, &proc_context->pp_context,
4090                                       &src_surface,
4091                                       &src_rect,
4092                                       &dst_surface,
4093                                       &dst_rect,
4094                                       (pipeline_param->filter_flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC ?
4095                                       PP_NV12_AVS : PP_NV12_SCALING,
4096                                       NULL);
4097     }
4098
4099     if (num_tmp_surfaces)
4100         i965_DestroySurfaces(ctx,
4101                              tmp_surfaces,
4102                              num_tmp_surfaces);
4103
4104     intel_batchbuffer_flush(hw_context->batch);
4105 }
4106
4107 static void
4108 i965_proc_context_destroy(void *hw_context)
4109 {
4110     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
4111
4112     i965_post_processing_context_finalize(&proc_context->pp_context);
4113     intel_batchbuffer_free(proc_context->base.batch);
4114     free(proc_context);
4115 }
4116
4117 struct hw_context *
4118 i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config)
4119 {
4120     struct intel_driver_data *intel = intel_driver_data(ctx);
4121     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
4122
4123     proc_context->base.destroy = i965_proc_context_destroy;
4124     proc_context->base.run = i965_proc_picture;
4125     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
4126     i965_post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
4127
4128     return (struct hw_context *)proc_context;
4129 }