add YUY2->YV12 conversion in post processing (SNB/ILK)
[profile/ivi/vaapi-intel-driver.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_post_processing.h"
40 #include "i965_render.h"
41
/*
 * Non-zero when the device behind ctx is one of the families tested here:
 * Ironlake, GEN6 or GEN7.
 */
#define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) ||     \
                     IS_GEN6((ctx)->intel.device_id) ||         \
                     IS_GEN7((ctx)->intel.device_id))

/*
 * Surface-state slot sizes.  Each of the two surface-state layouts of a
 * generation is padded to a multiple of 32 bytes, and the larger of the two
 * becomes the slot size for that generation.
 */
#define SURFACE_STATE_PADDED_SIZE_0_I965        ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_I965        ALIGN(sizeof(struct i965_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_I965          MAX(SURFACE_STATE_PADDED_SIZE_0_I965, SURFACE_STATE_PADDED_SIZE_1_I965)

#define SURFACE_STATE_PADDED_SIZE_0_GEN7        ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_GEN7        ALIGN(sizeof(struct gen7_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN7          MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)

/*
 * One slot size for all generations: the larger of the I965 and GEN7 sizes.
 * Slot `index` starts at SURFACE_STATE_OFFSET(index); the binding table
 * starts right after MAX_PP_SURFACES slots.
 *
 * The macro argument is parenthesized so that an expression argument such
 * as `base + 1` is multiplied as a whole.
 */
#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)
57
58 static const uint32_t pp_null_gen5[][4] = {
59 #include "shaders/post_processing/gen5_6/null.g4b.gen5"
60 };
61
62 static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
63 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
64 };
65
66 static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
67 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
68 };
69
70 static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
71 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
72 };
73
74 static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
75 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
76 };
77
78 static const uint32_t pp_nv12_scaling_gen5[][4] = {
79 #include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
80 };
81
82 static const uint32_t pp_nv12_avs_gen5[][4] = {
83 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
84 };
85
86 static const uint32_t pp_nv12_dndi_gen5[][4] = {
87 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
88 };
89
90 static const uint32_t pp_nv12_dn_gen5[][4] = {
91 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
92 };
93
94 static const uint32_t pp_nv12_load_save_pa_gen5[][4] = {
95 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5"
96 };
97
98 static const uint32_t pp_pl3_load_save_pa_gen5[][4] = {
99 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5"
100 };
101
102 static const uint32_t pp_pa_load_save_nv12_gen5[][4] = {
103 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5"
104 };
105
106 static const uint32_t pp_pa_load_save_pl3_gen5[][4] = {
107 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5"
108 };
109
110 static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
111                                    const struct i965_surface *src_surface,
112                                    const VARectangle *src_rect,
113                                    struct i965_surface *dst_surface,
114                                    const VARectangle *dst_rect,
115                                    void *filter_param);
116 static VAStatus pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
117                                             const struct i965_surface *src_surface,
118                                             const VARectangle *src_rect,
119                                             struct i965_surface *dst_surface,
120                                             const VARectangle *dst_rect,
121                                             void *filter_param);
122 static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
123                                            const struct i965_surface *src_surface,
124                                            const VARectangle *src_rect,
125                                            struct i965_surface *dst_surface,
126                                            const VARectangle *dst_rect,
127                                            void *filter_param);
128 static VAStatus gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
129                                              const struct i965_surface *src_surface,
130                                              const VARectangle *src_rect,
131                                              struct i965_surface *dst_surface,
132                                              const VARectangle *dst_rect,
133                                              void *filter_param);
134 static VAStatus pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
135                                                 const struct i965_surface *src_surface,
136                                                 const VARectangle *src_rect,
137                                                 struct i965_surface *dst_surface,
138                                                 const VARectangle *dst_rect,
139                                                 void *filter_param);
140 static VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
141                                         const struct i965_surface *src_surface,
142                                         const VARectangle *src_rect,
143                                         struct i965_surface *dst_surface,
144                                         const VARectangle *dst_rect,
145                                         void *filter_param);
146 static VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
147                                       const struct i965_surface *src_surface,
148                                       const VARectangle *src_rect,
149                                       struct i965_surface *dst_surface,
150                                       const VARectangle *dst_rect,
151                                       void *filter_param);
152
153 static struct pp_module pp_modules_gen5[] = {
154     {
155         {
156             "NULL module (for testing)",
157             PP_NULL,
158             pp_null_gen5,
159             sizeof(pp_null_gen5),
160             NULL,
161         },
162
163         pp_null_initialize,
164     },
165
166     {
167         {
168             "NV12_NV12",
169             PP_NV12_LOAD_SAVE_N12,
170             pp_nv12_load_save_nv12_gen5,
171             sizeof(pp_nv12_load_save_nv12_gen5),
172             NULL,
173         },
174
175         pp_plx_load_save_plx_initialize,
176     },
177
178     {
179         {
180             "NV12_PL3",
181             PP_NV12_LOAD_SAVE_PL3,
182             pp_nv12_load_save_pl3_gen5,
183             sizeof(pp_nv12_load_save_pl3_gen5),
184             NULL,
185         },
186
187         pp_plx_load_save_plx_initialize,
188     },
189
190     {
191         {
192             "PL3_NV12",
193             PP_PL3_LOAD_SAVE_N12,
194             pp_pl3_load_save_nv12_gen5,
195             sizeof(pp_pl3_load_save_nv12_gen5),
196             NULL,
197         },
198
199         pp_plx_load_save_plx_initialize,
200     },
201
202     {
203         {
204             "PL3_PL3",
205             PP_PL3_LOAD_SAVE_N12,
206             pp_pl3_load_save_pl3_gen5,
207             sizeof(pp_pl3_load_save_pl3_gen5),
208             NULL,
209         },
210
211         pp_plx_load_save_plx_initialize
212     },
213
214     {
215         {
216             "NV12 Scaling module",
217             PP_NV12_SCALING,
218             pp_nv12_scaling_gen5,
219             sizeof(pp_nv12_scaling_gen5),
220             NULL,
221         },
222
223         pp_nv12_scaling_initialize,
224     },
225
226     {
227         {
228             "NV12 AVS module",
229             PP_NV12_AVS,
230             pp_nv12_avs_gen5,
231             sizeof(pp_nv12_avs_gen5),
232             NULL,
233         },
234
235         pp_nv12_avs_initialize_nlas,
236     },
237
238     {
239         {
240             "NV12 DNDI module",
241             PP_NV12_DNDI,
242             pp_nv12_dndi_gen5,
243             sizeof(pp_nv12_dndi_gen5),
244             NULL,
245         },
246
247         pp_nv12_dndi_initialize,
248     },
249
250     {
251         {
252             "NV12 DN module",
253             PP_NV12_DN,
254             pp_nv12_dn_gen5,
255             sizeof(pp_nv12_dn_gen5),
256             NULL,
257         },
258
259         pp_nv12_dn_initialize,
260     },
261
262     {
263         {
264             "NV12_PA module",
265             PP_NV12_LOAD_SAVE_PA,
266             pp_nv12_load_save_pa_gen5,
267             sizeof(pp_nv12_load_save_pa_gen5),
268             NULL,
269         },
270     
271         pp_plx_load_save_plx_initialize,
272     },
273
274     {
275         {
276             "PL3_PA module",
277             PP_PL3_LOAD_SAVE_PA,
278             pp_pl3_load_save_pa_gen5,
279             sizeof(pp_pl3_load_save_pa_gen5),
280             NULL,
281         },
282     
283         pp_plx_load_save_plx_initialize,
284     },
285
286     {
287         {
288             "PA_NV12 module",
289             PP_PA_LOAD_SAVE_NV12,
290             pp_pa_load_save_nv12_gen5,
291             sizeof(pp_pa_load_save_nv12_gen5),
292             NULL,
293         },
294     
295         pp_plx_load_save_plx_initialize,
296     },
297
298     {
299         {
300             "PA_PL3 module",
301             PP_PA_LOAD_SAVE_PL3,
302             pp_pa_load_save_pl3_gen5,
303             sizeof(pp_pa_load_save_pl3_gen5),
304             NULL,
305         },
306     
307         pp_plx_load_save_plx_initialize,
308     },
309
310 };
311
312 static const uint32_t pp_null_gen6[][4] = {
313 #include "shaders/post_processing/gen5_6/null.g6b"
314 };
315
316 static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
317 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
318 };
319
320 static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
321 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
322 };
323
324 static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
325 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
326 };
327
328 static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
329 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
330 };
331
332 static const uint32_t pp_nv12_scaling_gen6[][4] = {
333 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
334 };
335
336 static const uint32_t pp_nv12_avs_gen6[][4] = {
337 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
338 };
339
340 static const uint32_t pp_nv12_dndi_gen6[][4] = {
341 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
342 };
343
344 static const uint32_t pp_nv12_dn_gen6[][4] = {
345 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
346 };
347
348 static const uint32_t pp_nv12_load_save_pa_gen6[][4] = {
349 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g6b"
350 };
351
352 static const uint32_t pp_pl3_load_save_pa_gen6[][4] = {
353 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g6b"
354 };
355
356 static const uint32_t pp_pa_load_save_nv12_gen6[][4] = {
357 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g6b"
358 };
359
360 static const uint32_t pp_pa_load_save_pl3_gen6[][4] = {
361 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g6b"
362 };
363
364 static struct pp_module pp_modules_gen6[] = {
365     {
366         {
367             "NULL module (for testing)",
368             PP_NULL,
369             pp_null_gen6,
370             sizeof(pp_null_gen6),
371             NULL,
372         },
373
374         pp_null_initialize,
375     },
376
377     {
378         {
379             "NV12_NV12",
380             PP_NV12_LOAD_SAVE_N12,
381             pp_nv12_load_save_nv12_gen6,
382             sizeof(pp_nv12_load_save_nv12_gen6),
383             NULL,
384         },
385
386         pp_plx_load_save_plx_initialize,
387     },
388
389     {
390         {
391             "NV12_PL3",
392             PP_NV12_LOAD_SAVE_PL3,
393             pp_nv12_load_save_pl3_gen6,
394             sizeof(pp_nv12_load_save_pl3_gen6),
395             NULL,
396         },
397         
398         pp_plx_load_save_plx_initialize,
399     },
400
401     {
402         {
403             "PL3_NV12",
404             PP_PL3_LOAD_SAVE_N12,
405             pp_pl3_load_save_nv12_gen6,
406             sizeof(pp_pl3_load_save_nv12_gen6),
407             NULL,
408         },
409
410         pp_plx_load_save_plx_initialize,
411     },
412
413     {
414         {
415             "PL3_PL3",
416             PP_PL3_LOAD_SAVE_N12,
417             pp_pl3_load_save_pl3_gen6,
418             sizeof(pp_pl3_load_save_pl3_gen6),
419             NULL,
420         },
421
422         pp_plx_load_save_plx_initialize,
423     },
424
425     {
426         {
427             "NV12 Scaling module",
428             PP_NV12_SCALING,
429             pp_nv12_scaling_gen6,
430             sizeof(pp_nv12_scaling_gen6),
431             NULL,
432         },
433
434         gen6_nv12_scaling_initialize,
435     },
436
437     {
438         {
439             "NV12 AVS module",
440             PP_NV12_AVS,
441             pp_nv12_avs_gen6,
442             sizeof(pp_nv12_avs_gen6),
443             NULL,
444         },
445
446         pp_nv12_avs_initialize_nlas,
447     },
448
449     {
450         {
451             "NV12 DNDI module",
452             PP_NV12_DNDI,
453             pp_nv12_dndi_gen6,
454             sizeof(pp_nv12_dndi_gen6),
455             NULL,
456         },
457
458         pp_nv12_dndi_initialize,
459     },
460
461     {
462         {
463             "NV12 DN module",
464             PP_NV12_DN,
465             pp_nv12_dn_gen6,
466             sizeof(pp_nv12_dn_gen6),
467             NULL,
468         },
469
470         pp_nv12_dn_initialize,
471     },
472     {
473         {
474             "NV12_PA module",
475             PP_NV12_LOAD_SAVE_PA,
476             pp_nv12_load_save_pa_gen6,
477             sizeof(pp_nv12_load_save_pa_gen6),
478             NULL,
479         },
480     
481         pp_plx_load_save_plx_initialize,
482     },
483     
484     {
485         {
486             "PL3_PA module",
487             PP_PL3_LOAD_SAVE_PA,
488             pp_pl3_load_save_pa_gen6,
489             sizeof(pp_pl3_load_save_pa_gen6),
490             NULL,
491         },
492     
493         pp_plx_load_save_plx_initialize,
494     },
495     
496     {
497         {
498             "PA_NV12 module",
499             PP_PA_LOAD_SAVE_NV12,
500             pp_pa_load_save_nv12_gen6,
501             sizeof(pp_pa_load_save_nv12_gen6),
502             NULL,
503         },
504     
505         pp_plx_load_save_plx_initialize,
506     },
507
508     {
509         {
510             "PA_PL3 module",
511             PP_PA_LOAD_SAVE_PL3,
512             pp_pa_load_save_pl3_gen6,
513             sizeof(pp_pa_load_save_pl3_gen6),
514             NULL,
515         },
516     
517         pp_plx_load_save_plx_initialize,
518     },
519     
520 };
521
522 static const uint32_t pp_null_gen7[][4] = {
523 };
524
525 static const uint32_t pp_nv12_load_save_nv12_gen7[][4] = {
526 #include "shaders/post_processing/gen7/pl2_to_pl2.g7b"
527 };
528
529 static const uint32_t pp_nv12_load_save_pl3_gen7[][4] = {
530 #include "shaders/post_processing/gen7/pl2_to_pl3.g7b"
531 };
532
533 static const uint32_t pp_pl3_load_save_nv12_gen7[][4] = {
534 #include "shaders/post_processing/gen7/pl3_to_pl2.g7b"
535 };
536
537 static const uint32_t pp_pl3_load_save_pl3_gen7[][4] = {
538 #include "shaders/post_processing/gen7/pl3_to_pl3.g7b"
539 };
540
541 static const uint32_t pp_nv12_scaling_gen7[][4] = {
542 #include "shaders/post_processing/gen7/avs.g7b"
543 };
544
545 static const uint32_t pp_nv12_avs_gen7[][4] = {
546 #include "shaders/post_processing/gen7/avs.g7b"
547 };
548
549 static const uint32_t pp_nv12_dndi_gen7[][4] = {
550 // #include "shaders/post_processing/gen7/dndi.g7b"
551 };
552
553 static const uint32_t pp_nv12_dn_gen7[][4] = {
554 };
555 static const uint32_t pp_nv12_load_save_pa_gen7[][4] = {
556 };
557 static const uint32_t pp_pl3_load_save_pa_gen7[][4] = {
558 };
559 static const uint32_t pp_pa_load_save_nv12_gen7[][4] = {
560 };
561 static const uint32_t pp_pa_load_save_pl3_gen7[][4] = {
562 };
563
564 static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
565                                            const struct i965_surface *src_surface,
566                                            const VARectangle *src_rect,
567                                            struct i965_surface *dst_surface,
568                                            const VARectangle *dst_rect,
569                                            void *filter_param);
570 static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
571                                              const struct i965_surface *src_surface,
572                                              const VARectangle *src_rect,
573                                              struct i965_surface *dst_surface,
574                                              const VARectangle *dst_rect,
575                                              void *filter_param);
576 static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
577                                            const struct i965_surface *src_surface,
578                                            const VARectangle *src_rect,
579                                            struct i965_surface *dst_surface,
580                                            const VARectangle *dst_rect,
581                                            void *filter_param);
582
583 static struct pp_module pp_modules_gen7[] = {
584     {
585         {
586             "NULL module (for testing)",
587             PP_NULL,
588             pp_null_gen7,
589             sizeof(pp_null_gen7),
590             NULL,
591         },
592
593         pp_null_initialize,
594     },
595
596     {
597         {
598             "NV12_NV12",
599             PP_NV12_LOAD_SAVE_N12,
600             pp_nv12_load_save_nv12_gen7,
601             sizeof(pp_nv12_load_save_nv12_gen7),
602             NULL,
603         },
604
605         gen7_pp_plx_avs_initialize,
606     },
607
608     {
609         {
610             "NV12_PL3",
611             PP_NV12_LOAD_SAVE_PL3,
612             pp_nv12_load_save_pl3_gen7,
613             sizeof(pp_nv12_load_save_pl3_gen7),
614             NULL,
615         },
616         
617         gen7_pp_plx_avs_initialize,
618     },
619
620     {
621         {
622             "PL3_NV12",
623             PP_PL3_LOAD_SAVE_N12,
624             pp_pl3_load_save_nv12_gen7,
625             sizeof(pp_pl3_load_save_nv12_gen7),
626             NULL,
627         },
628
629         gen7_pp_plx_avs_initialize,
630     },
631
632     {
633         {
634             "PL3_PL3",
635             PP_PL3_LOAD_SAVE_N12,
636             pp_pl3_load_save_pl3_gen7,
637             sizeof(pp_pl3_load_save_pl3_gen7),
638             NULL,
639         },
640
641         gen7_pp_plx_avs_initialize,
642     },
643
644     {
645         {
646             "NV12 Scaling module",
647             PP_NV12_SCALING,
648             pp_nv12_scaling_gen7,
649             sizeof(pp_nv12_scaling_gen7),
650             NULL,
651         },
652
653         gen7_pp_plx_avs_initialize,
654     },
655
656     {
657         {
658             "NV12 AVS module",
659             PP_NV12_AVS,
660             pp_nv12_avs_gen7,
661             sizeof(pp_nv12_avs_gen7),
662             NULL,
663         },
664
665         gen7_pp_plx_avs_initialize,
666     },
667
668     {
669         {
670             "NV12 DNDI module",
671             PP_NV12_DNDI,
672             pp_nv12_dndi_gen7,
673             sizeof(pp_nv12_dndi_gen7),
674             NULL,
675         },
676
677         gen7_pp_nv12_dndi_initialize,
678     },
679
680     {
681         {
682             "NV12 DN module",
683             PP_NV12_DN,
684             pp_nv12_dn_gen7,
685             sizeof(pp_nv12_dn_gen7),
686             NULL,
687         },
688
689         gen7_pp_nv12_dn_initialize,
690     },
691     {
692         {
693             "NV12_PA module",
694             PP_NV12_LOAD_SAVE_PA,
695             pp_nv12_load_save_pa_gen7,
696             sizeof(pp_nv12_load_save_pa_gen7),
697             NULL,
698         },
699     
700         pp_plx_load_save_plx_initialize,
701     },
702
703     {
704         {
705             "PL3_PA module",
706             PP_PL3_LOAD_SAVE_PA,
707             pp_pl3_load_save_pa_gen7,
708             sizeof(pp_pl3_load_save_pa_gen7),
709             NULL,
710         },
711     
712         pp_plx_load_save_plx_initialize,
713     },
714
715     {
716         {
717             "PA_NV12 module",
718             PP_PA_LOAD_SAVE_NV12,
719             pp_pa_load_save_nv12_gen7,
720             sizeof(pp_pa_load_save_nv12_gen7),
721             NULL,
722         },
723     
724         pp_plx_load_save_plx_initialize,
725     },
726
727     {
728         {
729             "PA_PL3 module",
730             PP_PA_LOAD_SAVE_PL3,
731             pp_pa_load_save_pl3_gen7,
732             sizeof(pp_pa_load_save_pl3_gen7),
733             NULL,
734         },
735     
736         pp_plx_load_save_plx_initialize,
737     },
738     
739 };
740
741 static int
742 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
743 {
744     struct i965_driver_data *i965 = i965_driver_data(ctx);
745     int fourcc;
746
747     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
748         struct object_image *obj_image = IMAGE(surface->id);
749         fourcc = obj_image->image.format.fourcc;
750     } else {
751         struct object_surface *obj_surface = SURFACE(surface->id);
752         fourcc = obj_surface->fourcc;
753     }
754
755     return fourcc;
756 }
757
758 static void
759 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
760 {
761     switch (tiling) {
762     case I915_TILING_NONE:
763         ss->ss3.tiled_surface = 0;
764         ss->ss3.tile_walk = 0;
765         break;
766     case I915_TILING_X:
767         ss->ss3.tiled_surface = 1;
768         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
769         break;
770     case I915_TILING_Y:
771         ss->ss3.tiled_surface = 1;
772         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
773         break;
774     }
775 }
776
777 static void
778 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
779 {
780     switch (tiling) {
781     case I915_TILING_NONE:
782         ss->ss2.tiled_surface = 0;
783         ss->ss2.tile_walk = 0;
784         break;
785     case I915_TILING_X:
786         ss->ss2.tiled_surface = 1;
787         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
788         break;
789     case I915_TILING_Y:
790         ss->ss2.tiled_surface = 1;
791         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
792         break;
793     }
794 }
795
796 static void
797 gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
798 {
799     switch (tiling) {
800     case I915_TILING_NONE:
801         ss->ss0.tiled_surface = 0;
802         ss->ss0.tile_walk = 0;
803         break;
804     case I915_TILING_X:
805         ss->ss0.tiled_surface = 1;
806         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
807         break;
808     case I915_TILING_Y:
809         ss->ss0.tiled_surface = 1;
810         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
811         break;
812     }
813 }
814
815 static void
816 gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
817 {
818     switch (tiling) {
819     case I915_TILING_NONE:
820         ss->ss2.tiled_surface = 0;
821         ss->ss2.tile_walk = 0;
822         break;
823     case I915_TILING_X:
824         ss->ss2.tiled_surface = 1;
825         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
826         break;
827     case I915_TILING_Y:
828         ss->ss2.tiled_surface = 1;
829         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
830         break;
831     }
832 }
833
834 static void
835 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
836 {
837     struct i965_interface_descriptor *desc;
838     dri_bo *bo;
839     int pp_index = pp_context->current_pp;
840
841     bo = pp_context->idrt.bo;
842     dri_bo_map(bo, 1);
843     assert(bo->virtual);
844     desc = bo->virtual;
845     memset(desc, 0, sizeof(*desc));
846     desc->desc0.grf_reg_blocks = 10;
847     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
848     desc->desc1.const_urb_entry_read_offset = 0;
849     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
850     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
851     desc->desc2.sampler_count = 0;
852     desc->desc3.binding_table_entry_count = 0;
853     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
854
855     dri_bo_emit_reloc(bo,
856                       I915_GEM_DOMAIN_INSTRUCTION, 0,
857                       desc->desc0.grf_reg_blocks,
858                       offsetof(struct i965_interface_descriptor, desc0),
859                       pp_context->pp_modules[pp_index].kernel.bo);
860
861     dri_bo_emit_reloc(bo,
862                       I915_GEM_DOMAIN_INSTRUCTION, 0,
863                       desc->desc2.sampler_count << 2,
864                       offsetof(struct i965_interface_descriptor, desc2),
865                       pp_context->sampler_state_table.bo);
866
867     dri_bo_unmap(bo);
868     pp_context->idrt.num_interface_descriptors++;
869 }
870
871 static void
872 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
873 {
874     struct i965_vfe_state *vfe_state;
875     dri_bo *bo;
876
877     bo = pp_context->vfe_state.bo;
878     dri_bo_map(bo, 1);
879     assert(bo->virtual);
880     vfe_state = bo->virtual;
881     memset(vfe_state, 0, sizeof(*vfe_state));
882     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
883     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
884     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
885     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
886     vfe_state->vfe1.children_present = 0;
887     vfe_state->vfe2.interface_descriptor_base = 
888         pp_context->idrt.bo->offset >> 4; /* reloc */
889     dri_bo_emit_reloc(bo,
890                       I915_GEM_DOMAIN_INSTRUCTION, 0,
891                       0,
892                       offsetof(struct i965_vfe_state, vfe2),
893                       pp_context->idrt.bo);
894     dri_bo_unmap(bo);
895 }
896
897 static void
898 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
899 {
900     unsigned char *constant_buffer;
901     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
902
903     assert(sizeof(*pp_static_parameter) == 128);
904     dri_bo_map(pp_context->curbe.bo, 1);
905     assert(pp_context->curbe.bo->virtual);
906     constant_buffer = pp_context->curbe.bo->virtual;
907     memcpy(constant_buffer, pp_static_parameter, sizeof(*pp_static_parameter));
908     dri_bo_unmap(pp_context->curbe.bo);
909 }
910
911 static void
912 ironlake_pp_states_setup(VADriverContextP ctx,
913                          struct i965_post_processing_context *pp_context)
914 {
915     ironlake_pp_interface_descriptor_table(pp_context);
916     ironlake_pp_vfe_state(pp_context);
917     ironlake_pp_upload_constants(pp_context);
918 }
919
920 static void
921 ironlake_pp_pipeline_select(VADriverContextP ctx,
922                             struct i965_post_processing_context *pp_context)
923 {
924     struct intel_batchbuffer *batch = pp_context->batch;
925
926     BEGIN_BATCH(batch, 1);
927     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
928     ADVANCE_BATCH(batch);
929 }
930
931 static void
932 ironlake_pp_urb_layout(VADriverContextP ctx,
933                        struct i965_post_processing_context *pp_context)
934 {
935     struct intel_batchbuffer *batch = pp_context->batch;
936     unsigned int vfe_fence, cs_fence;
937
938     vfe_fence = pp_context->urb.cs_start;
939     cs_fence = pp_context->urb.size;
940
941     BEGIN_BATCH(batch, 3);
942     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
943     OUT_BATCH(batch, 0);
944     OUT_BATCH(batch, 
945               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
946               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
947     ADVANCE_BATCH(batch);
948 }
949
950 static void
951 ironlake_pp_state_base_address(VADriverContextP ctx,
952                                struct i965_post_processing_context *pp_context)
953 {
954     struct intel_batchbuffer *batch = pp_context->batch;
955
956     BEGIN_BATCH(batch, 8);
957     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
958     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
959     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
960     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
961     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
962     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
963     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
964     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
965     ADVANCE_BATCH(batch);
966 }
967
968 static void
969 ironlake_pp_state_pointers(VADriverContextP ctx,
970                            struct i965_post_processing_context *pp_context)
971 {
972     struct intel_batchbuffer *batch = pp_context->batch;
973
974     BEGIN_BATCH(batch, 3);
975     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
976     OUT_BATCH(batch, 0);
977     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
978     ADVANCE_BATCH(batch);
979 }
980
981 static void 
982 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
983                           struct i965_post_processing_context *pp_context)
984 {
985     struct intel_batchbuffer *batch = pp_context->batch;
986
987     BEGIN_BATCH(batch, 2);
988     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
989     OUT_BATCH(batch,
990               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
991               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
992     ADVANCE_BATCH(batch);
993 }
994
995 static void
996 ironlake_pp_constant_buffer(VADriverContextP ctx,
997                             struct i965_post_processing_context *pp_context)
998 {
999     struct intel_batchbuffer *batch = pp_context->batch;
1000
1001     BEGIN_BATCH(batch, 2);
1002     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1003     OUT_RELOC(batch, pp_context->curbe.bo,
1004               I915_GEM_DOMAIN_INSTRUCTION, 0,
1005               pp_context->urb.size_cs_entry - 1);
1006     ADVANCE_BATCH(batch);    
1007 }
1008
1009 static void
1010 ironlake_pp_object_walker(VADriverContextP ctx,
1011                           struct i965_post_processing_context *pp_context)
1012 {
1013     struct intel_batchbuffer *batch = pp_context->batch;
1014     int x, x_steps, y, y_steps;
1015     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1016
1017     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
1018     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
1019
1020     for (y = 0; y < y_steps; y++) {
1021         for (x = 0; x < x_steps; x++) {
1022             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
1023                 BEGIN_BATCH(batch, 20);
1024                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
1025                 OUT_BATCH(batch, 0);
1026                 OUT_BATCH(batch, 0); /* no indirect data */
1027                 OUT_BATCH(batch, 0);
1028
1029                 /* inline data grf 5-6 */
1030                 assert(sizeof(*pp_inline_parameter) == 64);
1031                 intel_batchbuffer_data(batch, pp_inline_parameter, sizeof(*pp_inline_parameter));
1032
1033                 ADVANCE_BATCH(batch);
1034             }
1035         }
1036     }
1037 }
1038
1039 static void
1040 ironlake_pp_pipeline_setup(VADriverContextP ctx,
1041                            struct i965_post_processing_context *pp_context)
1042 {
1043     struct intel_batchbuffer *batch = pp_context->batch;
1044
1045     intel_batchbuffer_start_atomic(batch, 0x1000);
1046     intel_batchbuffer_emit_mi_flush(batch);
1047     ironlake_pp_pipeline_select(ctx, pp_context);
1048     ironlake_pp_state_base_address(ctx, pp_context);
1049     ironlake_pp_state_pointers(ctx, pp_context);
1050     ironlake_pp_urb_layout(ctx, pp_context);
1051     ironlake_pp_cs_urb_layout(ctx, pp_context);
1052     ironlake_pp_constant_buffer(ctx, pp_context);
1053     ironlake_pp_object_walker(ctx, pp_context);
1054     intel_batchbuffer_end_atomic(batch);
1055 }
1056
1057 static void
1058 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1059                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1060                           int width, int height, int pitch, int format, 
1061                           int index, int is_target)
1062 {
1063     struct i965_surface_state *ss;
1064     dri_bo *ss_bo;
1065     unsigned int tiling;
1066     unsigned int swizzle;
1067
1068     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1069     ss_bo = pp_context->surface_state_binding_table.bo;
1070     assert(ss_bo);
1071
1072     dri_bo_map(ss_bo, True);
1073     assert(ss_bo->virtual);
1074     ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1075     memset(ss, 0, sizeof(*ss));
1076     ss->ss0.surface_type = I965_SURFACE_2D;
1077     ss->ss0.surface_format = format;
1078     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1079     ss->ss2.width = width - 1;
1080     ss->ss2.height = height - 1;
1081     ss->ss3.pitch = pitch - 1;
1082     pp_set_surface_tiling(ss, tiling);
1083     dri_bo_emit_reloc(ss_bo,
1084                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1085                       surf_bo_offset,
1086                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
1087                       surf_bo);
1088     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1089     dri_bo_unmap(ss_bo);
1090 }
1091
1092 static void
1093 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1094                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1095                            int width, int height, int wpitch,
1096                            int xoffset, int yoffset,
1097                            int format, int interleave_chroma,
1098                            int index)
1099 {
1100     struct i965_surface_state2 *ss2;
1101     dri_bo *ss2_bo;
1102     unsigned int tiling;
1103     unsigned int swizzle;
1104
1105     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1106     ss2_bo = pp_context->surface_state_binding_table.bo;
1107     assert(ss2_bo);
1108
1109     dri_bo_map(ss2_bo, True);
1110     assert(ss2_bo->virtual);
1111     ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1112     memset(ss2, 0, sizeof(*ss2));
1113     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1114     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1115     ss2->ss1.width = width - 1;
1116     ss2->ss1.height = height - 1;
1117     ss2->ss2.pitch = wpitch - 1;
1118     ss2->ss2.interleave_chroma = interleave_chroma;
1119     ss2->ss2.surface_format = format;
1120     ss2->ss3.x_offset_for_cb = xoffset;
1121     ss2->ss3.y_offset_for_cb = yoffset;
1122     pp_set_surface2_tiling(ss2, tiling);
1123     dri_bo_emit_reloc(ss2_bo,
1124                       I915_GEM_DOMAIN_RENDER, 0,
1125                       surf_bo_offset,
1126                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
1127                       surf_bo);
1128     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1129     dri_bo_unmap(ss2_bo);
1130 }
1131
1132 static void
1133 gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1134                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1135                           int width, int height, int pitch, int format, 
1136                           int index, int is_target)
1137 {
1138     struct gen7_surface_state *ss;
1139     dri_bo *ss_bo;
1140     unsigned int tiling;
1141     unsigned int swizzle;
1142
1143     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1144     ss_bo = pp_context->surface_state_binding_table.bo;
1145     assert(ss_bo);
1146
1147     dri_bo_map(ss_bo, True);
1148     assert(ss_bo->virtual);
1149     ss = (struct gen7_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1150     memset(ss, 0, sizeof(*ss));
1151     ss->ss0.surface_type = I965_SURFACE_2D;
1152     ss->ss0.surface_format = format;
1153     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1154     ss->ss2.width = width - 1;
1155     ss->ss2.height = height - 1;
1156     ss->ss3.pitch = pitch - 1;
1157     gen7_pp_set_surface_tiling(ss, tiling);
1158     dri_bo_emit_reloc(ss_bo,
1159                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1160                       surf_bo_offset,
1161                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
1162                       surf_bo);
1163     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1164     dri_bo_unmap(ss_bo);
1165 }
1166
1167 static void
1168 gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1169                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1170                            int width, int height, int wpitch,
1171                            int xoffset, int yoffset,
1172                            int format, int interleave_chroma,
1173                            int index)
1174 {
1175     struct gen7_surface_state2 *ss2;
1176     dri_bo *ss2_bo;
1177     unsigned int tiling;
1178     unsigned int swizzle;
1179
1180     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1181     ss2_bo = pp_context->surface_state_binding_table.bo;
1182     assert(ss2_bo);
1183
1184     dri_bo_map(ss2_bo, True);
1185     assert(ss2_bo->virtual);
1186     ss2 = (struct gen7_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1187     memset(ss2, 0, sizeof(*ss2));
1188     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1189     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1190     ss2->ss1.width = width - 1;
1191     ss2->ss1.height = height - 1;
1192     ss2->ss2.pitch = wpitch - 1;
1193     ss2->ss2.interleave_chroma = interleave_chroma;
1194     ss2->ss2.surface_format = format;
1195     ss2->ss3.x_offset_for_cb = xoffset;
1196     ss2->ss3.y_offset_for_cb = yoffset;
1197     gen7_pp_set_surface2_tiling(ss2, tiling);
1198     dri_bo_emit_reloc(ss2_bo,
1199                       I915_GEM_DOMAIN_RENDER, 0,
1200                       surf_bo_offset,
1201                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
1202                       surf_bo);
1203     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1204     dri_bo_unmap(ss2_bo);
1205 }
1206
1207 static void 
1208 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1209                                 const struct i965_surface *surface, 
1210                                 int base_index, int is_target,
1211                                 int *width, int *height, int *pitch, int *offset)
1212 {
1213     struct i965_driver_data *i965 = i965_driver_data(ctx);
1214     struct object_surface *obj_surface;
1215     struct object_image *obj_image;
1216     dri_bo *bo;
1217     int fourcc = pp_get_surface_fourcc(ctx, surface);
1218     const int Y = 0;
1219     const int U = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 2 : 1;
1220     const int V = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 1 : 2;
1221     const int UV = 1;
1222     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1223     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); 
1224
1225     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1226         obj_surface = SURFACE(surface->id);
1227         bo = obj_surface->bo;
1228         width[0] = obj_surface->orig_width;
1229         height[0] = obj_surface->orig_height;
1230         pitch[0] = obj_surface->width;
1231         offset[0] = 0;
1232
1233         if (packed_yuv ) {
1234             width[0] = obj_surface->orig_width * 2; 
1235             pitch[0] = obj_surface->width * 2;
1236         }
1237         else if (interleaved_uv) {
1238             width[1] = obj_surface->orig_width;
1239             height[1] = obj_surface->orig_height / 2;
1240             pitch[1] = obj_surface->width;
1241             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1242         } else {
1243             width[1] = obj_surface->orig_width / 2;
1244             height[1] = obj_surface->orig_height / 2;
1245             pitch[1] = obj_surface->width / 2;
1246             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1247             width[2] = obj_surface->orig_width / 2;
1248             height[2] = obj_surface->orig_height / 2;
1249             pitch[2] = obj_surface->width / 2;
1250             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
1251         }
1252     } else {
1253         obj_image = IMAGE(surface->id);
1254         bo = obj_image->bo;
1255         width[0] = obj_image->image.width;
1256         height[0] = obj_image->image.height;
1257         pitch[0] = obj_image->image.pitches[0];
1258         offset[0] = obj_image->image.offsets[0];
1259
1260         if (interleaved_uv) {
1261             width[1] = obj_image->image.width;
1262             height[1] = obj_image->image.height / 2;
1263             pitch[1] = obj_image->image.pitches[1];
1264             offset[1] = obj_image->image.offsets[1];
1265         } else {
1266             width[1] = obj_image->image.width / 2;
1267             height[1] = obj_image->image.height / 2;
1268             pitch[1] = obj_image->image.pitches[1];
1269             offset[1] = obj_image->image.offsets[1];
1270             width[2] = obj_image->image.width / 2;
1271             height[2] = obj_image->image.height / 2;
1272             pitch[2] = obj_image->image.pitches[2];
1273             offset[2] = obj_image->image.offsets[2];
1274         }
1275     }
1276
1277     /* Y surface */
1278     i965_pp_set_surface_state(ctx, pp_context,
1279                               bo, offset[Y],
1280                               width[Y] / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
1281                               base_index, is_target);
1282
1283     if (!packed_yuv) {
1284         if (interleaved_uv) {
1285             i965_pp_set_surface_state(ctx, pp_context,
1286                                       bo, offset[UV],
1287                                       width[UV] / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
1288                                       base_index + 1, is_target);
1289         } else {
1290             /* U surface */
1291             i965_pp_set_surface_state(ctx, pp_context,
1292                                       bo, offset[U],
1293                                       width[U] / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
1294                                       base_index + 1, is_target);
1295
1296             /* V surface */
1297             i965_pp_set_surface_state(ctx, pp_context,
1298                                       bo, offset[V],
1299                                       width[V] / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
1300                                       base_index + 2, is_target);
1301         }
1302     }
1303
1304 }
1305
1306 static void 
1307 gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1308                                      const struct i965_surface *surface, 
1309                                      int base_index, int is_target,
1310                                      int *width, int *height, int *pitch, int *offset)
1311 {
1312     struct i965_driver_data *i965 = i965_driver_data(ctx);
1313     struct object_surface *obj_surface;
1314     struct object_image *obj_image;
1315     dri_bo *bo;
1316     int fourcc = pp_get_surface_fourcc(ctx, surface);
1317     const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1318                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1;
1319     const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1320                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 1 : 2;
1321     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1322
1323     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1324         obj_surface = SURFACE(surface->id);
1325         bo = obj_surface->bo;
1326         width[0] = obj_surface->orig_width;
1327         height[0] = obj_surface->orig_height;
1328         pitch[0] = obj_surface->width;
1329         offset[0] = 0;
1330
1331         width[1] = obj_surface->cb_cr_width;
1332         height[1] = obj_surface->cb_cr_height;
1333         pitch[1] = obj_surface->cb_cr_pitch;
1334         offset[1] = obj_surface->y_cb_offset * obj_surface->width;
1335
1336         width[2] = obj_surface->cb_cr_width;
1337         height[2] = obj_surface->cb_cr_height;
1338         pitch[2] = obj_surface->cb_cr_pitch;
1339         offset[2] = obj_surface->y_cr_offset * obj_surface->width;
1340     } else {
1341         obj_image = IMAGE(surface->id);
1342         bo = obj_image->bo;
1343         width[0] = obj_image->image.width;
1344         height[0] = obj_image->image.height;
1345         pitch[0] = obj_image->image.pitches[0];
1346         offset[0] = obj_image->image.offsets[0];
1347
1348         if (interleaved_uv) {
1349             width[1] = obj_image->image.width;
1350             height[1] = obj_image->image.height / 2;
1351             pitch[1] = obj_image->image.pitches[1];
1352             offset[1] = obj_image->image.offsets[1];
1353         } else {
1354             width[1] = obj_image->image.width / 2;
1355             height[1] = obj_image->image.height / 2;
1356             pitch[1] = obj_image->image.pitches[U];
1357             offset[1] = obj_image->image.offsets[U];
1358             width[2] = obj_image->image.width / 2;
1359             height[2] = obj_image->image.height / 2;
1360             pitch[2] = obj_image->image.pitches[V];
1361             offset[2] = obj_image->image.offsets[V];
1362         }
1363     }
1364
1365     if (is_target) {
1366         gen7_pp_set_surface_state(ctx, pp_context,
1367                                   bo, 0,
1368                                   width[0] / 4, height[0], pitch[0],
1369                                   I965_SURFACEFORMAT_R8_SINT,
1370                                   base_index, 1);
1371
1372         if (interleaved_uv) {
1373             gen7_pp_set_surface_state(ctx, pp_context,
1374                                       bo, offset[1],
1375                                       width[1] / 2, height[1], pitch[1],
1376                                       I965_SURFACEFORMAT_R8G8_SINT,
1377                                       base_index + 1, 1);
1378         } else {
1379             gen7_pp_set_surface_state(ctx, pp_context,
1380                                       bo, offset[1],
1381                                       width[1] / 4, height[1], pitch[1],
1382                                       I965_SURFACEFORMAT_R8_SINT,
1383                                       base_index + 1, 1);
1384             gen7_pp_set_surface_state(ctx, pp_context,
1385                                       bo, offset[2],
1386                                       width[2] / 4, height[2], pitch[2],
1387                                       I965_SURFACEFORMAT_R8_SINT,
1388                                       base_index + 2, 1);
1389         }
1390     } else {
1391         gen7_pp_set_surface2_state(ctx, pp_context,
1392                                    bo, offset[0],
1393                                    width[0], height[0], pitch[0],
1394                                    0, 0,
1395                                    SURFACE_FORMAT_Y8_UNORM, 0,
1396                                    base_index);
1397
1398         if (interleaved_uv) {
1399             gen7_pp_set_surface2_state(ctx, pp_context,
1400                                        bo, offset[1],
1401                                        width[1], height[1], pitch[1],
1402                                        0, 0,
1403                                        SURFACE_FORMAT_R8B8_UNORM, 0,
1404                                        base_index + 1);
1405         } else {
1406             gen7_pp_set_surface2_state(ctx, pp_context,
1407                                        bo, offset[1],
1408                                        width[1], height[1], pitch[1],
1409                                        0, 0,
1410                                        SURFACE_FORMAT_R8_UNORM, 0,
1411                                        base_index + 1);
1412             gen7_pp_set_surface2_state(ctx, pp_context,
1413                                        bo, offset[2],
1414                                        width[2], height[2], pitch[2],
1415                                        0, 0,
1416                                        SURFACE_FORMAT_R8_UNORM, 0,
1417                                        base_index + 2);
1418         }
1419     }
1420 }
1421
/* Horizontal step count for the "null" kernel: always a single step. */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context;      /* the null kernel keeps no private state */

    return 1;
}
1427
/* Vertical step count for the "null" kernel: always a single step. */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context;      /* the null kernel keeps no private state */

    return 1;
}
1433
/*
 * Per-block hook for the "null" kernel: nothing to set up.  Always
 * returns 0, which the object walker treats as "emit this block".
 */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;
    (void)x;
    (void)y;

    return 0;
}
1439
1440 static VAStatus
1441 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1442                    const struct i965_surface *src_surface,
1443                    const VARectangle *src_rect,
1444                    struct i965_surface *dst_surface,
1445                    const VARectangle *dst_rect,
1446                    void *filter_param)
1447 {
1448     /* private function & data */
1449     pp_context->pp_x_steps = pp_null_x_steps;
1450     pp_context->pp_y_steps = pp_null_y_steps;
1451     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
1452
1453     dst_surface->flags = src_surface->flags;
1454
1455     return VA_STATUS_SUCCESS;
1456 }
1457
/* Horizontal step count for the load/save kernels: always a single step. */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context;      /* not needed for the horizontal count */

    return 1;
}
1463
1464 static int
1465 pp_load_save_y_steps(void *private_context)
1466 {
1467     struct pp_load_save_context *pp_load_save_context = private_context;
1468
1469     return pp_load_save_context->dest_h / 8;
1470 }
1471
1472 static int
1473 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1474 {
1475     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1476
1477     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
1478     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
1479     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
1480     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
1481
1482     return 0;
1483 }
1484
1485 static VAStatus
1486 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1487                                 const struct i965_surface *src_surface,
1488                                 const VARectangle *src_rect,
1489                                 struct i965_surface *dst_surface,
1490                                 const VARectangle *dst_rect,
1491                                 void *filter_param)
1492 {
1493     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
1494     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1495     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1496     int width[3], height[3], pitch[3], offset[3];
1497     const int Y = 0;
1498
1499     /* source surface */
1500     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
1501                                     width, height, pitch, offset);
1502
1503     /* destination surface */
1504     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
1505                                     width, height, pitch, offset);
1506
1507     /* private function & data */
1508     pp_context->pp_x_steps = pp_load_save_x_steps;
1509     pp_context->pp_y_steps = pp_load_save_y_steps;
1510     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
1511     pp_load_save_context->dest_h = ALIGN(height[Y], 16);
1512     pp_load_save_context->dest_w = ALIGN(width[Y], 16);
1513
1514     pp_inline_parameter->grf5.block_count_x = ALIGN(width[Y], 16) / 16;   /* 1 x N */
1515     pp_inline_parameter->grf5.number_blocks = ALIGN(width[Y], 16) / 16;
1516
1517     pp_static_parameter->grf3.horizontal_origin_offset = src_rect->x;
1518     pp_static_parameter->grf3.vertical_origin_offset = src_rect->y;
1519
1520     dst_surface->flags = src_surface->flags;
1521
1522     return VA_STATUS_SUCCESS;
1523 }
1524
/* Horizontal step count for the scaling kernel: always a single step. */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context;      /* not needed for the horizontal count */

    return 1;
}
1530
1531 static int
1532 pp_scaling_y_steps(void *private_context)
1533 {
1534     struct pp_scaling_context *pp_scaling_context = private_context;
1535
1536     return pp_scaling_context->dest_h / 8;
1537 }
1538
1539 static int
1540 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1541 {
1542     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
1543     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1544     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1545     float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1546     float src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
1547
1548     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
1549     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
1550     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
1551     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
1552     
1553     return 0;
1554 }
1555
1556 static VAStatus
1557 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1558                            const struct i965_surface *src_surface,
1559                            const VARectangle *src_rect,
1560                            struct i965_surface *dst_surface,
1561                            const VARectangle *dst_rect,
1562                            void *filter_param)
1563 {
1564     struct i965_driver_data *i965 = i965_driver_data(ctx);
1565     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
1566     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1567     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1568     struct object_surface *obj_surface;
1569     struct i965_sampler_state *sampler_state;
1570     int in_w, in_h, in_wpitch, in_hpitch;
1571     int out_w, out_h, out_wpitch, out_hpitch;
1572
1573     /* source surface */
1574     obj_surface = SURFACE(src_surface->id);
1575     in_w = obj_surface->orig_width;
1576     in_h = obj_surface->orig_height;
1577     in_wpitch = obj_surface->width;
1578     in_hpitch = obj_surface->height;
1579
1580     /* source Y surface index 1 */
1581     i965_pp_set_surface_state(ctx, pp_context,
1582                               obj_surface->bo, 0,
1583                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1584                               1, 0);
1585
1586     /* source UV surface index 2 */
1587     i965_pp_set_surface_state(ctx, pp_context,
1588                               obj_surface->bo, in_wpitch * in_hpitch,
1589                               in_w / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1590                               2, 0);
1591
1592     /* destination surface */
1593     obj_surface = SURFACE(dst_surface->id);
1594     out_w = obj_surface->orig_width;
1595     out_h = obj_surface->orig_height;
1596     out_wpitch = obj_surface->width;
1597     out_hpitch = obj_surface->height;
1598
1599     /* destination Y surface index 7 */
1600     i965_pp_set_surface_state(ctx, pp_context,
1601                               obj_surface->bo, 0,
1602                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1603                               7, 1);
1604
1605     /* destination UV surface index 8 */
1606     i965_pp_set_surface_state(ctx, pp_context,
1607                               obj_surface->bo, out_wpitch * out_hpitch,
1608                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1609                               8, 1);
1610
1611     /* sampler state */
1612     dri_bo_map(pp_context->sampler_state_table.bo, True);
1613     assert(pp_context->sampler_state_table.bo->virtual);
1614     sampler_state = pp_context->sampler_state_table.bo->virtual;
1615
1616     /* SIMD16 Y index 1 */
1617     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
1618     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1619     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1620     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1621     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1622
1623     /* SIMD16 UV index 2 */
1624     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
1625     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1626     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1627     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1628     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1629
1630     dri_bo_unmap(pp_context->sampler_state_table.bo);
1631
1632     /* private function & data */
1633     pp_context->pp_x_steps = pp_scaling_x_steps;
1634     pp_context->pp_y_steps = pp_scaling_y_steps;
1635     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
1636
1637     pp_scaling_context->dest_x = dst_rect->x;
1638     pp_scaling_context->dest_y = dst_rect->y;
1639     pp_scaling_context->dest_w = ALIGN(dst_rect->width, 16);
1640     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 16);
1641     pp_scaling_context->src_normalized_x = (float)src_rect->x / in_w;
1642     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
1643
1644     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
1645
1646     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / dst_rect->width;
1647     pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
1648     pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
1649     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
1650     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
1651
1652     dst_surface->flags = src_surface->flags;
1653
1654     return VA_STATUS_SUCCESS;
1655 }
1656
1657 static int
1658 pp_avs_x_steps(void *private_context)
1659 {
1660     struct pp_avs_context *pp_avs_context = private_context;
1661
1662     return pp_avs_context->dest_w / 16;
1663 }
1664
/* Vertical step count for the AVS kernel: always a single step. */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;      /* not needed for the vertical count */

    return 1;
}
1670
1671 static int
1672 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1673 {
1674     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1675     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1676     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1677     float src_x_steping, src_y_steping, video_step_delta;
1678     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
1679
1680     if (pp_static_parameter->grf4.r4_2.avs.nlas == 0) {
1681         src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1682         pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_avs_context->src_normalized_x;
1683     } else if (tmp_w >= pp_avs_context->dest_w) {
1684         pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1685         pp_inline_parameter->grf6.video_step_delta = 0;
1686         
1687         if (x == 0) {
1688             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
1689                 pp_avs_context->src_normalized_x;
1690         } else {
1691             src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1692             video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1693             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1694                 16 * 15 * video_step_delta / 2;
1695         }
1696     } else {
1697         int n0, n1, n2, nls_left, nls_right;
1698         int factor_a = 5, factor_b = 4;
1699         float f;
1700
1701         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
1702         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
1703         n2 = tmp_w / (16 * factor_a);
1704         nls_left = n0 + n2;
1705         nls_right = n1 + n2;
1706         f = (float) n2 * 16 / tmp_w;
1707         
1708         if (n0 < 5) {
1709             pp_inline_parameter->grf6.video_step_delta = 0.0;
1710
1711             if (x == 0) {
1712                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
1713                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1714             } else {
1715                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1716                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1717                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1718                     16 * 15 * video_step_delta / 2;
1719             }
1720         } else {
1721             if (x < nls_left) {
1722                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
1723                 float a = f / (nls_left * 16 * factor_b);
1724                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
1725                 
1726                 pp_inline_parameter->grf6.video_step_delta = b;
1727
1728                 if (x == 0) {
1729                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1730                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a;
1731                 } else {
1732                     src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1733                     video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1734                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1735                         16 * 15 * video_step_delta / 2;
1736                     pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
1737                 }
1738             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
1739                 /* scale the center linearly */
1740                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1741                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1742                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1743                     16 * 15 * video_step_delta / 2;
1744                 pp_inline_parameter->grf6.video_step_delta = 0.0;
1745                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1746             } else {
1747                 float a = f / (nls_right * 16 * factor_b);
1748                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
1749
1750                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1751                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1752                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1753                     16 * 15 * video_step_delta / 2;
1754                 pp_inline_parameter->grf6.video_step_delta = -b;
1755
1756                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
1757                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
1758                 else
1759                     pp_inline_parameter->grf5.normalized_video_x_scaling_step -= b * 16;
1760             }
1761         }
1762     }
1763
1764     src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
1765     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
1766     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
1767     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
1768
1769     return 0;
1770 }
1771
1772 static VAStatus
1773 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1774                        const struct i965_surface *src_surface,
1775                        const VARectangle *src_rect,
1776                        struct i965_surface *dst_surface,
1777                        const VARectangle *dst_rect,
1778                        void *filter_param,
1779                        int nlas)
1780 {
1781     struct i965_driver_data *i965 = i965_driver_data(ctx);
1782     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1783     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1784     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1785     struct object_surface *obj_surface;
1786     struct i965_sampler_8x8 *sampler_8x8;
1787     struct i965_sampler_8x8_state *sampler_8x8_state;
1788     int index;
1789     int in_w, in_h, in_wpitch, in_hpitch;
1790     int out_w, out_h, out_wpitch, out_hpitch;
1791     int i;
1792
1793     /* surface */
1794     obj_surface = SURFACE(src_surface->id);
1795     in_w = obj_surface->orig_width;
1796     in_h = obj_surface->orig_height;
1797     in_wpitch = obj_surface->width;
1798     in_hpitch = obj_surface->height;
1799
1800     /* source Y surface index 1 */
1801     i965_pp_set_surface2_state(ctx, pp_context,
1802                                obj_surface->bo, 0,
1803                                in_w, in_h, in_wpitch,
1804                                0, 0,
1805                                SURFACE_FORMAT_Y8_UNORM, 0,
1806                                1);
1807
1808     /* source UV surface index 2 */
1809     i965_pp_set_surface2_state(ctx, pp_context,
1810                                obj_surface->bo, in_wpitch * in_hpitch,
1811                                in_w / 2, in_h / 2, in_wpitch,
1812                                0, 0,
1813                                SURFACE_FORMAT_R8B8_UNORM, 0,
1814                                2);
1815
1816     /* destination surface */
1817     obj_surface = SURFACE(dst_surface->id);
1818     out_w = obj_surface->orig_width;
1819     out_h = obj_surface->orig_height;
1820     out_wpitch = obj_surface->width;
1821     out_hpitch = obj_surface->height;
1822     assert(out_w <= out_wpitch && out_h <= out_hpitch);
1823
1824     /* destination Y surface index 7 */
1825     i965_pp_set_surface_state(ctx, pp_context,
1826                               obj_surface->bo, 0,
1827                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1828                               7, 1);
1829
1830     /* destination UV surface index 8 */
1831     i965_pp_set_surface_state(ctx, pp_context,
1832                               obj_surface->bo, out_wpitch * out_hpitch,
1833                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1834                               8, 1);
1835
1836     /* sampler 8x8 state */
1837     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
1838     assert(pp_context->sampler_state_table.bo_8x8->virtual);
1839     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1840     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
1841     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1842
1843     for (i = 0; i < 17; i++) {
1844         /* for Y channel, currently ignore */
1845         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x00;
1846         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x00;
1847         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x08;
1848         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x18;
1849         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x18;
1850         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x08;
1851         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x00;
1852         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x00;
1853         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x00;
1854         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x00;
1855         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x10;
1856         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x10;
1857         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x10;
1858         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x10;
1859         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x00;
1860         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x00;
1861         /* for U/V channel, 0.25 */
1862         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
1863         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
1864         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
1865         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
1866         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
1867         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
1868         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
1869         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
1870         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
1871         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
1872         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
1873         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
1874         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
1875         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
1876         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
1877         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
1878     }
1879
1880     sampler_8x8_state->dw136.default_sharpness_level = 0;
1881     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
1882     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1883     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1884     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
1885
1886     /* sampler 8x8 */
1887     dri_bo_map(pp_context->sampler_state_table.bo, True);
1888     assert(pp_context->sampler_state_table.bo->virtual);
1889     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
1890     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
1891
1892     /* sample_8x8 Y index 1 */
1893     index = 1;
1894     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1895     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
1896     sampler_8x8[index].dw0.ief_bypass = 1;
1897     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1898     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1899     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
1900     sampler_8x8[index].dw2.global_noise_estimation = 22;
1901     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1902     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1903     sampler_8x8[index].dw3.strong_edge_weight = 7;
1904     sampler_8x8[index].dw3.regular_weight = 2;
1905     sampler_8x8[index].dw3.non_edge_weight = 0;
1906     sampler_8x8[index].dw3.gain_factor = 40;
1907     sampler_8x8[index].dw4.steepness_boost = 0;
1908     sampler_8x8[index].dw4.steepness_threshold = 0;
1909     sampler_8x8[index].dw4.mr_boost = 0;
1910     sampler_8x8[index].dw4.mr_threshold = 5;
1911     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1912     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1913     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1914     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1915     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1916     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1917     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1918     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1919     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1920     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1921     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1922     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1923     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1924     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1925     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1926     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1927     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1928     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1929     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1930     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1931     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1932     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1933     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1934     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1935     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1936     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1937     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1938     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1939     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1940     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1941     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1942     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1943     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1944     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1945     sampler_8x8[index].dw13.limiter_boost = 0;
1946     sampler_8x8[index].dw13.minimum_limiter = 10;
1947     sampler_8x8[index].dw13.maximum_limiter = 11;
1948     sampler_8x8[index].dw14.clip_limiter = 130;
1949     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1950                       I915_GEM_DOMAIN_RENDER, 
1951                       0,
1952                       0,
1953                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1954                       pp_context->sampler_state_table.bo_8x8);
1955
1956     /* sample_8x8 UV index 2 */
1957     index = 2;
1958     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1959     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
1960     sampler_8x8[index].dw0.ief_bypass = 1;
1961     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1962     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1963     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
1964     sampler_8x8[index].dw2.global_noise_estimation = 22;
1965     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1966     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1967     sampler_8x8[index].dw3.strong_edge_weight = 7;
1968     sampler_8x8[index].dw3.regular_weight = 2;
1969     sampler_8x8[index].dw3.non_edge_weight = 0;
1970     sampler_8x8[index].dw3.gain_factor = 40;
1971     sampler_8x8[index].dw4.steepness_boost = 0;
1972     sampler_8x8[index].dw4.steepness_threshold = 0;
1973     sampler_8x8[index].dw4.mr_boost = 0;
1974     sampler_8x8[index].dw4.mr_threshold = 5;
1975     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1976     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1977     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1978     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1979     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1980     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1981     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1982     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1983     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1984     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1985     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1986     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1987     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1988     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1989     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1990     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1991     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1992     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1993     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1994     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1995     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1996     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1997     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1998     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1999     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2000     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2001     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2002     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2003     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2004     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2005     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2006     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2007     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2008     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2009     sampler_8x8[index].dw13.limiter_boost = 0;
2010     sampler_8x8[index].dw13.minimum_limiter = 10;
2011     sampler_8x8[index].dw13.maximum_limiter = 11;
2012     sampler_8x8[index].dw14.clip_limiter = 130;
2013     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2014                       I915_GEM_DOMAIN_RENDER, 
2015                       0,
2016                       0,
2017                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2018                       pp_context->sampler_state_table.bo_8x8);
2019
2020     dri_bo_unmap(pp_context->sampler_state_table.bo);
2021
2022     /* private function & data */
2023     pp_context->pp_x_steps = pp_avs_x_steps;
2024     pp_context->pp_y_steps = pp_avs_y_steps;
2025     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
2026
2027     pp_avs_context->dest_x = dst_rect->x;
2028     pp_avs_context->dest_y = dst_rect->y;
2029     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
2030     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
2031     pp_avs_context->src_normalized_x = (float)src_rect->x / in_w;
2032     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
2033     pp_avs_context->src_w = src_rect->width;
2034     pp_avs_context->src_h = src_rect->height;
2035
2036     pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
2037     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2038
2039     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / dst_rect->width;
2040     pp_inline_parameter->grf5.block_count_x = 1;        /* M x 1 */
2041     pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
2042     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
2043     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
2044     pp_inline_parameter->grf6.video_step_delta = 0.0;
2045
2046     dst_surface->flags = src_surface->flags;
2047
2048     return VA_STATUS_SUCCESS;
2049 }
2050
2051 static VAStatus
2052 pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2053                             const struct i965_surface *src_surface,
2054                             const VARectangle *src_rect,
2055                             struct i965_surface *dst_surface,
2056                             const VARectangle *dst_rect,
2057                             void *filter_param)
2058 {
2059     return pp_nv12_avs_initialize(ctx, pp_context,
2060                                   src_surface,
2061                                   src_rect,
2062                                   dst_surface,
2063                                   dst_rect,
2064                                   filter_param,
2065                                   1);
2066 }
2067
2068 static VAStatus
2069 gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2070                              const struct i965_surface *src_surface,
2071                              const VARectangle *src_rect,
2072                              struct i965_surface *dst_surface,
2073                              const VARectangle *dst_rect,
2074                              void *filter_param)
2075 {
2076     return pp_nv12_avs_initialize(ctx, pp_context,
2077                                   src_surface,
2078                                   src_rect,
2079                                   dst_surface,
2080                                   dst_rect,
2081                                   filter_param,
2082                                   0);    
2083 }
2084
2085 static int
2086 gen7_pp_avs_x_steps(void *private_context)
2087 {
2088     struct pp_avs_context *pp_avs_context = private_context;
2089
2090     return pp_avs_context->dest_w / 16;
2091 }
2092
2093 static int
2094 gen7_pp_avs_y_steps(void *private_context)
2095 {
2096     struct pp_avs_context *pp_avs_context = private_context;
2097
2098     return pp_avs_context->dest_h / 16;
2099 }
2100
2101 static int
2102 gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2103 {
2104     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2105     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2106
2107     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2108     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
2109     pp_inline_parameter->grf7.constant_0 = 0xffffffff;
2110     pp_inline_parameter->grf7.sampler_load_main_video_x_scaling_step = 1.0 / pp_avs_context->src_w;
2111
2112     return 0;
2113 }
2114
2115 static VAStatus
2116 gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2117                            const struct i965_surface *src_surface,
2118                            const VARectangle *src_rect,
2119                            struct i965_surface *dst_surface,
2120                            const VARectangle *dst_rect,
2121                            void *filter_param)
2122 {
2123     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2124     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2125     struct gen7_sampler_8x8 *sampler_8x8;
2126     struct i965_sampler_8x8_state *sampler_8x8_state;
2127     int index, i;
2128     int width[3], height[3], pitch[3], offset[3];
2129
2130     /* source surface */
2131     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
2132                                          width, height, pitch, offset);
2133
2134     /* destination surface */
2135     gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
2136                                          width, height, pitch, offset);
2137
2138     /* sampler 8x8 state */
2139     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2140     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2141     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2142     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2143     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2144
2145     for (i = 0; i < 17; i++) {
2146         /* for Y channel, currently ignore */
2147         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0;
2148         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0;
2149         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0;
2150         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x0;
2151         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x0;
2152         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0;
2153         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0;
2154         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0;
2155         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0;
2156         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0;
2157         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0;
2158         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x0;
2159         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x0;
2160         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0;
2161         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0;
2162         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0;
2163         /* for U/V channel, 0.25 */
2164         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2165         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2166         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
2167         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
2168         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
2169         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
2170         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2171         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2172         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2173         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2174         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
2175         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
2176         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
2177         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
2178         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2179         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2180     }
2181
2182     sampler_8x8_state->dw136.default_sharpness_level = 0;
2183     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
2184     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2185     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2186     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2187
2188     /* sampler 8x8 */
2189     dri_bo_map(pp_context->sampler_state_table.bo, True);
2190     assert(pp_context->sampler_state_table.bo->virtual);
2191     assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
2192     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2193
2194     /* sample_8x8 Y index 4 */
2195     index = 4;
2196     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2197     sampler_8x8[index].dw0.global_noise_estimation = 255;
2198     sampler_8x8[index].dw0.ief_bypass = 1;
2199
2200     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2201
2202     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2203     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2204     sampler_8x8[index].dw2.r5x_coefficient = 9;
2205     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2206     sampler_8x8[index].dw2.r5c_coefficient = 3;
2207
2208     sampler_8x8[index].dw3.r3x_coefficient = 27;
2209     sampler_8x8[index].dw3.r3c_coefficient = 5;
2210     sampler_8x8[index].dw3.gain_factor = 40;
2211     sampler_8x8[index].dw3.non_edge_weight = 1;
2212     sampler_8x8[index].dw3.regular_weight = 2;
2213     sampler_8x8[index].dw3.strong_edge_weight = 7;
2214     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2215
2216     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2217                       I915_GEM_DOMAIN_RENDER, 
2218                       0,
2219                       0,
2220                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2221                       pp_context->sampler_state_table.bo_8x8);
2222
2223     /* sample_8x8 UV index 8 */
2224     index = 8;
2225     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2226     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2227     sampler_8x8[index].dw0.global_noise_estimation = 255;
2228     sampler_8x8[index].dw0.ief_bypass = 1;
2229     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2230     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2231     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2232     sampler_8x8[index].dw2.r5x_coefficient = 9;
2233     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2234     sampler_8x8[index].dw2.r5c_coefficient = 3;
2235     sampler_8x8[index].dw3.r3x_coefficient = 27;
2236     sampler_8x8[index].dw3.r3c_coefficient = 5;
2237     sampler_8x8[index].dw3.gain_factor = 40;
2238     sampler_8x8[index].dw3.non_edge_weight = 1;
2239     sampler_8x8[index].dw3.regular_weight = 2;
2240     sampler_8x8[index].dw3.strong_edge_weight = 7;
2241     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2242
2243     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2244                       I915_GEM_DOMAIN_RENDER, 
2245                       0,
2246                       0,
2247                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2248                       pp_context->sampler_state_table.bo_8x8);
2249
2250     /* sampler_8x8 V, index 12 */
2251     index = 12;
2252     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2253     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2254     sampler_8x8[index].dw0.global_noise_estimation = 255;
2255     sampler_8x8[index].dw0.ief_bypass = 1;
2256     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2257     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2258     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2259     sampler_8x8[index].dw2.r5x_coefficient = 9;
2260     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2261     sampler_8x8[index].dw2.r5c_coefficient = 3;
2262     sampler_8x8[index].dw3.r3x_coefficient = 27;
2263     sampler_8x8[index].dw3.r3c_coefficient = 5;
2264     sampler_8x8[index].dw3.gain_factor = 40;
2265     sampler_8x8[index].dw3.non_edge_weight = 1;
2266     sampler_8x8[index].dw3.regular_weight = 2;
2267     sampler_8x8[index].dw3.strong_edge_weight = 7;
2268     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2269
2270     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2271                       I915_GEM_DOMAIN_RENDER, 
2272                       0,
2273                       0,
2274                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2275                       pp_context->sampler_state_table.bo_8x8);
2276
2277     dri_bo_unmap(pp_context->sampler_state_table.bo);
2278
2279     /* private function & data */
2280     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
2281     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
2282     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
2283
2284     pp_avs_context->dest_x = dst_rect->x;
2285     pp_avs_context->dest_y = dst_rect->y;
2286     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
2287     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
2288     pp_avs_context->src_w = src_rect->width;
2289     pp_avs_context->src_h = src_rect->height;
2290
2291     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
2292     dw = MAX(dw, pp_avs_context->dest_w);
2293
2294     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
2295     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
2296     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) 1.0 / pp_avs_context->dest_h;
2297     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = -(float)pp_avs_context->dest_y / pp_avs_context->dest_h;
2298     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = -(float)pp_avs_context->dest_x / dw;
2299
2300     dst_surface->flags = src_surface->flags;
2301
2302     return VA_STATUS_SUCCESS;
2303 }
2304
/*
 * Horizontal step count for the DNDI (denoise/deinterlace) kernel walk.
 *
 * private_context is accepted to match the pp_x_steps callback signature
 * but is never read; the result is always 1.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2310
2311 static int
2312 pp_dndi_y_steps(void *private_context)
2313 {
2314     struct pp_dndi_context *pp_dndi_context = private_context;
2315
2316     return pp_dndi_context->dest_h / 4;
2317 }
2318
2319 static int
2320 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2321 {
2322     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2323
2324     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2325     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
2326
2327     return 0;
2328 }
2329
2330 static VAStatus
2331 pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2332                         const struct i965_surface *src_surface,
2333                         const VARectangle *src_rect,
2334                         struct i965_surface *dst_surface,
2335                         const VARectangle *dst_rect,
2336                         void *filter_param)
2337 {
2338     struct i965_driver_data *i965 = i965_driver_data(ctx);
2339     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
2340     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2341     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2342     struct object_surface *obj_surface;
2343     struct i965_sampler_dndi *sampler_dndi;
2344     int index;
2345     int w, h;
2346     int orig_w, orig_h;
2347     int dndi_top_first = 1;
2348
2349     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
2350         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
2351
2352     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
2353         dndi_top_first = 1;
2354     else
2355         dndi_top_first = 0;
2356
2357     /* surface */
2358     obj_surface = SURFACE(src_surface->id);
2359     orig_w = obj_surface->orig_width;
2360     orig_h = obj_surface->orig_height;
2361     w = obj_surface->width;
2362     h = obj_surface->height;
2363
2364     if (pp_context->stmm.bo == NULL) {
2365         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2366                                            "STMM surface",
2367                                            w * h,
2368                                            4096);
2369         assert(pp_context->stmm.bo);
2370     }
2371
2372     /* source UV surface index 2 */
2373     i965_pp_set_surface_state(ctx, pp_context,
2374                               obj_surface->bo, w * h,
2375                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2376                               2, 0);
2377
2378     /* source YUV surface index 4 */
2379     i965_pp_set_surface2_state(ctx, pp_context,
2380                                obj_surface->bo, 0,
2381                                orig_w, orig_h, w,
2382                                0, h,
2383                                SURFACE_FORMAT_PLANAR_420_8, 1,
2384                                4);
2385
2386     /* source STMM surface index 20 */
2387     i965_pp_set_surface_state(ctx, pp_context,
2388                               pp_context->stmm.bo, 0,
2389                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2390                               20, 1);
2391
2392     /* destination surface */
2393     obj_surface = SURFACE(dst_surface->id);
2394     orig_w = obj_surface->orig_width;
2395     orig_h = obj_surface->orig_height;
2396     w = obj_surface->width;
2397     h = obj_surface->height;
2398
2399     /* destination Y surface index 7 */
2400     i965_pp_set_surface_state(ctx, pp_context,
2401                               obj_surface->bo, 0,
2402                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2403                               7, 1);
2404
2405     /* destination UV surface index 8 */
2406     i965_pp_set_surface_state(ctx, pp_context,
2407                               obj_surface->bo, w * h,
2408                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2409                               8, 1);
2410     /* sampler dndi */
2411     dri_bo_map(pp_context->sampler_state_table.bo, True);
2412     assert(pp_context->sampler_state_table.bo->virtual);
2413     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2414     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2415
2416     /* sample dndi index 1 */
2417     index = 0;
2418     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2419     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
2420     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2421     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2422
2423     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2424     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 4;
2425     sampler_dndi[index].dw1.stmm_c2 = 1;
2426     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2427     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2428
2429     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
2430     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
2431     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
2432     sampler_dndi[index].dw2.good_neighbor_threshold = 4;                // 0-63
2433
2434     sampler_dndi[index].dw3.maximum_stmm = 128;
2435     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2436     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2437     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2438     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2439
2440     sampler_dndi[index].dw4.sdi_delta = 8;
2441     sampler_dndi[index].dw4.sdi_threshold = 128;
2442     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2443     sampler_dndi[index].dw4.stmm_shift_up = 0;
2444     sampler_dndi[index].dw4.stmm_shift_down = 0;
2445     sampler_dndi[index].dw4.minimum_stmm = 0;
2446
2447     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 8;
2448     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 32;
2449     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 64;
2450     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 32;
2451
2452     sampler_dndi[index].dw6.dn_enable = 1;
2453     sampler_dndi[index].dw6.di_enable = 1;
2454     sampler_dndi[index].dw6.di_partial = 0;
2455     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2456     sampler_dndi[index].dw6.dndi_stream_id = 0;
2457     sampler_dndi[index].dw6.dndi_first_frame = 1;
2458     sampler_dndi[index].dw6.progressive_dn = 0;
2459     sampler_dndi[index].dw6.fmd_tear_threshold = 63;
2460     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2461     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2462
2463     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
2464     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
2465     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2466     sampler_dndi[index].dw7.column_width_minus1 = 0;
2467
2468     dri_bo_unmap(pp_context->sampler_state_table.bo);
2469
2470     /* private function & data */
2471     pp_context->pp_x_steps = pp_dndi_x_steps;
2472     pp_context->pp_y_steps = pp_dndi_y_steps;
2473     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
2474
2475     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
2476     pp_static_parameter->grf1.r1_6.di.top_field_first = dndi_top_first;
2477     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 0;
2478     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 0;
2479
2480     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
2481     pp_inline_parameter->grf5.number_blocks = w / 16;
2482     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
2483     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
2484
2485     pp_dndi_context->dest_w = w;
2486     pp_dndi_context->dest_h = h;
2487
2488     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
2489
2490     return VA_STATUS_SUCCESS;
2491 }
2492
/*
 * Horizontal step count for the DN (denoise) kernel walk.
 *
 * private_context is accepted to match the pp_x_steps callback signature
 * but is never read; the result is always 1.
 */
static int
pp_dn_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2498
2499 static int
2500 pp_dn_y_steps(void *private_context)
2501 {
2502     struct pp_dn_context *pp_dn_context = private_context;
2503
2504     return pp_dn_context->dest_h / 8;
2505 }
2506
2507 static int
2508 pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2509 {
2510     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2511
2512     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2513     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
2514
2515     return 0;
2516 }
2517
2518 static VAStatus
2519 pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2520                       const struct i965_surface *src_surface,
2521                       const VARectangle *src_rect,
2522                       struct i965_surface *dst_surface,
2523                       const VARectangle *dst_rect,
2524                       void *filter_param)
2525 {
2526     struct i965_driver_data *i965 = i965_driver_data(ctx);
2527     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
2528     struct object_surface *obj_surface;
2529     struct i965_sampler_dndi *sampler_dndi;
2530     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2531     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2532     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
2533     int index;
2534     int w, h;
2535     int orig_w, orig_h;
2536     int dn_strength = 15;
2537     int dndi_top_first = 1;
2538     int dn_progressive = 0;
2539
2540     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
2541         dndi_top_first = 1;
2542         dn_progressive = 1;
2543     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
2544         dndi_top_first = 1;
2545         dn_progressive = 0;
2546     } else {
2547         dndi_top_first = 0;
2548         dn_progressive = 0;
2549     }
2550
2551     if (dn_filter_param) {
2552         float value = dn_filter_param->value;
2553         
2554         if (value > 1.0)
2555             value = 1.0;
2556         
2557         if (value < 0.0)
2558             value = 0.0;
2559
2560         dn_strength = (int)(value * 31.0F);
2561     }
2562
2563     /* surface */
2564     obj_surface = SURFACE(src_surface->id);
2565     orig_w = obj_surface->orig_width;
2566     orig_h = obj_surface->orig_height;
2567     w = obj_surface->width;
2568     h = obj_surface->height;
2569
2570     if (pp_context->stmm.bo == NULL) {
2571         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2572                                            "STMM surface",
2573                                            w * h,
2574                                            4096);
2575         assert(pp_context->stmm.bo);
2576     }
2577
2578     /* source UV surface index 2 */
2579     i965_pp_set_surface_state(ctx, pp_context,
2580                               obj_surface->bo, w * h,
2581                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2582                               2, 0);
2583
2584     /* source YUV surface index 4 */
2585     i965_pp_set_surface2_state(ctx, pp_context,
2586                                obj_surface->bo, 0,
2587                                orig_w, orig_h, w,
2588                                0, h,
2589                                SURFACE_FORMAT_PLANAR_420_8, 1,
2590                                4);
2591
2592     /* source STMM surface index 20 */
2593     i965_pp_set_surface_state(ctx, pp_context,
2594                               pp_context->stmm.bo, 0,
2595                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2596                               20, 1);
2597
2598     /* destination surface */
2599     obj_surface = SURFACE(dst_surface->id);
2600     orig_w = obj_surface->orig_width;
2601     orig_h = obj_surface->orig_height;
2602     w = obj_surface->width;
2603     h = obj_surface->height;
2604
2605     /* destination Y surface index 7 */
2606     i965_pp_set_surface_state(ctx, pp_context,
2607                               obj_surface->bo, 0,
2608                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2609                               7, 1);
2610
2611     /* destination UV surface index 8 */
2612     i965_pp_set_surface_state(ctx, pp_context,
2613                               obj_surface->bo, w * h,
2614                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2615                               8, 1);
2616     /* sampler dn */
2617     dri_bo_map(pp_context->sampler_state_table.bo, True);
2618     assert(pp_context->sampler_state_table.bo->virtual);
2619     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2620     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2621
2622     /* sample dndi index 1 */
2623     index = 0;
2624     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2625     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
2626     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2627     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2628
2629     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2630     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
2631     sampler_dndi[index].dw1.stmm_c2 = 0;
2632     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2633     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2634
2635     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
2636     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
2637     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
2638     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
2639
2640     sampler_dndi[index].dw3.maximum_stmm = 128;
2641     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2642     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2643     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2644     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2645
2646     sampler_dndi[index].dw4.sdi_delta = 8;
2647     sampler_dndi[index].dw4.sdi_threshold = 128;
2648     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2649     sampler_dndi[index].dw4.stmm_shift_up = 0;
2650     sampler_dndi[index].dw4.stmm_shift_down = 0;
2651     sampler_dndi[index].dw4.minimum_stmm = 0;
2652
2653     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
2654     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
2655     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
2656     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
2657
2658     sampler_dndi[index].dw6.dn_enable = 1;
2659     sampler_dndi[index].dw6.di_enable = 0;
2660     sampler_dndi[index].dw6.di_partial = 0;
2661     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2662     sampler_dndi[index].dw6.dndi_stream_id = 1;
2663     sampler_dndi[index].dw6.dndi_first_frame = 1;
2664     sampler_dndi[index].dw6.progressive_dn = dn_progressive;
2665     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
2666     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2667     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2668
2669     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
2670     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
2671     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2672     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
2673
2674     dri_bo_unmap(pp_context->sampler_state_table.bo);
2675
2676     /* private function & data */
2677     pp_context->pp_x_steps = pp_dn_x_steps;
2678     pp_context->pp_y_steps = pp_dn_y_steps;
2679     pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
2680
2681     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
2682     pp_static_parameter->grf1.r1_6.di.top_field_first = 0;
2683     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 64;
2684     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 192;
2685
2686     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
2687     pp_inline_parameter->grf5.number_blocks = w / 16;
2688     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
2689     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
2690
2691     pp_dn_context->dest_w = w;
2692     pp_dn_context->dest_h = h;
2693
2694     dst_surface->flags = src_surface->flags;
2695     
2696     return VA_STATUS_SUCCESS;
2697 }
2698
2699 static int
2700 gen7_pp_dndi_x_steps(void *private_context)
2701 {
2702     struct pp_dndi_context *pp_dndi_context = private_context;
2703
2704     return pp_dndi_context->dest_w / 16;
2705 }
2706
2707 static int
2708 gen7_pp_dndi_y_steps(void *private_context)
2709 {
2710     struct pp_dndi_context *pp_dndi_context = private_context;
2711
2712     return pp_dndi_context->dest_h / 4;
2713 }
2714
2715 static int
2716 gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2717 {
2718     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2719
2720     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16;
2721     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 4;
2722
2723     return 0;
2724 }
2725
2726 static VAStatus
2727 gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2728                              const struct i965_surface *src_surface,
2729                              const VARectangle *src_rect,
2730                              struct i965_surface *dst_surface,
2731                              const VARectangle *dst_rect,
2732                              void *filter_param)
2733 {
2734     struct i965_driver_data *i965 = i965_driver_data(ctx);
2735     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
2736     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2737     struct object_surface *obj_surface;
2738     struct gen7_sampler_dndi *sampler_dndi;
2739     int index;
2740     int w, h;
2741     int orig_w, orig_h;
2742     int dndi_top_first = 1;
2743
2744     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
2745         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
2746
2747     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
2748         dndi_top_first = 1;
2749     else
2750         dndi_top_first = 0;
2751
2752     /* surface */
2753     obj_surface = SURFACE(src_surface->id);
2754     orig_w = obj_surface->orig_width;
2755     orig_h = obj_surface->orig_height;
2756     w = obj_surface->width;
2757     h = obj_surface->height;
2758
2759     if (pp_context->stmm.bo == NULL) {
2760         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2761                                            "STMM surface",
2762                                            w * h,
2763                                            4096);
2764         assert(pp_context->stmm.bo);
2765     }
2766
2767     /* source UV surface index 1 */
2768     gen7_pp_set_surface_state(ctx, pp_context,
2769                               obj_surface->bo, w * h,
2770                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2771                               1, 0);
2772
2773     /* source YUV surface index 3 */
2774     gen7_pp_set_surface2_state(ctx, pp_context,
2775                                obj_surface->bo, 0,
2776                                orig_w, orig_h, w,
2777                                0, h,
2778                                SURFACE_FORMAT_PLANAR_420_8, 1,
2779                                3);
2780
2781     /* source (temporal reference) YUV surface index 4 */
2782     gen7_pp_set_surface2_state(ctx, pp_context,
2783                                obj_surface->bo, 0,
2784                                orig_w, orig_h, w,
2785                                0, h,
2786                                SURFACE_FORMAT_PLANAR_420_8, 1,
2787                                4);
2788
2789     /* STMM / History Statistics input surface, index 5 */
2790     gen7_pp_set_surface_state(ctx, pp_context,
2791                               pp_context->stmm.bo, 0,
2792                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2793                               5, 1);
2794
2795     /* destination surface */
2796     obj_surface = SURFACE(dst_surface->id);
2797     orig_w = obj_surface->orig_width;
2798     orig_h = obj_surface->orig_height;
2799     w = obj_surface->width;
2800     h = obj_surface->height;
2801
2802     /* destination(Previous frame) Y surface index 27 */
2803     gen7_pp_set_surface_state(ctx, pp_context,
2804                               obj_surface->bo, 0,
2805                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2806                               27, 1);
2807
2808     /* destination(Previous frame) UV surface index 28 */
2809     gen7_pp_set_surface_state(ctx, pp_context,
2810                               obj_surface->bo, w * h,
2811                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2812                               28, 1);
2813
2814     /* destination(Current frame) Y surface index 30 */
2815     gen7_pp_set_surface_state(ctx, pp_context,
2816                               obj_surface->bo, 0,
2817                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2818                               30, 1);
2819
2820     /* destination(Current frame) UV surface index 31 */
2821     gen7_pp_set_surface_state(ctx, pp_context,
2822                               obj_surface->bo, w * h,
2823                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2824                               31, 1);
2825
2826     /* STMM output surface, index 33 */
2827     gen7_pp_set_surface_state(ctx, pp_context,
2828                               pp_context->stmm.bo, 0,
2829                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2830                               33, 1);
2831
2832
2833     /* sampler dndi */
2834     dri_bo_map(pp_context->sampler_state_table.bo, True);
2835     assert(pp_context->sampler_state_table.bo->virtual);
2836     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2837     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2838
2839     /* sample dndi index 0 */
2840     index = 0;
2841     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2842     sampler_dndi[index].dw0.dnmh_delt = 8;
2843     sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
2844     sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
2845     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2846     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2847
2848     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2849     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
2850     sampler_dndi[index].dw1.stmm_c2 = 0;
2851     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2852     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2853
2854     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
2855     sampler_dndi[index].dw2.bne_edge_th = 1;
2856     sampler_dndi[index].dw2.smooth_mv_th = 0;
2857     sampler_dndi[index].dw2.sad_tight_th = 5;
2858     sampler_dndi[index].dw2.cat_slope_minus1 = 9;
2859     sampler_dndi[index].dw2.good_neighbor_th = 4;
2860
2861     sampler_dndi[index].dw3.maximum_stmm = 128;
2862     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2863     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2864     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2865     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2866
2867     sampler_dndi[index].dw4.sdi_delta = 8;
2868     sampler_dndi[index].dw4.sdi_threshold = 128;
2869     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2870     sampler_dndi[index].dw4.stmm_shift_up = 0;
2871     sampler_dndi[index].dw4.stmm_shift_down = 0;
2872     sampler_dndi[index].dw4.minimum_stmm = 0;
2873
2874     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
2875     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
2876     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
2877     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
2878
2879     sampler_dndi[index].dw6.dn_enable = 0;
2880     sampler_dndi[index].dw6.di_enable = 1;
2881     sampler_dndi[index].dw6.di_partial = 0;
2882     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2883     sampler_dndi[index].dw6.dndi_stream_id = 1;
2884     sampler_dndi[index].dw6.dndi_first_frame = 1;
2885     sampler_dndi[index].dw6.progressive_dn = 0;
2886     sampler_dndi[index].dw6.mcdi_enable = 0;
2887     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
2888     sampler_dndi[index].dw6.cat_th1 = 0;
2889     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2890     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2891
2892     sampler_dndi[index].dw7.sad_tha = 5;
2893     sampler_dndi[index].dw7.sad_thb = 10;
2894     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
2895     sampler_dndi[index].dw7.mc_pixel_consistency_th = 25;
2896     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
2897     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2898     sampler_dndi[index].dw7.neighborpixel_th = 10;
2899     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
2900
2901     dri_bo_unmap(pp_context->sampler_state_table.bo);
2902
2903     /* private function & data */
2904     pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
2905     pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
2906     pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
2907
2908     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
2909     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
2910     pp_static_parameter->grf1.di_top_field_first = 0;
2911     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
2912
2913     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
2914     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
2915     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
2916
2917     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
2918     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
2919
2920     pp_dndi_context->dest_w = w;
2921     pp_dndi_context->dest_h = h;
2922
2923     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
2924
2925     return VA_STATUS_SUCCESS;
2926 }
2927
/*
 * Horizontal step count for the GEN7 DN (denoise) kernel walk.
 *
 * private_context is accepted to match the pp_x_steps callback signature
 * but is never read; the result is always 1.
 */
static int
gen7_pp_dn_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2933
2934 static int
2935 gen7_pp_dn_y_steps(void *private_context)
2936 {
2937     struct pp_dn_context *pp_dn_context = private_context;
2938
2939     return pp_dn_context->dest_h / 4;
2940 }
2941
2942 static int
2943 gen7_pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2944 {
2945     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2946
2947     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2948     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
2949
2950     return 0;
2951 }
2952
2953 static VAStatus
2954 gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2955                            const struct i965_surface *src_surface,
2956                            const VARectangle *src_rect,
2957                            struct i965_surface *dst_surface,
2958                            const VARectangle *dst_rect,
2959                            void *filter_param)
2960 {
2961     struct i965_driver_data *i965 = i965_driver_data(ctx);
2962     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
2963     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2964     struct object_surface *obj_surface;
2965     struct gen7_sampler_dndi *sampler_dn;
2966     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
2967     int index;
2968     int w, h;
2969     int orig_w, orig_h;
2970     int dn_strength = 15;
2971     int dndi_top_first = 1;
2972     int dn_progressive = 0;
2973
2974     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
2975         dndi_top_first = 1;
2976         dn_progressive = 1;
2977     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
2978         dndi_top_first = 1;
2979         dn_progressive = 0;
2980     } else {
2981         dndi_top_first = 0;
2982         dn_progressive = 0;
2983     }
2984
2985     if (dn_filter_param) {
2986         float value = dn_filter_param->value;
2987         
2988         if (value > 1.0)
2989             value = 1.0;
2990         
2991         if (value < 0.0)
2992             value = 0.0;
2993
2994         dn_strength = (int)(value * 31.0F);
2995     }
2996
2997     /* surface */
2998     obj_surface = SURFACE(src_surface->id);
2999     orig_w = obj_surface->orig_width;
3000     orig_h = obj_surface->orig_height;
3001     w = obj_surface->width;
3002     h = obj_surface->height;
3003
3004     if (pp_context->stmm.bo == NULL) {
3005         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
3006                                            "STMM surface",
3007                                            w * h,
3008                                            4096);
3009         assert(pp_context->stmm.bo);
3010     }
3011
3012     /* source UV surface index 1 */
3013     gen7_pp_set_surface_state(ctx, pp_context,
3014                               obj_surface->bo, w * h,
3015                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3016                               1, 0);
3017
3018     /* source YUV surface index 3 */
3019     gen7_pp_set_surface2_state(ctx, pp_context,
3020                                obj_surface->bo, 0,
3021                                orig_w, orig_h, w,
3022                                0, h,
3023                                SURFACE_FORMAT_PLANAR_420_8, 1,
3024                                3);
3025
3026     /* source STMM surface index 5 */
3027     gen7_pp_set_surface_state(ctx, pp_context,
3028                               pp_context->stmm.bo, 0,
3029                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3030                               5, 1);
3031
3032     /* destination surface */
3033     obj_surface = SURFACE(dst_surface->id);
3034     orig_w = obj_surface->orig_width;
3035     orig_h = obj_surface->orig_height;
3036     w = obj_surface->width;
3037     h = obj_surface->height;
3038
3039     /* destination Y surface index 7 */
3040     gen7_pp_set_surface_state(ctx, pp_context,
3041                               obj_surface->bo, 0,
3042                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3043                               7, 1);
3044
3045     /* destination UV surface index 8 */
3046     gen7_pp_set_surface_state(ctx, pp_context,
3047                               obj_surface->bo, w * h,
3048                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3049                               8, 1);
3050     /* sampler dn */
3051     dri_bo_map(pp_context->sampler_state_table.bo, True);
3052     assert(pp_context->sampler_state_table.bo->virtual);
3053     assert(sizeof(*sampler_dn) == sizeof(int) * 8);
3054     sampler_dn = pp_context->sampler_state_table.bo->virtual;
3055
3056     /* sample dn index 1 */
3057     index = 0;
3058     sampler_dn[index].dw0.denoise_asd_threshold = 0;
3059     sampler_dn[index].dw0.dnmh_delt = 8;
3060     sampler_dn[index].dw0.vdi_walker_y_stride = 0;
3061     sampler_dn[index].dw0.vdi_walker_frame_sharing_enable = 0;
3062     sampler_dn[index].dw0.denoise_maximum_history = 128;      // 128-240
3063     sampler_dn[index].dw0.denoise_stad_threshold = 0;
3064
3065     sampler_dn[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3066     sampler_dn[index].dw1.denoise_moving_pixel_threshold = 0;
3067     sampler_dn[index].dw1.stmm_c2 = 0;
3068     sampler_dn[index].dw1.low_temporal_difference_threshold = 8;
3069     sampler_dn[index].dw1.temporal_difference_threshold = 16;
3070
3071     sampler_dn[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
3072     sampler_dn[index].dw2.bne_edge_th = 1;
3073     sampler_dn[index].dw2.smooth_mv_th = 0;
3074     sampler_dn[index].dw2.sad_tight_th = 5;
3075     sampler_dn[index].dw2.cat_slope_minus1 = 9;
3076     sampler_dn[index].dw2.good_neighbor_th = 4;
3077
3078     sampler_dn[index].dw3.maximum_stmm = 128;
3079     sampler_dn[index].dw3.multipler_for_vecm = 2;
3080     sampler_dn[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3081     sampler_dn[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3082     sampler_dn[index].dw3.stmm_blending_constant_select = 0;
3083
3084     sampler_dn[index].dw4.sdi_delta = 8;
3085     sampler_dn[index].dw4.sdi_threshold = 128;
3086     sampler_dn[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3087     sampler_dn[index].dw4.stmm_shift_up = 0;
3088     sampler_dn[index].dw4.stmm_shift_down = 0;
3089     sampler_dn[index].dw4.minimum_stmm = 0;
3090
3091     sampler_dn[index].dw5.fmd_temporal_difference_threshold = 0;
3092     sampler_dn[index].dw5.sdi_fallback_mode_2_constant = 0;
3093     sampler_dn[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3094     sampler_dn[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3095
3096     sampler_dn[index].dw6.dn_enable = 1;
3097     sampler_dn[index].dw6.di_enable = 0;
3098     sampler_dn[index].dw6.di_partial = 0;
3099     sampler_dn[index].dw6.dndi_top_first = dndi_top_first;
3100     sampler_dn[index].dw6.dndi_stream_id = 1;
3101     sampler_dn[index].dw6.dndi_first_frame = 1;
3102     sampler_dn[index].dw6.progressive_dn = dn_progressive;
3103     sampler_dn[index].dw6.mcdi_enable = 0;
3104     sampler_dn[index].dw6.fmd_tear_threshold = 32;
3105     sampler_dn[index].dw6.cat_th1 = 0;
3106     sampler_dn[index].dw6.fmd2_vertical_difference_threshold = 32;
3107     sampler_dn[index].dw6.fmd1_vertical_difference_threshold = 32;
3108
3109     sampler_dn[index].dw7.sad_tha = 5;
3110     sampler_dn[index].dw7.sad_thb = 10;
3111     sampler_dn[index].dw7.fmd_for_1st_field_of_current_frame = 2;
3112     sampler_dn[index].dw7.mc_pixel_consistency_th = 25;
3113     sampler_dn[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
3114     sampler_dn[index].dw7.vdi_walker_enable = 0;
3115     sampler_dn[index].dw7.neighborpixel_th = 10;
3116     sampler_dn[index].dw7.column_width_minus1 = w / 16;
3117
3118     dri_bo_unmap(pp_context->sampler_state_table.bo);
3119
3120     /* private function & data */
3121     pp_context->pp_x_steps = gen7_pp_dn_x_steps;
3122     pp_context->pp_y_steps = gen7_pp_dn_y_steps;
3123     pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;
3124
3125     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3126     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3127     pp_static_parameter->grf1.di_top_field_first = 0;
3128     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3129
3130     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3131     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3132     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3133
3134     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3135     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3136
3137     pp_dn_context->dest_w = w;
3138     pp_dn_context->dest_h = h;
3139
3140     dst_surface->flags = src_surface->flags;
3141
3142     return VA_STATUS_SUCCESS;
3143 }
3144
3145 // update u/v offset when the surface format are packed yuv
3146 static void i965_update_src_surface_uv_offset(
3147     VADriverContextP    ctx, 
3148     struct i965_post_processing_context *pp_context,
3149     const struct i965_surface *surface)
3150 {
3151     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3152     int fourcc = pp_get_surface_fourcc(ctx, surface);
3153     
3154     if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) {
3155         pp_static_parameter->grf1.source_packed_u_offset = 1;
3156         pp_static_parameter->grf1.source_packed_v_offset = 3;
3157     } 
3158     else if (fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
3159         pp_static_parameter->grf1.source_packed_y_offset = 1;
3160         pp_static_parameter->grf1.source_packed_v_offset = 2;
3161     }
3162     
3163 }
3164
3165 static void i965_update_dst_surface_uv_offset(
3166     VADriverContextP    ctx, 
3167     struct i965_post_processing_context *pp_context,
3168     const struct i965_surface *surface)
3169 {
3170     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3171     int fourcc = pp_get_surface_fourcc(ctx, surface);
3172     
3173     if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) {
3174         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_u_offset = 1;
3175         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 3;
3176     } 
3177     else if (fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
3178         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_y_offset = 1;
3179         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 2;
3180     }
3181     
3182 }
3183
3184 static VAStatus
3185 ironlake_pp_initialize(
3186     VADriverContextP   ctx,
3187     struct i965_post_processing_context *pp_context,
3188     const struct i965_surface *src_surface,
3189     const VARectangle *src_rect,
3190     struct i965_surface *dst_surface,
3191     const VARectangle *dst_rect,
3192     int                pp_index,
3193     void *filter_param
3194 )
3195 {
3196     VAStatus va_status;
3197     struct i965_driver_data *i965 = i965_driver_data(ctx);
3198     struct pp_module *pp_module;
3199     dri_bo *bo;
3200     int static_param_size, inline_param_size;
3201
3202     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3203     bo = dri_bo_alloc(i965->intel.bufmgr,
3204                       "surface state & binding table",
3205                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
3206                       4096);
3207     assert(bo);
3208     pp_context->surface_state_binding_table.bo = bo;
3209
3210     dri_bo_unreference(pp_context->curbe.bo);
3211     bo = dri_bo_alloc(i965->intel.bufmgr,
3212                       "constant buffer",
3213                       4096, 
3214                       4096);
3215     assert(bo);
3216     pp_context->curbe.bo = bo;
3217
3218     dri_bo_unreference(pp_context->idrt.bo);
3219     bo = dri_bo_alloc(i965->intel.bufmgr, 
3220                       "interface discriptor", 
3221                       sizeof(struct i965_interface_descriptor), 
3222                       4096);
3223     assert(bo);
3224     pp_context->idrt.bo = bo;
3225     pp_context->idrt.num_interface_descriptors = 0;
3226
3227     dri_bo_unreference(pp_context->sampler_state_table.bo);
3228     bo = dri_bo_alloc(i965->intel.bufmgr, 
3229                       "sampler state table", 
3230                       4096,
3231                       4096);
3232     assert(bo);
3233     dri_bo_map(bo, True);
3234     memset(bo->virtual, 0, bo->size);
3235     dri_bo_unmap(bo);
3236     pp_context->sampler_state_table.bo = bo;
3237
3238     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3239     bo = dri_bo_alloc(i965->intel.bufmgr, 
3240                       "sampler 8x8 state ",
3241                       4096,
3242                       4096);
3243     assert(bo);
3244     pp_context->sampler_state_table.bo_8x8 = bo;
3245
3246     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3247     bo = dri_bo_alloc(i965->intel.bufmgr, 
3248                       "sampler 8x8 state ",
3249                       4096,
3250                       4096);
3251     assert(bo);
3252     pp_context->sampler_state_table.bo_8x8_uv = bo;
3253
3254     dri_bo_unreference(pp_context->vfe_state.bo);
3255     bo = dri_bo_alloc(i965->intel.bufmgr, 
3256                       "vfe state", 
3257                       sizeof(struct i965_vfe_state), 
3258                       4096);
3259     assert(bo);
3260     pp_context->vfe_state.bo = bo;
3261
3262     if (IS_GEN7(i965->intel.device_id)) {
3263         static_param_size = sizeof(struct gen7_pp_static_parameter);
3264         inline_param_size = sizeof(struct gen7_pp_inline_parameter);
3265     } else {
3266         static_param_size = sizeof(struct pp_static_parameter);
3267         inline_param_size = sizeof(struct pp_inline_parameter);
3268     }
3269
3270     memset(pp_context->pp_static_parameter, 0, static_param_size);
3271     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
3272     
3273     // update u/v offset for packed yuv
3274     i965_update_src_surface_uv_offset (ctx, pp_context, src_surface);
3275     i965_update_dst_surface_uv_offset (ctx, pp_context, dst_surface);
3276
3277     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
3278     pp_context->current_pp = pp_index;
3279     pp_module = &pp_context->pp_modules[pp_index];
3280     
3281     if (pp_module->initialize)
3282         va_status = pp_module->initialize(ctx, pp_context,
3283                                           src_surface,
3284                                           src_rect,
3285                                           dst_surface,
3286                                           dst_rect,
3287                                           filter_param);
3288     else
3289         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
3290
3291     return va_status;
3292 }
3293
3294 static VAStatus
3295 ironlake_post_processing(
3296     VADriverContextP   ctx,
3297     struct i965_post_processing_context *pp_context,
3298     const struct i965_surface *src_surface,
3299     const VARectangle *src_rect,
3300     struct i965_surface *dst_surface,
3301     const VARectangle *dst_rect,
3302     int                pp_index,
3303     void *filter_param
3304 )
3305 {
3306     VAStatus va_status;
3307
3308     va_status = ironlake_pp_initialize(ctx, pp_context,
3309                                        src_surface,
3310                                        src_rect,
3311                                        dst_surface,
3312                                        dst_rect,
3313                                        pp_index,
3314                                        filter_param);
3315
3316     if (va_status == VA_STATUS_SUCCESS) {
3317         ironlake_pp_states_setup(ctx, pp_context);
3318         ironlake_pp_pipeline_setup(ctx, pp_context);
3319     }
3320
3321     return va_status;
3322 }
3323
3324 static VAStatus
3325 gen6_pp_initialize(
3326     VADriverContextP   ctx,
3327     struct i965_post_processing_context *pp_context,
3328     const struct i965_surface *src_surface,
3329     const VARectangle *src_rect,
3330     struct i965_surface *dst_surface,
3331     const VARectangle *dst_rect,
3332     int                pp_index,
3333     void *filter_param
3334 )
3335 {
3336     VAStatus va_status;
3337     struct i965_driver_data *i965 = i965_driver_data(ctx);
3338     struct pp_module *pp_module;
3339     dri_bo *bo;
3340     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3341     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3342
3343     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3344     bo = dri_bo_alloc(i965->intel.bufmgr,
3345                       "surface state & binding table",
3346                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
3347                       4096);
3348     assert(bo);
3349     pp_context->surface_state_binding_table.bo = bo;
3350
3351     dri_bo_unreference(pp_context->curbe.bo);
3352     bo = dri_bo_alloc(i965->intel.bufmgr,
3353                       "constant buffer",
3354                       4096, 
3355                       4096);
3356     assert(bo);
3357     pp_context->curbe.bo = bo;
3358
3359     dri_bo_unreference(pp_context->idrt.bo);
3360     bo = dri_bo_alloc(i965->intel.bufmgr, 
3361                       "interface discriptor", 
3362                       sizeof(struct gen6_interface_descriptor_data), 
3363                       4096);
3364     assert(bo);
3365     pp_context->idrt.bo = bo;
3366     pp_context->idrt.num_interface_descriptors = 0;
3367
3368     dri_bo_unreference(pp_context->sampler_state_table.bo);
3369     bo = dri_bo_alloc(i965->intel.bufmgr, 
3370                       "sampler state table", 
3371                       4096,
3372                       4096);
3373     assert(bo);
3374     dri_bo_map(bo, True);
3375     memset(bo->virtual, 0, bo->size);
3376     dri_bo_unmap(bo);
3377     pp_context->sampler_state_table.bo = bo;
3378
3379     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3380     bo = dri_bo_alloc(i965->intel.bufmgr, 
3381                       "sampler 8x8 state ",
3382                       4096,
3383                       4096);
3384     assert(bo);
3385     pp_context->sampler_state_table.bo_8x8 = bo;
3386
3387     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3388     bo = dri_bo_alloc(i965->intel.bufmgr, 
3389                       "sampler 8x8 state ",
3390                       4096,
3391                       4096);
3392     assert(bo);
3393     pp_context->sampler_state_table.bo_8x8_uv = bo;
3394
3395     dri_bo_unreference(pp_context->vfe_state.bo);
3396     bo = dri_bo_alloc(i965->intel.bufmgr, 
3397                       "vfe state", 
3398                       sizeof(struct i965_vfe_state), 
3399                       4096);
3400     assert(bo);
3401     pp_context->vfe_state.bo = bo;
3402     
3403     memset(pp_static_parameter, 0, sizeof(*pp_static_parameter));
3404     memset(pp_inline_parameter, 0, sizeof(*pp_inline_parameter));
3405
3406     // update u/v offset for packed yuv
3407     i965_update_src_surface_uv_offset (ctx, pp_context, src_surface);
3408     i965_update_dst_surface_uv_offset (ctx, pp_context, dst_surface);
3409     
3410     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
3411     pp_context->current_pp = pp_index;
3412     pp_module = &pp_context->pp_modules[pp_index];
3413     
3414     if (pp_module->initialize)
3415         va_status = pp_module->initialize(ctx, pp_context,
3416                                           src_surface,
3417                                           src_rect,
3418                                           dst_surface,
3419                                           dst_rect,
3420                                           filter_param);
3421     else
3422         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
3423
3424     return va_status;
3425 }
3426
3427 static void
3428 gen6_pp_interface_descriptor_table(VADriverContextP   ctx,
3429                                    struct i965_post_processing_context *pp_context)
3430 {
3431     struct i965_driver_data *i965 = i965_driver_data(ctx);
3432     struct gen6_interface_descriptor_data *desc;
3433     dri_bo *bo;
3434     int pp_index = pp_context->current_pp;
3435
3436     bo = pp_context->idrt.bo;
3437     dri_bo_map(bo, True);
3438     assert(bo->virtual);
3439     desc = bo->virtual;
3440     memset(desc, 0, sizeof(*desc));
3441     desc->desc0.kernel_start_pointer = 
3442         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
3443     desc->desc1.single_program_flow = 1;
3444     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
3445     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
3446     desc->desc2.sampler_state_pointer = 
3447         pp_context->sampler_state_table.bo->offset >> 5;
3448     desc->desc3.binding_table_entry_count = 0;
3449     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
3450     desc->desc4.constant_urb_entry_read_offset = 0;
3451
3452     if (IS_GEN7(i965->intel.device_id))
3453         desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */
3454     else
3455         desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
3456
3457     dri_bo_emit_reloc(bo,
3458                       I915_GEM_DOMAIN_INSTRUCTION, 0,
3459                       0,
3460                       offsetof(struct gen6_interface_descriptor_data, desc0),
3461                       pp_context->pp_modules[pp_index].kernel.bo);
3462
3463     dri_bo_emit_reloc(bo,
3464                       I915_GEM_DOMAIN_INSTRUCTION, 0,
3465                       desc->desc2.sampler_count << 2,
3466                       offsetof(struct gen6_interface_descriptor_data, desc2),
3467                       pp_context->sampler_state_table.bo);
3468
3469     dri_bo_unmap(bo);
3470     pp_context->idrt.num_interface_descriptors++;
3471 }
3472
3473 static void
3474 gen6_pp_upload_constants(VADriverContextP ctx,
3475                          struct i965_post_processing_context *pp_context)
3476 {
3477     struct i965_driver_data *i965 = i965_driver_data(ctx);
3478     unsigned char *constant_buffer;
3479     int param_size;
3480
3481     assert(sizeof(struct pp_static_parameter) == 128);
3482     assert(sizeof(struct gen7_pp_static_parameter) == 192);
3483
3484     if (IS_GEN7(i965->intel.device_id))
3485         param_size = sizeof(struct gen7_pp_static_parameter);
3486     else
3487         param_size = sizeof(struct pp_static_parameter);
3488
3489     dri_bo_map(pp_context->curbe.bo, 1);
3490     assert(pp_context->curbe.bo->virtual);
3491     constant_buffer = pp_context->curbe.bo->virtual;
3492     memcpy(constant_buffer, pp_context->pp_static_parameter, param_size);
3493     dri_bo_unmap(pp_context->curbe.bo);
3494 }
3495
3496 static void
3497 gen6_pp_states_setup(VADriverContextP ctx,
3498                      struct i965_post_processing_context *pp_context)
3499 {
3500     gen6_pp_interface_descriptor_table(ctx, pp_context);
3501     gen6_pp_upload_constants(ctx, pp_context);
3502 }
3503
3504 static void
3505 gen6_pp_pipeline_select(VADriverContextP ctx,
3506                         struct i965_post_processing_context *pp_context)
3507 {
3508     struct intel_batchbuffer *batch = pp_context->batch;
3509
3510     BEGIN_BATCH(batch, 1);
3511     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
3512     ADVANCE_BATCH(batch);
3513 }
3514
3515 static void
3516 gen6_pp_state_base_address(VADriverContextP ctx,
3517                            struct i965_post_processing_context *pp_context)
3518 {
3519     struct intel_batchbuffer *batch = pp_context->batch;
3520
3521     BEGIN_BATCH(batch, 10);
3522     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
3523     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3524     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
3525     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3526     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3527     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3528     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3529     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3530     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3531     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3532     ADVANCE_BATCH(batch);
3533 }
3534
3535 static void
3536 gen6_pp_vfe_state(VADriverContextP ctx,
3537                   struct i965_post_processing_context *pp_context)
3538 {
3539     struct intel_batchbuffer *batch = pp_context->batch;
3540
3541     BEGIN_BATCH(batch, 8);
3542     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
3543     OUT_BATCH(batch, 0);
3544     OUT_BATCH(batch,
3545               (pp_context->urb.num_vfe_entries - 1) << 16 |
3546               pp_context->urb.num_vfe_entries << 8);
3547     OUT_BATCH(batch, 0);
3548     OUT_BATCH(batch,
3549               (pp_context->urb.size_vfe_entry * 2) << 16 |  /* URB Entry Allocation Size, in 256 bits unit */
3550               (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2)); /* CURBE Allocation Size, in 256 bits unit */
3551     OUT_BATCH(batch, 0);
3552     OUT_BATCH(batch, 0);
3553     OUT_BATCH(batch, 0);
3554     ADVANCE_BATCH(batch);
3555 }
3556
3557 static void
3558 gen6_pp_curbe_load(VADriverContextP ctx,
3559                    struct i965_post_processing_context *pp_context)
3560 {
3561     struct intel_batchbuffer *batch = pp_context->batch;
3562
3563     assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32 <= pp_context->curbe.bo->size);
3564
3565     BEGIN_BATCH(batch, 4);
3566     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
3567     OUT_BATCH(batch, 0);
3568     OUT_BATCH(batch,
3569               pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32);
3570     OUT_RELOC(batch, 
3571               pp_context->curbe.bo,
3572               I915_GEM_DOMAIN_INSTRUCTION, 0,
3573               0);
3574     ADVANCE_BATCH(batch);
3575 }
3576
3577 static void
3578 gen6_interface_descriptor_load(VADriverContextP ctx,
3579                                struct i965_post_processing_context *pp_context)
3580 {
3581     struct intel_batchbuffer *batch = pp_context->batch;
3582
3583     BEGIN_BATCH(batch, 4);
3584     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
3585     OUT_BATCH(batch, 0);
3586     OUT_BATCH(batch,
3587               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
3588     OUT_RELOC(batch, 
3589               pp_context->idrt.bo,
3590               I915_GEM_DOMAIN_INSTRUCTION, 0,
3591               0);
3592     ADVANCE_BATCH(batch);
3593 }
3594
3595 static void
3596 gen6_pp_object_walker(VADriverContextP ctx,
3597                       struct i965_post_processing_context *pp_context)
3598 {
3599     struct i965_driver_data *i965 = i965_driver_data(ctx);
3600     struct intel_batchbuffer *batch = pp_context->batch;
3601     int x, x_steps, y, y_steps;
3602     int param_size, command_length_in_dws;
3603     dri_bo *command_buffer;
3604     unsigned int *command_ptr;
3605
3606     if (IS_GEN7(i965->intel.device_id))
3607         param_size = sizeof(struct gen7_pp_inline_parameter);
3608     else
3609         param_size = sizeof(struct pp_inline_parameter);
3610
3611     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
3612     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
3613     command_length_in_dws = 6 + (param_size >> 2);
3614     command_buffer = dri_bo_alloc(i965->intel.bufmgr,
3615                                   "command objects buffer",
3616                                   command_length_in_dws * 4 * x_steps * y_steps + 8,
3617                                   4096);
3618
3619     dri_bo_map(command_buffer, 1);
3620     command_ptr = command_buffer->virtual;
3621
3622     for (y = 0; y < y_steps; y++) {
3623         for (x = 0; x < x_steps; x++) {
3624             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
3625                 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
3626                 *command_ptr++ = 0;
3627                 *command_ptr++ = 0;
3628                 *command_ptr++ = 0;
3629                 *command_ptr++ = 0;
3630                 *command_ptr++ = 0;
3631                 memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
3632                 command_ptr += (param_size >> 2);
3633             }
3634         }
3635     }
3636
3637     if (command_length_in_dws * x_steps * y_steps % 2 == 0)
3638         *command_ptr++ = 0;
3639
3640     *command_ptr = MI_BATCH_BUFFER_END;
3641
3642     dri_bo_unmap(command_buffer);
3643
3644     BEGIN_BATCH(batch, 2);
3645     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
3646     OUT_RELOC(batch, command_buffer, 
3647               I915_GEM_DOMAIN_COMMAND, 0, 
3648               0);
3649     ADVANCE_BATCH(batch);
3650     
3651     dri_bo_unreference(command_buffer);
3652
3653     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
3654      * will cause control to pass back to ring buffer 
3655      */
3656     intel_batchbuffer_end_atomic(batch);
3657     intel_batchbuffer_flush(batch);
3658     intel_batchbuffer_start_atomic(batch, 0x1000);
3659 }
3660
3661 static void
3662 gen6_pp_pipeline_setup(VADriverContextP ctx,
3663                        struct i965_post_processing_context *pp_context)
3664 {
3665     struct intel_batchbuffer *batch = pp_context->batch;
3666
3667     intel_batchbuffer_start_atomic(batch, 0x1000);
3668     intel_batchbuffer_emit_mi_flush(batch);
3669     gen6_pp_pipeline_select(ctx, pp_context);
3670     gen6_pp_state_base_address(ctx, pp_context);
3671     gen6_pp_vfe_state(ctx, pp_context);
3672     gen6_pp_curbe_load(ctx, pp_context);
3673     gen6_interface_descriptor_load(ctx, pp_context);
3674     gen6_pp_object_walker(ctx, pp_context);
3675     intel_batchbuffer_end_atomic(batch);
3676 }
3677
3678 static VAStatus
3679 gen6_post_processing(
3680     VADriverContextP   ctx,
3681     struct i965_post_processing_context *pp_context,
3682     const struct i965_surface *src_surface,
3683     const VARectangle *src_rect,
3684     struct i965_surface *dst_surface,
3685     const VARectangle *dst_rect,
3686     int                pp_index,
3687     void * filter_param
3688 )
3689 {
3690     VAStatus va_status;
3691     
3692     va_status = gen6_pp_initialize(ctx, pp_context,
3693                                    src_surface,
3694                                    src_rect,
3695                                    dst_surface,
3696                                    dst_rect,
3697                                    pp_index,
3698                                    filter_param);
3699
3700     if (va_status == VA_STATUS_SUCCESS) {
3701         gen6_pp_states_setup(ctx, pp_context);
3702         gen6_pp_pipeline_setup(ctx, pp_context);
3703     }
3704
3705     return va_status;
3706 }
3707
3708 static VAStatus
3709 i965_post_processing_internal(
3710     VADriverContextP   ctx,
3711     struct i965_post_processing_context *pp_context,
3712     const struct i965_surface *src_surface,
3713     const VARectangle *src_rect,
3714     struct i965_surface *dst_surface,
3715     const VARectangle *dst_rect,
3716     int                pp_index,
3717     void *filter_param
3718 )
3719 {
3720     struct i965_driver_data *i965 = i965_driver_data(ctx);
3721     VAStatus va_status;
3722
3723     if (IS_GEN6(i965->intel.device_id) ||
3724         IS_GEN7(i965->intel.device_id))
3725         va_status = gen6_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
3726     else
3727         va_status = ironlake_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
3728     
3729     return va_status;
3730 }
3731
3732 VAStatus 
3733 i965_DestroySurfaces(VADriverContextP ctx,
3734                      VASurfaceID *surface_list,
3735                      int num_surfaces);
3736 VAStatus 
3737 i965_CreateSurfaces(VADriverContextP ctx,
3738                     int width,
3739                     int height,
3740                     int format,
3741                     int num_surfaces,
3742                     VASurfaceID *surfaces);
3743
/*
 * Convert a packed 0xAARRGGBB colour to Y/U/V plus alpha.
 *
 * Each component is a weighted sum of the 8-bit R, G and B channels, scaled
 * down by 1000 with integer division (truncating toward zero) and then
 * offset by 16 (luma) or 128 (chroma).  Alpha is passed through unchanged.
 * The outputs are written in the order y, v, u, a.
 */
static void
rgb_to_yuv(unsigned int argb,
           unsigned char *y,
           unsigned char *u,
           unsigned char *v,
           unsigned char *a)
{
    const int red   = (int)((argb >> 16) & 0xff);
    const int green = (int)((argb >> 8) & 0xff);
    const int blue  = (int)(argb & 0xff);

    const int luma_sum = 257 * red + 504 * green + 98 * blue;
    const int cr_sum   = 439 * red - 368 * green - 71 * blue;
    const int cb_sum   = -148 * red - 291 * green + 439 * blue;

    *y = luma_sum / 1000 + 16;
    *v = cr_sum / 1000 + 128;
    *u = cb_sum / 1000 + 128;
    *a = (argb >> 24) & 0xff;
}
3760
3761 static void 
3762 i965_vpp_clear_surface(VADriverContextP ctx,
3763                        struct i965_post_processing_context *pp_context,
3764                        VASurfaceID surface,
3765                        unsigned int color)
3766 {
3767     struct i965_driver_data *i965 = i965_driver_data(ctx);
3768     struct intel_batchbuffer *batch = pp_context->batch;
3769     struct object_surface *obj_surface = SURFACE(surface);
3770     unsigned int blt_cmd, br13;
3771     unsigned int tiling = 0, swizzle = 0;
3772     int pitch;
3773     unsigned char y, u, v, a = 0;
3774
3775     /* Currently only support NV12 surface */
3776     if (!obj_surface || obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
3777         return;
3778
3779     rgb_to_yuv(color, &y, &u, &v, &a);
3780
3781     if (a == 0)
3782         return;
3783
3784     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
3785     blt_cmd = XY_COLOR_BLT_CMD;
3786     pitch = obj_surface->width;
3787
3788     if (tiling != I915_TILING_NONE) {
3789         blt_cmd |= XY_COLOR_BLT_DST_TILED;
3790         pitch >>= 2;
3791     }
3792
3793     br13 = 0xf0 << 16;
3794     br13 |= BR13_8;
3795     br13 |= pitch;
3796
3797     if (IS_GEN6(i965->intel.device_id) ||
3798         IS_GEN7(i965->intel.device_id)) {
3799         intel_batchbuffer_start_atomic_blt(batch, 48);
3800         BEGIN_BLT_BATCH(batch, 12);
3801     } else {
3802         intel_batchbuffer_start_atomic(batch, 48);
3803         BEGIN_BATCH(batch, 12);
3804     }
3805
3806     OUT_BATCH(batch, blt_cmd);
3807     OUT_BATCH(batch, br13);
3808     OUT_BATCH(batch,
3809               0 << 16 |
3810               0);
3811     OUT_BATCH(batch,
3812               obj_surface->height << 16 |
3813               obj_surface->width);
3814     OUT_RELOC(batch, obj_surface->bo, 
3815               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
3816               0);
3817     OUT_BATCH(batch, y);
3818
3819     br13 = 0xf0 << 16;
3820     br13 |= BR13_565;
3821     br13 |= pitch;
3822
3823     OUT_BATCH(batch, blt_cmd);
3824     OUT_BATCH(batch, br13);
3825     OUT_BATCH(batch,
3826               0 << 16 |
3827               0);
3828     OUT_BATCH(batch,
3829               obj_surface->height / 2 << 16 |
3830               obj_surface->width / 2);
3831     OUT_RELOC(batch, obj_surface->bo, 
3832               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
3833               obj_surface->width * obj_surface->y_cb_offset);
3834     OUT_BATCH(batch, v << 8 | u);
3835
3836     ADVANCE_BATCH(batch);
3837     intel_batchbuffer_end_atomic(batch);
3838 }
3839
3840 VASurfaceID
3841 i965_post_processing(
3842     VADriverContextP   ctx,
3843     VASurfaceID        surface,
3844     const VARectangle *src_rect,
3845     const VARectangle *dst_rect,
3846     unsigned int       flags,
3847     int               *has_done_scaling  
3848 )
3849 {
3850     struct i965_driver_data *i965 = i965_driver_data(ctx);
3851     VASurfaceID in_surface_id = surface;
3852     VASurfaceID out_surface_id = VA_INVALID_ID;
3853     
3854     *has_done_scaling = 0;
3855
3856     if (HAS_PP(i965)) {
3857         struct object_surface *obj_surface;
3858         VAStatus status;
3859         struct i965_surface src_surface;
3860         struct i965_surface dst_surface;
3861
3862         obj_surface = SURFACE(in_surface_id);
3863
3864         /* Currently only support post processing for NV12 surface */
3865         if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
3866             return out_surface_id;
3867
3868         _i965LockMutex(&i965->pp_mutex);
3869
3870         if (flags & I965_PP_FLAG_MCDI) {
3871             status = i965_CreateSurfaces(ctx,
3872                                          obj_surface->orig_width,
3873                                          obj_surface->orig_height,
3874                                          VA_RT_FORMAT_YUV420,
3875                                          1,
3876                                          &out_surface_id);
3877             assert(status == VA_STATUS_SUCCESS);
3878             obj_surface = SURFACE(out_surface_id);
3879             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
3880             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
3881             src_surface.id = in_surface_id;
3882             src_surface.type = I965_SURFACE_TYPE_SURFACE;
3883             src_surface.flags = (flags & I965_PP_FLAG_TOP_FIELD) ? 
3884                 I965_SURFACE_FLAG_TOP_FIELD_FIRST : I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST;
3885             dst_surface.id = out_surface_id;
3886             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
3887             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
3888
3889             i965_post_processing_internal(ctx, i965->pp_context,
3890                                           &src_surface,
3891                                           src_rect,
3892                                           &dst_surface,
3893                                           dst_rect,
3894                                           PP_NV12_DNDI,
3895                                           NULL);
3896         }
3897
3898         if (flags & I965_PP_FLAG_AVS) {
3899             struct i965_render_state *render_state = &i965->render_state;
3900             struct intel_region *dest_region = render_state->draw_region;
3901
3902             if (out_surface_id != VA_INVALID_ID)
3903                 in_surface_id = out_surface_id;
3904
3905             status = i965_CreateSurfaces(ctx,
3906                                          dest_region->width,
3907                                          dest_region->height,
3908                                          VA_RT_FORMAT_YUV420,
3909                                          1,
3910                                          &out_surface_id);
3911             assert(status == VA_STATUS_SUCCESS);
3912             obj_surface = SURFACE(out_surface_id);
3913             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
3914             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
3915             src_surface.id = in_surface_id;
3916             src_surface.type = I965_SURFACE_TYPE_SURFACE;
3917             src_surface.flags = I965_SURFACE_FLAG_FRAME;
3918             dst_surface.id = out_surface_id;
3919             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
3920             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
3921
3922             i965_post_processing_internal(ctx, i965->pp_context,
3923                                           &src_surface,
3924                                           src_rect,
3925                                           &dst_surface,
3926                                           dst_rect,
3927                                           PP_NV12_AVS,
3928                                           NULL);
3929
3930             if (in_surface_id != surface)
3931                 i965_DestroySurfaces(ctx, &in_surface_id, 1);
3932                 
3933             *has_done_scaling = 1;
3934         }
3935
3936         _i965UnlockMutex(&i965->pp_mutex);
3937     }
3938
3939     return out_surface_id;
3940 }       
3941
3942 static VAStatus
3943 i965_image_pl3_processing(VADriverContextP ctx,
3944                           const struct i965_surface *src_surface,
3945                           const VARectangle *src_rect,
3946                           struct i965_surface *dst_surface,
3947                           const VARectangle *dst_rect)
3948 {
3949     struct i965_driver_data *i965 = i965_driver_data(ctx);
3950     struct i965_post_processing_context *pp_context = i965->pp_context;
3951     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
3952     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
3953
3954     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
3955         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
3956                                                  src_surface,
3957                                                  src_rect,
3958                                                  dst_surface,
3959                                                  dst_rect,
3960                                                  PP_PL3_LOAD_SAVE_N12,
3961                                                  NULL);
3962     } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || 
3963                fourcc == VA_FOURCC('I', 'M', 'C', '3')) {
3964         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
3965                                                  src_surface,
3966                                                  src_rect,
3967                                                  dst_surface,
3968                                                  dst_rect,
3969                                                  PP_PL3_LOAD_SAVE_PL3,
3970                                                  NULL);
3971     } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) {
3972         if (IS_GEN6(i965->intel.device_id))
3973             vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
3974                                                      src_surface,
3975                                                      src_rect,
3976                                                      dst_surface,
3977                                                      dst_rect,
3978                                                      PP_PL3_LOAD_SAVE_PA,
3979                                                      NULL);
3980     }
3981     else {
3982         assert(0);
3983     }
3984
3985     intel_batchbuffer_flush(pp_context->batch);
3986
3987     return vaStatus;
3988 }
3989
3990 static VAStatus
3991 i965_image_pl2_processing(VADriverContextP ctx,
3992                           const struct i965_surface *src_surface,
3993                           const VARectangle *src_rect,
3994                           struct i965_surface *dst_surface,
3995                           const VARectangle *dst_rect)
3996 {
3997     struct i965_driver_data *i965 = i965_driver_data(ctx);
3998     struct i965_post_processing_context *pp_context = i965->pp_context;
3999     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4000     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
4001
4002     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4003         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4004                                                  src_surface,
4005                                                  src_rect,
4006                                                  dst_surface,
4007                                                  dst_rect,
4008                                                  PP_NV12_LOAD_SAVE_N12,
4009                                                  NULL);
4010     } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || 
4011                fourcc == VA_FOURCC('I', 'M', 'C', '3')) {
4012         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4013                                                  src_surface,
4014                                                  src_rect,
4015                                                  dst_surface,
4016                                                  dst_rect,
4017                                                  PP_NV12_LOAD_SAVE_PL3,
4018                                                  NULL);
4019     } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) {
4020         if (IS_GEN6(i965->intel.device_id))
4021             vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4022                                                      src_surface,
4023                                                      src_rect,
4024                                                      dst_surface,
4025                                                      dst_rect,
4026                                                      PP_NV12_LOAD_SAVE_PA,
4027                                                      NULL);
4028     }
4029
4030     intel_batchbuffer_flush(pp_context->batch);
4031
4032     return vaStatus;
4033 }
4034
4035 static VAStatus
4036 i965_image_pl1_processing(VADriverContextP ctx,
4037                           const struct i965_surface *src_surface,
4038                           const VARectangle *src_rect,
4039                           struct i965_surface *dst_surface,
4040                           const VARectangle *dst_rect)
4041 {
4042     struct i965_driver_data *i965 = i965_driver_data(ctx);
4043     struct i965_post_processing_context *pp_context = i965->pp_context;
4044     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4045
4046     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4047         i965_post_processing_internal(ctx, i965->pp_context,
4048                                       src_surface,
4049                                       src_rect,
4050                                       dst_surface,
4051                                       dst_rect,
4052                                       PP_PA_LOAD_SAVE_NV12,
4053                                       NULL);
4054     }
4055     else if (fourcc == VA_FOURCC_YV12) {
4056         i965_post_processing_internal(ctx, i965->pp_context,
4057                                       src_surface,
4058                                       src_rect,
4059                                       dst_surface,
4060                                       dst_rect,
4061                                       PP_PA_LOAD_SAVE_PL3,
4062                                       NULL);
4063
4064     }
4065     else {
4066         return VA_STATUS_ERROR_UNKNOWN;
4067     }
4068
4069     intel_batchbuffer_flush(pp_context->batch);
4070
4071     return VA_STATUS_SUCCESS;
4072 }
4073
4074 VAStatus
4075 i965_image_processing(VADriverContextP ctx,
4076                       const struct i965_surface *src_surface,
4077                       const VARectangle *src_rect,
4078                       struct i965_surface *dst_surface,
4079                       const VARectangle *dst_rect)
4080 {
4081     struct i965_driver_data *i965 = i965_driver_data(ctx);
4082     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
4083
4084     if (HAS_PP(i965)) {
4085         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
4086
4087         _i965LockMutex(&i965->pp_mutex);
4088
4089         switch (fourcc) {
4090         case VA_FOURCC('Y', 'V', '1', '2'):
4091         case VA_FOURCC('I', '4', '2', '0'):
4092         case VA_FOURCC('I', 'M', 'C', '1'):
4093         case VA_FOURCC('I', 'M', 'C', '3'):
4094             status = i965_image_pl3_processing(ctx,
4095                                                src_surface,
4096                                                src_rect,
4097                                                dst_surface,
4098                                                dst_rect);
4099             break;
4100
4101         case  VA_FOURCC('N', 'V', '1', '2'):
4102             status = i965_image_pl2_processing(ctx,
4103                                                src_surface,
4104                                                src_rect,
4105                                                dst_surface,
4106                                                dst_rect);
4107             break;
4108         case  VA_FOURCC('Y', 'U', 'Y', '2'):
4109             if (IS_GEN6(i965->intel.device_id))
4110                 status = i965_image_pl1_processing(ctx,
4111                                                    src_surface,
4112                                                    src_rect,
4113                                                    dst_surface,
4114                                                    dst_rect);
4115             break;
4116
4117         default:
4118             status = VA_STATUS_ERROR_UNIMPLEMENTED;
4119             break;
4120         }
4121         
4122         _i965UnlockMutex(&i965->pp_mutex);
4123     }
4124
4125     return status;
4126 }       
4127
4128 static void
4129 i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
4130 {
4131     int i;
4132
4133     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4134     pp_context->surface_state_binding_table.bo = NULL;
4135
4136     dri_bo_unreference(pp_context->curbe.bo);
4137     pp_context->curbe.bo = NULL;
4138
4139     dri_bo_unreference(pp_context->sampler_state_table.bo);
4140     pp_context->sampler_state_table.bo = NULL;
4141
4142     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4143     pp_context->sampler_state_table.bo_8x8 = NULL;
4144
4145     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4146     pp_context->sampler_state_table.bo_8x8_uv = NULL;
4147
4148     dri_bo_unreference(pp_context->idrt.bo);
4149     pp_context->idrt.bo = NULL;
4150     pp_context->idrt.num_interface_descriptors = 0;
4151
4152     dri_bo_unreference(pp_context->vfe_state.bo);
4153     pp_context->vfe_state.bo = NULL;
4154
4155     dri_bo_unreference(pp_context->stmm.bo);
4156     pp_context->stmm.bo = NULL;
4157
4158     for (i = 0; i < NUM_PP_MODULES; i++) {
4159         struct pp_module *pp_module = &pp_context->pp_modules[i];
4160
4161         dri_bo_unreference(pp_module->kernel.bo);
4162         pp_module->kernel.bo = NULL;
4163     }
4164
4165     free(pp_context->pp_static_parameter);
4166     free(pp_context->pp_inline_parameter);
4167     pp_context->pp_static_parameter = NULL;
4168     pp_context->pp_inline_parameter = NULL;
4169 }
4170
4171 Bool
4172 i965_post_processing_terminate(VADriverContextP ctx)
4173 {
4174     struct i965_driver_data *i965 = i965_driver_data(ctx);
4175     struct i965_post_processing_context *pp_context = i965->pp_context;
4176
4177     if (pp_context) {
4178         i965_post_processing_context_finalize(pp_context);
4179         free(pp_context);
4180     }
4181
4182     i965->pp_context = NULL;
4183
4184     return True;
4185 }
4186
4187 static void
4188 i965_post_processing_context_init(VADriverContextP ctx,
4189                                   struct i965_post_processing_context *pp_context,
4190                                   struct intel_batchbuffer *batch)
4191 {
4192     struct i965_driver_data *i965 = i965_driver_data(ctx);
4193     int i;
4194
4195     pp_context->urb.size = URB_SIZE((&i965->intel));
4196     pp_context->urb.num_vfe_entries = 32;
4197     pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
4198     pp_context->urb.num_cs_entries = 1;
4199     
4200     if (IS_GEN7(i965->intel.device_id))
4201         pp_context->urb.size_cs_entry = 4;      /* in 512 bits unit */
4202     else
4203         pp_context->urb.size_cs_entry = 2;
4204
4205     pp_context->urb.vfe_start = 0;
4206     pp_context->urb.cs_start = pp_context->urb.vfe_start + 
4207         pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
4208     assert(pp_context->urb.cs_start + 
4209            pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
4210
4211     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
4212     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
4213     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
4214
4215     if (IS_GEN7(i965->intel.device_id))
4216         memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
4217     else if (IS_GEN6(i965->intel.device_id))
4218         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
4219     else if (IS_IRONLAKE(i965->intel.device_id))
4220         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
4221
4222     for (i = 0; i < NUM_PP_MODULES; i++) {
4223         struct pp_module *pp_module = &pp_context->pp_modules[i];
4224         dri_bo_unreference(pp_module->kernel.bo);
4225         if (pp_module->kernel.bin && pp_module->kernel.size) {
4226             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
4227                                                 pp_module->kernel.name,
4228                                                 pp_module->kernel.size,
4229                                                 4096);
4230             assert(pp_module->kernel.bo);
4231             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
4232         } else {
4233             pp_module->kernel.bo = NULL;
4234         }
4235     }
4236
4237     /* static & inline parameters */
4238     if (IS_GEN7(i965->intel.device_id)) {
4239         pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
4240         pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
4241     } else {
4242         pp_context->pp_static_parameter = calloc(sizeof(struct pp_static_parameter), 1);
4243         pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
4244     }
4245
4246     pp_context->batch = batch;
4247 }
4248
4249 Bool
4250 i965_post_processing_init(VADriverContextP ctx)
4251 {
4252     struct i965_driver_data *i965 = i965_driver_data(ctx);
4253     struct i965_post_processing_context *pp_context = i965->pp_context;
4254
4255     if (HAS_PP(i965)) {
4256         if (pp_context == NULL) {
4257             pp_context = calloc(1, sizeof(*pp_context));
4258             i965_post_processing_context_init(ctx, pp_context, i965->batch);
4259             i965->pp_context = pp_context;
4260         }
4261     }
4262
4263     return True;
4264 }
4265
/*
 * Post-processing kernel index for each VAProcFilterType value; the table
 * is indexed by the filter type.  A PP_NULL entry means no kernel is mapped
 * to that filter, and i965_proc_picture() skips such filters.
 */
static const int procfilter_to_pp_flag[VAProcFilterCount] = {
    PP_NULL,    /* VAProcFilterNone */
    PP_NV12_DN, /* VAProcFilterNoiseReduction */
    PP_NV12_DNDI, /* VAProcFilterDeinterlacing */
    PP_NULL,    /* VAProcFilterSharpening */
    PP_NULL,    /* VAProcFilterColorBalance */
    PP_NULL,    /* VAProcFilterColorStandard */
};
4274
4275 static const int proc_frame_to_pp_frame[3] = {
4276     I965_SURFACE_FLAG_FRAME,
4277     I965_SURFACE_FLAG_TOP_FIELD_FIRST,
4278     I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST
4279 };
4280
4281 static void 
4282 i965_proc_picture(VADriverContextP ctx, 
4283                   VAProfile profile, 
4284                   union codec_state *codec_state,
4285                   struct hw_context *hw_context)
4286 {
4287     struct i965_driver_data *i965 = i965_driver_data(ctx);
4288     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
4289     struct proc_state *proc_state = &codec_state->proc;
4290     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
4291     struct object_surface *obj_surface;
4292     struct i965_surface src_surface, dst_surface;
4293     VARectangle src_rect, dst_rect;
4294     VAStatus status;
4295     int i;
4296     VASurfaceID tmp_surfaces[VAProcFilterCount + 4];
4297     int num_tmp_surfaces = 0;
4298     unsigned int tiling = 0, swizzle = 0;
4299     int in_width, in_height;
4300
4301     assert(pipeline_param->surface != VA_INVALID_ID);
4302     assert(proc_state->current_render_target != VA_INVALID_ID);
4303
4304     obj_surface = SURFACE(pipeline_param->surface);
4305     in_width = obj_surface->orig_width;
4306     in_height = obj_surface->orig_height;
4307     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
4308
4309     src_surface.id = pipeline_param->surface;
4310     src_surface.type = I965_SURFACE_TYPE_SURFACE;
4311     src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
4312
4313     if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2')) {
4314         VASurfaceID out_surface_id = VA_INVALID_ID;
4315
4316         src_surface.id = pipeline_param->surface;
4317         src_surface.type = I965_SURFACE_TYPE_SURFACE;
4318         src_surface.flags = I965_SURFACE_FLAG_FRAME;
4319         src_rect.x = 0;
4320         src_rect.y = 0;
4321         src_rect.width = in_width;
4322         src_rect.height = in_height;
4323
4324         status = i965_CreateSurfaces(ctx,
4325                                      in_width,
4326                                      in_height,
4327                                      VA_RT_FORMAT_YUV420,
4328                                      1,
4329                                      &out_surface_id);
4330         assert(status == VA_STATUS_SUCCESS);
4331         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
4332         obj_surface = SURFACE(out_surface_id);
4333         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
4334
4335         dst_surface.id = out_surface_id;
4336         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4337         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4338         dst_rect.x = 0;
4339         dst_rect.y = 0;
4340         dst_rect.width = in_width;
4341         dst_rect.height = in_height;
4342
4343         status = i965_image_processing(ctx,
4344                                        &src_surface,
4345                                        &src_rect,
4346                                        &dst_surface,
4347                                        &dst_rect);
4348         assert(status == VA_STATUS_SUCCESS);
4349
4350         src_surface.id = out_surface_id;
4351         src_surface.type = I965_SURFACE_TYPE_SURFACE;
4352         src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
4353     }
4354
4355     if (pipeline_param->surface_region) {
4356         src_rect.x = pipeline_param->surface_region->x;
4357         src_rect.y = pipeline_param->surface_region->y;
4358         src_rect.width = pipeline_param->surface_region->width;
4359         src_rect.height = pipeline_param->surface_region->height;
4360     } else {
4361         src_rect.x = 0;
4362         src_rect.y = 0;
4363         src_rect.width = in_width;
4364         src_rect.height = in_height;
4365     }
4366
4367     if (pipeline_param->output_region) {
4368         dst_rect.x = pipeline_param->output_region->x;
4369         dst_rect.y = pipeline_param->output_region->y;
4370         dst_rect.width = pipeline_param->output_region->width;
4371         dst_rect.height = pipeline_param->output_region->height;
4372     } else {
4373         dst_rect.x = 0;
4374         dst_rect.y = 0;
4375         dst_rect.width = in_width;
4376         dst_rect.height = in_height;
4377     }
4378
4379     obj_surface = SURFACE(proc_state->current_render_target);
4380     i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4381     i965_vpp_clear_surface(ctx, &proc_context->pp_context, proc_state->current_render_target, pipeline_param->output_background_color); 
4382     
4383     for (i = 0; i < pipeline_param->num_filters; i++) {
4384         struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]);
4385         VAProcFilterParameterBufferBase *filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer;
4386         VAProcFilterType filter_type = filter_param->type;
4387         VASurfaceID out_surface_id = VA_INVALID_ID;
4388         int kernel_index = procfilter_to_pp_flag[filter_type];
4389
4390         if (kernel_index != PP_NULL &&
4391             proc_context->pp_context.pp_modules[kernel_index].kernel.bo != NULL) {
4392             status = i965_CreateSurfaces(ctx,
4393                                          in_width,
4394                                          in_height,
4395                                          VA_RT_FORMAT_YUV420,
4396                                          1,
4397                                          &out_surface_id);
4398             assert(status == VA_STATUS_SUCCESS);
4399             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
4400             obj_surface = SURFACE(out_surface_id);
4401             i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4402             dst_surface.id = out_surface_id;
4403             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4404             status = i965_post_processing_internal(ctx, &proc_context->pp_context,
4405                                                    &src_surface,
4406                                                    &src_rect,
4407                                                    &dst_surface,
4408                                                    &src_rect,
4409                                                    kernel_index,
4410                                                    filter_param);
4411
4412             if (status == VA_STATUS_SUCCESS) {
4413                 src_surface.id = dst_surface.id;
4414                 src_surface.type = dst_surface.type;
4415                 src_surface.flags = dst_surface.flags;
4416             }
4417         }
4418     }
4419
4420     dst_surface.id = proc_state->current_render_target;
4421     dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4422
4423     if (src_rect.width == dst_rect.width &&
4424         src_rect.height == dst_rect.height) {
4425         i965_post_processing_internal(ctx, &proc_context->pp_context,
4426                                       &src_surface,
4427                                       &src_rect,
4428                                       &dst_surface,
4429                                       &dst_rect,
4430                                       PP_NV12_LOAD_SAVE_N12,
4431                                       NULL);
4432     } else {
4433
4434         i965_post_processing_internal(ctx, &proc_context->pp_context,
4435                                       &src_surface,
4436                                       &src_rect,
4437                                       &dst_surface,
4438                                       &dst_rect,
4439                                       (pipeline_param->filter_flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC ?
4440                                       PP_NV12_AVS : PP_NV12_SCALING,
4441                                       NULL);
4442     }
4443
4444     if (num_tmp_surfaces)
4445         i965_DestroySurfaces(ctx,
4446                              tmp_surfaces,
4447                              num_tmp_surfaces);
4448
4449     intel_batchbuffer_flush(hw_context->batch);
4450 }
4451
4452 static void
4453 i965_proc_context_destroy(void *hw_context)
4454 {
4455     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
4456
4457     i965_post_processing_context_finalize(&proc_context->pp_context);
4458     intel_batchbuffer_free(proc_context->base.batch);
4459     free(proc_context);
4460 }
4461
4462 struct hw_context *
4463 i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config)
4464 {
4465     struct intel_driver_data *intel = intel_driver_data(ctx);
4466     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
4467
4468     proc_context->base.destroy = i965_proc_context_destroy;
4469     proc_context->base.run = i965_proc_picture;
4470     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
4471     i965_post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
4472
4473     return (struct hw_context *)proc_context;
4474 }