support NV12/I420/YV12->I420/YV12 conversion
[profile/ivi/vaapi-intel-driver.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_post_processing.h"
40 #include "i965_render.h"
41
/* True when the device is one of the generations this file has a
 * post-processing pipeline for (Ironlake, Gen6 or Gen7). */
#define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) ||     \
                     IS_GEN6((ctx)->intel.device_id) ||         \
                     IS_GEN7((ctx)->intel.device_id))

/* Per-generation surface-state slot sizes: each flavour is padded with
 * ALIGN(..., 32) and the larger of the two flavours is used. */
#define SURFACE_STATE_PADDED_SIZE_0_I965        ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_I965        ALIGN(sizeof(struct i965_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_I965          MAX(SURFACE_STATE_PADDED_SIZE_0_I965, SURFACE_STATE_PADDED_SIZE_1_I965)

#define SURFACE_STATE_PADDED_SIZE_0_GEN7        ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_GEN7        ALIGN(sizeof(struct gen7_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN7          MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)

/* One slot size for every generation, so the same layout works everywhere. */
#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
/* Byte offset of surface-state slot `index`.  The argument is parenthesized
 * so that expressions such as SURFACE_STATE_OFFSET(base + 1) scale the whole
 * index instead of only its first operand. */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
/* The binding table starts right after the MAX_PP_SURFACES surface-state slots. */
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)
57
/*
 * Gen5 post-processing kernels.  Each array is filled by #including a
 * generated listing from shaders/post_processing/gen5_6/ (".g4b.gen5"
 * files); every row holds four 32-bit words.  The arrays are referenced,
 * together with their sizeof(), from pp_modules_gen5[].
 */
58 static const uint32_t pp_null_gen5[][4] = {
59 #include "shaders/post_processing/gen5_6/null.g4b.gen5"
60 };
61
62 static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
63 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
64 };
65
66 static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
67 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
68 };
69
70 static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
71 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
72 };
73
74 static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
75 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
76 };
77
78 static const uint32_t pp_nv12_scaling_gen5[][4] = {
79 #include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
80 };
81
82 static const uint32_t pp_nv12_avs_gen5[][4] = {
83 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
84 };
85
86 static const uint32_t pp_nv12_dndi_gen5[][4] = {
87 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
88 };
89
90 static const uint32_t pp_nv12_dn_gen5[][4] = {
91 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
92 };
93
94 static const uint32_t pp_nv12_load_save_pa_gen5[][4] = {
95 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5"
96 };
97
98 static const uint32_t pp_pl3_load_save_pa_gen5[][4] = {
99 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5"
100 };
101
102 static const uint32_t pp_pa_load_save_nv12_gen5[][4] = {
103 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5"
104 };
105
106 static const uint32_t pp_pa_load_save_pl3_gen5[][4] = {
107 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5"
108 };
109
/*
 * Forward declarations of the per-module "initialize" callbacks that the
 * gen5/gen6 module tables point at.  They all share one signature: driver
 * context, post-processing context, source surface and rectangle,
 * destination surface and rectangle, and an opaque filter parameter.
 */
110 static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
111                                    const struct i965_surface *src_surface,
112                                    const VARectangle *src_rect,
113                                    struct i965_surface *dst_surface,
114                                    const VARectangle *dst_rect,
115                                    void *filter_param);
116 static VAStatus pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
117                                             const struct i965_surface *src_surface,
118                                             const VARectangle *src_rect,
119                                             struct i965_surface *dst_surface,
120                                             const VARectangle *dst_rect,
121                                             void *filter_param);
122 static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
123                                            const struct i965_surface *src_surface,
124                                            const VARectangle *src_rect,
125                                            struct i965_surface *dst_surface,
126                                            const VARectangle *dst_rect,
127                                            void *filter_param);
128 static VAStatus gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
129                                              const struct i965_surface *src_surface,
130                                              const VARectangle *src_rect,
131                                              struct i965_surface *dst_surface,
132                                              const VARectangle *dst_rect,
133                                              void *filter_param);
134 static VAStatus pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
135                                                 const struct i965_surface *src_surface,
136                                                 const VARectangle *src_rect,
137                                                 struct i965_surface *dst_surface,
138                                                 const VARectangle *dst_rect,
139                                                 void *filter_param);
140 static VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
141                                         const struct i965_surface *src_surface,
142                                         const VARectangle *src_rect,
143                                         struct i965_surface *dst_surface,
144                                         const VARectangle *dst_rect,
145                                         void *filter_param);
146 static VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
147                                       const struct i965_surface *src_surface,
148                                       const VARectangle *src_rect,
149                                       struct i965_surface *dst_surface,
150                                       const VARectangle *dst_rect,
151                                       void *filter_param);
152
153 static struct pp_module pp_modules_gen5[] = {
154     {
155         {
156             "NULL module (for testing)",
157             PP_NULL,
158             pp_null_gen5,
159             sizeof(pp_null_gen5),
160             NULL,
161         },
162
163         pp_null_initialize,
164     },
165
166     {
167         {
168             "NV12_NV12",
169             PP_NV12_LOAD_SAVE_N12,
170             pp_nv12_load_save_nv12_gen5,
171             sizeof(pp_nv12_load_save_nv12_gen5),
172             NULL,
173         },
174
175         pp_plx_load_save_plx_initialize,
176     },
177
178     {
179         {
180             "NV12_PL3",
181             PP_NV12_LOAD_SAVE_PL3,
182             pp_nv12_load_save_pl3_gen5,
183             sizeof(pp_nv12_load_save_pl3_gen5),
184             NULL,
185         },
186
187         pp_plx_load_save_plx_initialize,
188     },
189
190     {
191         {
192             "PL3_NV12",
193             PP_PL3_LOAD_SAVE_N12,
194             pp_pl3_load_save_nv12_gen5,
195             sizeof(pp_pl3_load_save_nv12_gen5),
196             NULL,
197         },
198
199         pp_plx_load_save_plx_initialize,
200     },
201
202     {
203         {
204             "PL3_PL3",
205             PP_PL3_LOAD_SAVE_N12,
206             pp_pl3_load_save_pl3_gen5,
207             sizeof(pp_pl3_load_save_pl3_gen5),
208             NULL,
209         },
210
211         pp_plx_load_save_plx_initialize
212     },
213
214     {
215         {
216             "NV12 Scaling module",
217             PP_NV12_SCALING,
218             pp_nv12_scaling_gen5,
219             sizeof(pp_nv12_scaling_gen5),
220             NULL,
221         },
222
223         pp_nv12_scaling_initialize,
224     },
225
226     {
227         {
228             "NV12 AVS module",
229             PP_NV12_AVS,
230             pp_nv12_avs_gen5,
231             sizeof(pp_nv12_avs_gen5),
232             NULL,
233         },
234
235         pp_nv12_avs_initialize_nlas,
236     },
237
238     {
239         {
240             "NV12 DNDI module",
241             PP_NV12_DNDI,
242             pp_nv12_dndi_gen5,
243             sizeof(pp_nv12_dndi_gen5),
244             NULL,
245         },
246
247         pp_nv12_dndi_initialize,
248     },
249
250     {
251         {
252             "NV12 DN module",
253             PP_NV12_DN,
254             pp_nv12_dn_gen5,
255             sizeof(pp_nv12_dn_gen5),
256             NULL,
257         },
258
259         pp_nv12_dn_initialize,
260     },
261
262     {
263         {
264             "NV12_PA module",
265             PP_NV12_LOAD_SAVE_PA,
266             pp_nv12_load_save_pa_gen5,
267             sizeof(pp_nv12_load_save_pa_gen5),
268             NULL,
269         },
270     
271         pp_plx_load_save_plx_initialize,
272     },
273
274     {
275         {
276             "PL3_PA module",
277             PP_PL3_LOAD_SAVE_PA,
278             pp_pl3_load_save_pa_gen5,
279             sizeof(pp_pl3_load_save_pa_gen5),
280             NULL,
281         },
282     
283         pp_plx_load_save_plx_initialize,
284     },
285
286     {
287         {
288             "PA_NV12 module",
289             PP_PA_LOAD_SAVE_NV12,
290             pp_pa_load_save_nv12_gen5,
291             sizeof(pp_pa_load_save_nv12_gen5),
292             NULL,
293         },
294     
295         pp_plx_load_save_plx_initialize,
296     },
297
298     {
299         {
300             "PA_PL3 module",
301             PP_PA_LOAD_SAVE_PL3,
302             pp_pa_load_save_pl3_gen5,
303             sizeof(pp_pa_load_save_pl3_gen5),
304             NULL,
305         },
306     
307         pp_plx_load_save_plx_initialize,
308     },
309
310 };
311
/*
 * Gen6 post-processing kernels, #included from the ".g6b" listings in
 * shaders/post_processing/gen5_6/; every row holds four 32-bit words.
 * pp_nv12_scaling_gen6 and pp_nv12_avs_gen6 include the same listing
 * (nv12_avs_nv12.g6b).
 */
312 static const uint32_t pp_null_gen6[][4] = {
313 #include "shaders/post_processing/gen5_6/null.g6b"
314 };
315
316 static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
317 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
318 };
319
320 static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
321 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
322 };
323
324 static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
325 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
326 };
327
328 static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
329 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
330 };
331
332 static const uint32_t pp_nv12_scaling_gen6[][4] = {
333 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
334 };
335
336 static const uint32_t pp_nv12_avs_gen6[][4] = {
337 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
338 };
339
340 static const uint32_t pp_nv12_dndi_gen6[][4] = {
341 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
342 };
343
344 static const uint32_t pp_nv12_dn_gen6[][4] = {
345 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
346 };
347
348 static const uint32_t pp_nv12_load_save_pa_gen6[][4] = {
349 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g6b"
350 };
351
352 static const uint32_t pp_pl3_load_save_pa_gen6[][4] = {
353 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g6b"
354 };
355
356 static const uint32_t pp_pa_load_save_nv12_gen6[][4] = {
357 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g6b"
358 };
359
360 static const uint32_t pp_pa_load_save_pl3_gen6[][4] = {
361 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g6b"
362 };
363
364 static struct pp_module pp_modules_gen6[] = {
365     {
366         {
367             "NULL module (for testing)",
368             PP_NULL,
369             pp_null_gen6,
370             sizeof(pp_null_gen6),
371             NULL,
372         },
373
374         pp_null_initialize,
375     },
376
377     {
378         {
379             "NV12_NV12",
380             PP_NV12_LOAD_SAVE_N12,
381             pp_nv12_load_save_nv12_gen6,
382             sizeof(pp_nv12_load_save_nv12_gen6),
383             NULL,
384         },
385
386         pp_plx_load_save_plx_initialize,
387     },
388
389     {
390         {
391             "NV12_PL3",
392             PP_NV12_LOAD_SAVE_PL3,
393             pp_nv12_load_save_pl3_gen6,
394             sizeof(pp_nv12_load_save_pl3_gen6),
395             NULL,
396         },
397         
398         pp_plx_load_save_plx_initialize,
399     },
400
401     {
402         {
403             "PL3_NV12",
404             PP_PL3_LOAD_SAVE_N12,
405             pp_pl3_load_save_nv12_gen6,
406             sizeof(pp_pl3_load_save_nv12_gen6),
407             NULL,
408         },
409
410         pp_plx_load_save_plx_initialize,
411     },
412
413     {
414         {
415             "PL3_PL3",
416             PP_PL3_LOAD_SAVE_N12,
417             pp_pl3_load_save_pl3_gen6,
418             sizeof(pp_pl3_load_save_pl3_gen6),
419             NULL,
420         },
421
422         pp_plx_load_save_plx_initialize,
423     },
424
425     {
426         {
427             "NV12 Scaling module",
428             PP_NV12_SCALING,
429             pp_nv12_scaling_gen6,
430             sizeof(pp_nv12_scaling_gen6),
431             NULL,
432         },
433
434         gen6_nv12_scaling_initialize,
435     },
436
437     {
438         {
439             "NV12 AVS module",
440             PP_NV12_AVS,
441             pp_nv12_avs_gen6,
442             sizeof(pp_nv12_avs_gen6),
443             NULL,
444         },
445
446         pp_nv12_avs_initialize_nlas,
447     },
448
449     {
450         {
451             "NV12 DNDI module",
452             PP_NV12_DNDI,
453             pp_nv12_dndi_gen6,
454             sizeof(pp_nv12_dndi_gen6),
455             NULL,
456         },
457
458         pp_nv12_dndi_initialize,
459     },
460
461     {
462         {
463             "NV12 DN module",
464             PP_NV12_DN,
465             pp_nv12_dn_gen6,
466             sizeof(pp_nv12_dn_gen6),
467             NULL,
468         },
469
470         pp_nv12_dn_initialize,
471     },
472     {
473         {
474             "NV12_PA module",
475             PP_NV12_LOAD_SAVE_PA,
476             pp_nv12_load_save_pa_gen6,
477             sizeof(pp_nv12_load_save_pa_gen6),
478             NULL,
479         },
480     
481         pp_plx_load_save_plx_initialize,
482     },
483     
484     {
485         {
486             "PL3_PA module",
487             PP_PL3_LOAD_SAVE_PA,
488             pp_pl3_load_save_pa_gen6,
489             sizeof(pp_pl3_load_save_pa_gen6),
490             NULL,
491         },
492     
493         pp_plx_load_save_plx_initialize,
494     },
495     
496     {
497         {
498             "PA_NV12 module",
499             PP_PA_LOAD_SAVE_NV12,
500             pp_pa_load_save_nv12_gen6,
501             sizeof(pp_pa_load_save_nv12_gen6),
502             NULL,
503         },
504     
505         pp_plx_load_save_plx_initialize,
506     },
507
508     {
509         {
510             "PA_PL3 module",
511             PP_PA_LOAD_SAVE_PL3,
512             pp_pa_load_save_pl3_gen6,
513             sizeof(pp_pa_load_save_pl3_gen6),
514             NULL,
515         },
516     
517         pp_plx_load_save_plx_initialize,
518     },
519     
520 };
521
/*
 * Gen7 post-processing kernels.  The load/save and AVS kernels are
 * #included from the ".g7b" listings in shaders/post_processing/gen7/;
 * pp_nv12_scaling_gen7 and pp_nv12_avs_gen7 include the same listing
 * (avs.g7b).
 *
 * pp_null_gen7 and the DNDI, DN and packed (PA) kernels have empty
 * initializer lists in this revision; the DNDI include is commented out.
 * NOTE(review): an empty initializer for an array of unknown bound is not
 * ISO C, so these definitions depend on a compiler extension.
 */
522 static const uint32_t pp_null_gen7[][4] = {
523 };
524
525 static const uint32_t pp_nv12_load_save_nv12_gen7[][4] = {
526 #include "shaders/post_processing/gen7/pl2_to_pl2.g7b"
527 };
528
529 static const uint32_t pp_nv12_load_save_pl3_gen7[][4] = {
530 #include "shaders/post_processing/gen7/pl2_to_pl3.g7b"
531 };
532
533 static const uint32_t pp_pl3_load_save_nv12_gen7[][4] = {
534 #include "shaders/post_processing/gen7/pl3_to_pl2.g7b"
535 };
536
537 static const uint32_t pp_pl3_load_save_pl3_gen7[][4] = {
538 #include "shaders/post_processing/gen7/pl3_to_pl3.g7b"
539 };
540
541 static const uint32_t pp_nv12_scaling_gen7[][4] = {
542 #include "shaders/post_processing/gen7/avs.g7b"
543 };
544
545 static const uint32_t pp_nv12_avs_gen7[][4] = {
546 #include "shaders/post_processing/gen7/avs.g7b"
547 };
548
549 static const uint32_t pp_nv12_dndi_gen7[][4] = {
550 // #include "shaders/post_processing/gen7/dndi.g7b"
551 };
552
553 static const uint32_t pp_nv12_dn_gen7[][4] = {
554 };
555 static const uint32_t pp_nv12_load_save_pa_gen7[][4] = {
556 };
557 static const uint32_t pp_pl3_load_save_pa_gen7[][4] = {
558 };
559 static const uint32_t pp_pa_load_save_nv12_gen7[][4] = {
560 };
561 static const uint32_t pp_pa_load_save_pl3_gen7[][4] = {
562 };
563
/*
 * Forward declarations of the Gen7-specific "initialize" callbacks used by
 * pp_modules_gen7[].  They take the same parameters as the gen5/gen6
 * callbacks declared earlier in the file.
 */
564 static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
565                                            const struct i965_surface *src_surface,
566                                            const VARectangle *src_rect,
567                                            struct i965_surface *dst_surface,
568                                            const VARectangle *dst_rect,
569                                            void *filter_param);
570 static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
571                                              const struct i965_surface *src_surface,
572                                              const VARectangle *src_rect,
573                                              struct i965_surface *dst_surface,
574                                              const VARectangle *dst_rect,
575                                              void *filter_param);
576 static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
577                                            const struct i965_surface *src_surface,
578                                            const VARectangle *src_rect,
579                                            struct i965_surface *dst_surface,
580                                            const VARectangle *dst_rect,
581                                            void *filter_param);
582
583 static struct pp_module pp_modules_gen7[] = {
584     {
585         {
586             "NULL module (for testing)",
587             PP_NULL,
588             pp_null_gen7,
589             sizeof(pp_null_gen7),
590             NULL,
591         },
592
593         pp_null_initialize,
594     },
595
596     {
597         {
598             "NV12_NV12",
599             PP_NV12_LOAD_SAVE_N12,
600             pp_nv12_load_save_nv12_gen7,
601             sizeof(pp_nv12_load_save_nv12_gen7),
602             NULL,
603         },
604
605         gen7_pp_plx_avs_initialize,
606     },
607
608     {
609         {
610             "NV12_PL3",
611             PP_NV12_LOAD_SAVE_PL3,
612             pp_nv12_load_save_pl3_gen7,
613             sizeof(pp_nv12_load_save_pl3_gen7),
614             NULL,
615         },
616         
617         gen7_pp_plx_avs_initialize,
618     },
619
620     {
621         {
622             "PL3_NV12",
623             PP_PL3_LOAD_SAVE_N12,
624             pp_pl3_load_save_nv12_gen7,
625             sizeof(pp_pl3_load_save_nv12_gen7),
626             NULL,
627         },
628
629         gen7_pp_plx_avs_initialize,
630     },
631
632     {
633         {
634             "PL3_PL3",
635             PP_PL3_LOAD_SAVE_N12,
636             pp_pl3_load_save_pl3_gen7,
637             sizeof(pp_pl3_load_save_pl3_gen7),
638             NULL,
639         },
640
641         gen7_pp_plx_avs_initialize,
642     },
643
644     {
645         {
646             "NV12 Scaling module",
647             PP_NV12_SCALING,
648             pp_nv12_scaling_gen7,
649             sizeof(pp_nv12_scaling_gen7),
650             NULL,
651         },
652
653         gen7_pp_plx_avs_initialize,
654     },
655
656     {
657         {
658             "NV12 AVS module",
659             PP_NV12_AVS,
660             pp_nv12_avs_gen7,
661             sizeof(pp_nv12_avs_gen7),
662             NULL,
663         },
664
665         gen7_pp_plx_avs_initialize,
666     },
667
668     {
669         {
670             "NV12 DNDI module",
671             PP_NV12_DNDI,
672             pp_nv12_dndi_gen7,
673             sizeof(pp_nv12_dndi_gen7),
674             NULL,
675         },
676
677         gen7_pp_nv12_dndi_initialize,
678     },
679
680     {
681         {
682             "NV12 DN module",
683             PP_NV12_DN,
684             pp_nv12_dn_gen7,
685             sizeof(pp_nv12_dn_gen7),
686             NULL,
687         },
688
689         gen7_pp_nv12_dn_initialize,
690     },
691     {
692         {
693             "NV12_PA module",
694             PP_NV12_LOAD_SAVE_PA,
695             pp_nv12_load_save_pa_gen7,
696             sizeof(pp_nv12_load_save_pa_gen7),
697             NULL,
698         },
699     
700         pp_plx_load_save_plx_initialize,
701     },
702
703     {
704         {
705             "PL3_PA module",
706             PP_PL3_LOAD_SAVE_PA,
707             pp_pl3_load_save_pa_gen7,
708             sizeof(pp_pl3_load_save_pa_gen7),
709             NULL,
710         },
711     
712         pp_plx_load_save_plx_initialize,
713     },
714
715     {
716         {
717             "PA_NV12 module",
718             PP_PA_LOAD_SAVE_NV12,
719             pp_pa_load_save_nv12_gen7,
720             sizeof(pp_pa_load_save_nv12_gen7),
721             NULL,
722         },
723     
724         pp_plx_load_save_plx_initialize,
725     },
726
727     {
728         {
729             "PA_PL3 module",
730             PP_PA_LOAD_SAVE_PL3,
731             pp_pa_load_save_pl3_gen7,
732             sizeof(pp_pa_load_save_pl3_gen7),
733             NULL,
734         },
735     
736         pp_plx_load_save_plx_initialize,
737     },
738     
739 };
740
741 static int
742 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
743 {
744     struct i965_driver_data *i965 = i965_driver_data(ctx);
745     int fourcc;
746
747     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
748         struct object_image *obj_image = IMAGE(surface->id);
749         fourcc = obj_image->image.format.fourcc;
750     } else {
751         struct object_surface *obj_surface = SURFACE(surface->id);
752         fourcc = obj_surface->fourcc;
753     }
754
755     return fourcc;
756 }
757
758 static void
759 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
760 {
761     switch (tiling) {
762     case I915_TILING_NONE:
763         ss->ss3.tiled_surface = 0;
764         ss->ss3.tile_walk = 0;
765         break;
766     case I915_TILING_X:
767         ss->ss3.tiled_surface = 1;
768         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
769         break;
770     case I915_TILING_Y:
771         ss->ss3.tiled_surface = 1;
772         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
773         break;
774     }
775 }
776
777 static void
778 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
779 {
780     switch (tiling) {
781     case I915_TILING_NONE:
782         ss->ss2.tiled_surface = 0;
783         ss->ss2.tile_walk = 0;
784         break;
785     case I915_TILING_X:
786         ss->ss2.tiled_surface = 1;
787         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
788         break;
789     case I915_TILING_Y:
790         ss->ss2.tiled_surface = 1;
791         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
792         break;
793     }
794 }
795
796 static void
797 gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
798 {
799     switch (tiling) {
800     case I915_TILING_NONE:
801         ss->ss0.tiled_surface = 0;
802         ss->ss0.tile_walk = 0;
803         break;
804     case I915_TILING_X:
805         ss->ss0.tiled_surface = 1;
806         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
807         break;
808     case I915_TILING_Y:
809         ss->ss0.tiled_surface = 1;
810         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
811         break;
812     }
813 }
814
815 static void
816 gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
817 {
818     switch (tiling) {
819     case I915_TILING_NONE:
820         ss->ss2.tiled_surface = 0;
821         ss->ss2.tile_walk = 0;
822         break;
823     case I915_TILING_X:
824         ss->ss2.tiled_surface = 1;
825         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
826         break;
827     case I915_TILING_Y:
828         ss->ss2.tiled_surface = 1;
829         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
830         break;
831     }
832 }
833
834 static void
835 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
836 {
837     struct i965_interface_descriptor *desc;
838     dri_bo *bo;
839     int pp_index = pp_context->current_pp;
840
841     bo = pp_context->idrt.bo;
842     dri_bo_map(bo, 1);
843     assert(bo->virtual);
844     desc = bo->virtual;
845     memset(desc, 0, sizeof(*desc));
846     desc->desc0.grf_reg_blocks = 10;
847     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
848     desc->desc1.const_urb_entry_read_offset = 0;
849     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
850     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
851     desc->desc2.sampler_count = 0;
852     desc->desc3.binding_table_entry_count = 0;
853     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
854
855     dri_bo_emit_reloc(bo,
856                       I915_GEM_DOMAIN_INSTRUCTION, 0,
857                       desc->desc0.grf_reg_blocks,
858                       offsetof(struct i965_interface_descriptor, desc0),
859                       pp_context->pp_modules[pp_index].kernel.bo);
860
861     dri_bo_emit_reloc(bo,
862                       I915_GEM_DOMAIN_INSTRUCTION, 0,
863                       desc->desc2.sampler_count << 2,
864                       offsetof(struct i965_interface_descriptor, desc2),
865                       pp_context->sampler_state_table.bo);
866
867     dri_bo_unmap(bo);
868     pp_context->idrt.num_interface_descriptors++;
869 }
870
871 static void
872 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
873 {
874     struct i965_vfe_state *vfe_state;
875     dri_bo *bo;
876
877     bo = pp_context->vfe_state.bo;
878     dri_bo_map(bo, 1);
879     assert(bo->virtual);
880     vfe_state = bo->virtual;
881     memset(vfe_state, 0, sizeof(*vfe_state));
882     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
883     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
884     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
885     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
886     vfe_state->vfe1.children_present = 0;
887     vfe_state->vfe2.interface_descriptor_base = 
888         pp_context->idrt.bo->offset >> 4; /* reloc */
889     dri_bo_emit_reloc(bo,
890                       I915_GEM_DOMAIN_INSTRUCTION, 0,
891                       0,
892                       offsetof(struct i965_vfe_state, vfe2),
893                       pp_context->idrt.bo);
894     dri_bo_unmap(bo);
895 }
896
897 static void
898 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
899 {
900     unsigned char *constant_buffer;
901     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
902
903     assert(sizeof(*pp_static_parameter) == 128);
904     dri_bo_map(pp_context->curbe.bo, 1);
905     assert(pp_context->curbe.bo->virtual);
906     constant_buffer = pp_context->curbe.bo->virtual;
907     memcpy(constant_buffer, pp_static_parameter, sizeof(*pp_static_parameter));
908     dri_bo_unmap(pp_context->curbe.bo);
909 }
910
911 static void
912 ironlake_pp_states_setup(VADriverContextP ctx,
913                          struct i965_post_processing_context *pp_context)
914 {
915     ironlake_pp_interface_descriptor_table(pp_context);
916     ironlake_pp_vfe_state(pp_context);
917     ironlake_pp_upload_constants(pp_context);
918 }
919
920 static void
921 ironlake_pp_pipeline_select(VADriverContextP ctx,
922                             struct i965_post_processing_context *pp_context)
923 {
924     struct intel_batchbuffer *batch = pp_context->batch;
925
926     BEGIN_BATCH(batch, 1);
927     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
928     ADVANCE_BATCH(batch);
929 }
930
931 static void
932 ironlake_pp_urb_layout(VADriverContextP ctx,
933                        struct i965_post_processing_context *pp_context)
934 {
935     struct intel_batchbuffer *batch = pp_context->batch;
936     unsigned int vfe_fence, cs_fence;
937
938     vfe_fence = pp_context->urb.cs_start;
939     cs_fence = pp_context->urb.size;
940
941     BEGIN_BATCH(batch, 3);
942     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
943     OUT_BATCH(batch, 0);
944     OUT_BATCH(batch, 
945               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
946               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
947     ADVANCE_BATCH(batch);
948 }
949
950 static void
951 ironlake_pp_state_base_address(VADriverContextP ctx,
952                                struct i965_post_processing_context *pp_context)
953 {
954     struct intel_batchbuffer *batch = pp_context->batch;
955
956     BEGIN_BATCH(batch, 8);
957     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
958     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
959     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
960     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
961     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
962     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
963     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
964     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
965     ADVANCE_BATCH(batch);
966 }
967
968 static void
969 ironlake_pp_state_pointers(VADriverContextP ctx,
970                            struct i965_post_processing_context *pp_context)
971 {
972     struct intel_batchbuffer *batch = pp_context->batch;
973
974     BEGIN_BATCH(batch, 3);
975     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
976     OUT_BATCH(batch, 0);
977     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
978     ADVANCE_BATCH(batch);
979 }
980
981 static void 
982 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
983                           struct i965_post_processing_context *pp_context)
984 {
985     struct intel_batchbuffer *batch = pp_context->batch;
986
987     BEGIN_BATCH(batch, 2);
988     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
989     OUT_BATCH(batch,
990               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
991               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
992     ADVANCE_BATCH(batch);
993 }
994
995 static void
996 ironlake_pp_constant_buffer(VADriverContextP ctx,
997                             struct i965_post_processing_context *pp_context)
998 {
999     struct intel_batchbuffer *batch = pp_context->batch;
1000
1001     BEGIN_BATCH(batch, 2);
1002     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1003     OUT_RELOC(batch, pp_context->curbe.bo,
1004               I915_GEM_DOMAIN_INSTRUCTION, 0,
1005               pp_context->urb.size_cs_entry - 1);
1006     ADVANCE_BATCH(batch);    
1007 }
1008
1009 static void
1010 ironlake_pp_object_walker(VADriverContextP ctx,
1011                           struct i965_post_processing_context *pp_context)
1012 {
1013     struct intel_batchbuffer *batch = pp_context->batch;
1014     int x, x_steps, y, y_steps;
1015     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1016
1017     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
1018     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
1019
1020     for (y = 0; y < y_steps; y++) {
1021         for (x = 0; x < x_steps; x++) {
1022             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
1023                 BEGIN_BATCH(batch, 20);
1024                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
1025                 OUT_BATCH(batch, 0);
1026                 OUT_BATCH(batch, 0); /* no indirect data */
1027                 OUT_BATCH(batch, 0);
1028
1029                 /* inline data grf 5-6 */
1030                 assert(sizeof(*pp_inline_parameter) == 64);
1031                 intel_batchbuffer_data(batch, pp_inline_parameter, sizeof(*pp_inline_parameter));
1032
1033                 ADVANCE_BATCH(batch);
1034             }
1035         }
1036     }
1037 }
1038
1039 static void
1040 ironlake_pp_pipeline_setup(VADriverContextP ctx,
1041                            struct i965_post_processing_context *pp_context)
1042 {
1043     struct intel_batchbuffer *batch = pp_context->batch;
1044
1045     intel_batchbuffer_start_atomic(batch, 0x1000);
1046     intel_batchbuffer_emit_mi_flush(batch);
1047     ironlake_pp_pipeline_select(ctx, pp_context);
1048     ironlake_pp_state_base_address(ctx, pp_context);
1049     ironlake_pp_state_pointers(ctx, pp_context);
1050     ironlake_pp_urb_layout(ctx, pp_context);
1051     ironlake_pp_cs_urb_layout(ctx, pp_context);
1052     ironlake_pp_constant_buffer(ctx, pp_context);
1053     ironlake_pp_object_walker(ctx, pp_context);
1054     intel_batchbuffer_end_atomic(batch);
1055 }
1056
1057 // update u/v offset when the surface format are packed yuv
1058 static void i965_update_src_surface_uv_offset(
1059     VADriverContextP    ctx, 
1060     struct i965_post_processing_context *pp_context,
1061     const struct i965_surface *surface)
1062 {
1063     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1064     int fourcc = pp_get_surface_fourcc(ctx, surface);
1065     
1066     if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) {
1067         pp_static_parameter->grf1.source_packed_u_offset = 1;
1068         pp_static_parameter->grf1.source_packed_v_offset = 3;
1069     } 
1070     else if (fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
1071         pp_static_parameter->grf1.source_packed_y_offset = 1;
1072         pp_static_parameter->grf1.source_packed_v_offset = 2;
1073     }
1074     
1075 }
1076
1077 static void i965_update_dst_surface_uv_offset(
1078     VADriverContextP    ctx, 
1079     struct i965_post_processing_context *pp_context,
1080     const struct i965_surface *surface)
1081 {
1082     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1083     int fourcc = pp_get_surface_fourcc(ctx, surface);
1084     
1085     if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) {
1086         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_u_offset = 1;
1087         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 3;
1088     } 
1089     else if (fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
1090         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_y_offset = 1;
1091         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 2;
1092     }
1093     
1094 }
1095
1096 static void
1097 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1098                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1099                           int width, int height, int pitch, int format, 
1100                           int index, int is_target)
1101 {
1102     struct i965_surface_state *ss;
1103     dri_bo *ss_bo;
1104     unsigned int tiling;
1105     unsigned int swizzle;
1106
1107     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1108     ss_bo = pp_context->surface_state_binding_table.bo;
1109     assert(ss_bo);
1110
1111     dri_bo_map(ss_bo, True);
1112     assert(ss_bo->virtual);
1113     ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1114     memset(ss, 0, sizeof(*ss));
1115     ss->ss0.surface_type = I965_SURFACE_2D;
1116     ss->ss0.surface_format = format;
1117     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1118     ss->ss2.width = width - 1;
1119     ss->ss2.height = height - 1;
1120     ss->ss3.pitch = pitch - 1;
1121     pp_set_surface_tiling(ss, tiling);
1122     dri_bo_emit_reloc(ss_bo,
1123                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1124                       surf_bo_offset,
1125                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
1126                       surf_bo);
1127     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1128     dri_bo_unmap(ss_bo);
1129 }
1130
1131 static void
1132 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1133                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1134                            int width, int height, int wpitch,
1135                            int xoffset, int yoffset,
1136                            int format, int interleave_chroma,
1137                            int index)
1138 {
1139     struct i965_surface_state2 *ss2;
1140     dri_bo *ss2_bo;
1141     unsigned int tiling;
1142     unsigned int swizzle;
1143
1144     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1145     ss2_bo = pp_context->surface_state_binding_table.bo;
1146     assert(ss2_bo);
1147
1148     dri_bo_map(ss2_bo, True);
1149     assert(ss2_bo->virtual);
1150     ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1151     memset(ss2, 0, sizeof(*ss2));
1152     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1153     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1154     ss2->ss1.width = width - 1;
1155     ss2->ss1.height = height - 1;
1156     ss2->ss2.pitch = wpitch - 1;
1157     ss2->ss2.interleave_chroma = interleave_chroma;
1158     ss2->ss2.surface_format = format;
1159     ss2->ss3.x_offset_for_cb = xoffset;
1160     ss2->ss3.y_offset_for_cb = yoffset;
1161     pp_set_surface2_tiling(ss2, tiling);
1162     dri_bo_emit_reloc(ss2_bo,
1163                       I915_GEM_DOMAIN_RENDER, 0,
1164                       surf_bo_offset,
1165                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
1166                       surf_bo);
1167     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1168     dri_bo_unmap(ss2_bo);
1169 }
1170
1171 static void
1172 gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1173                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1174                           int width, int height, int pitch, int format, 
1175                           int index, int is_target)
1176 {
1177     struct gen7_surface_state *ss;
1178     dri_bo *ss_bo;
1179     unsigned int tiling;
1180     unsigned int swizzle;
1181
1182     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1183     ss_bo = pp_context->surface_state_binding_table.bo;
1184     assert(ss_bo);
1185
1186     dri_bo_map(ss_bo, True);
1187     assert(ss_bo->virtual);
1188     ss = (struct gen7_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1189     memset(ss, 0, sizeof(*ss));
1190     ss->ss0.surface_type = I965_SURFACE_2D;
1191     ss->ss0.surface_format = format;
1192     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1193     ss->ss2.width = width - 1;
1194     ss->ss2.height = height - 1;
1195     ss->ss3.pitch = pitch - 1;
1196     gen7_pp_set_surface_tiling(ss, tiling);
1197     dri_bo_emit_reloc(ss_bo,
1198                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1199                       surf_bo_offset,
1200                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
1201                       surf_bo);
1202     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1203     dri_bo_unmap(ss_bo);
1204 }
1205
1206 static void
1207 gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1208                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1209                            int width, int height, int wpitch,
1210                            int xoffset, int yoffset,
1211                            int format, int interleave_chroma,
1212                            int index)
1213 {
1214     struct gen7_surface_state2 *ss2;
1215     dri_bo *ss2_bo;
1216     unsigned int tiling;
1217     unsigned int swizzle;
1218
1219     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1220     ss2_bo = pp_context->surface_state_binding_table.bo;
1221     assert(ss2_bo);
1222
1223     dri_bo_map(ss2_bo, True);
1224     assert(ss2_bo->virtual);
1225     ss2 = (struct gen7_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1226     memset(ss2, 0, sizeof(*ss2));
1227     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1228     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1229     ss2->ss1.width = width - 1;
1230     ss2->ss1.height = height - 1;
1231     ss2->ss2.pitch = wpitch - 1;
1232     ss2->ss2.interleave_chroma = interleave_chroma;
1233     ss2->ss2.surface_format = format;
1234     ss2->ss3.x_offset_for_cb = xoffset;
1235     ss2->ss3.y_offset_for_cb = yoffset;
1236     gen7_pp_set_surface2_tiling(ss2, tiling);
1237     dri_bo_emit_reloc(ss2_bo,
1238                       I915_GEM_DOMAIN_RENDER, 0,
1239                       surf_bo_offset,
1240                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
1241                       surf_bo);
1242     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1243     dri_bo_unmap(ss2_bo);
1244 }
1245
1246 static void 
1247 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1248                                 const struct i965_surface *surface, 
1249                                 int base_index, int is_target,
1250                                 int *width, int *height, int *pitch, int *offset)
1251 {
1252     struct i965_driver_data *i965 = i965_driver_data(ctx);
1253     struct object_surface *obj_surface;
1254     struct object_image *obj_image;
1255     dri_bo *bo;
1256     int fourcc = pp_get_surface_fourcc(ctx, surface);
1257     const int Y = 0;
1258     const int U = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 2 : 1;
1259     const int V = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 1 : 2;
1260     const int UV = 1;
1261     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1262     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); 
1263
1264     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1265         obj_surface = SURFACE(surface->id);
1266         bo = obj_surface->bo;
1267         width[0] = obj_surface->orig_width;
1268         height[0] = obj_surface->orig_height;
1269         pitch[0] = obj_surface->width;
1270         offset[0] = 0;
1271
1272         if (packed_yuv ) {
1273             width[0] = obj_surface->orig_width * 2; 
1274             pitch[0] = obj_surface->width * 2;
1275         }
1276         else if (interleaved_uv) {
1277             width[1] = obj_surface->orig_width;
1278             height[1] = obj_surface->orig_height / 2;
1279             pitch[1] = obj_surface->width;
1280             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1281         } else {
1282             width[1] = obj_surface->orig_width / 2;
1283             height[1] = obj_surface->orig_height / 2;
1284             pitch[1] = obj_surface->width / 2;
1285             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1286             width[2] = obj_surface->orig_width / 2;
1287             height[2] = obj_surface->orig_height / 2;
1288             pitch[2] = obj_surface->width / 2;
1289             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
1290         }
1291     } else {
1292         obj_image = IMAGE(surface->id);
1293         bo = obj_image->bo;
1294         width[0] = obj_image->image.width;
1295         height[0] = obj_image->image.height;
1296         pitch[0] = obj_image->image.pitches[0];
1297         offset[0] = obj_image->image.offsets[0];
1298
1299         if (packed_yuv ) {
1300             width[0] = obj_image->image.width * 2;
1301         }
1302         else if (interleaved_uv) {
1303             width[1] = obj_image->image.width;
1304             height[1] = obj_image->image.height / 2;
1305             pitch[1] = obj_image->image.pitches[1];
1306             offset[1] = obj_image->image.offsets[1];
1307         } else {
1308             width[1] = obj_image->image.width / 2;
1309             height[1] = obj_image->image.height / 2;
1310             pitch[1] = obj_image->image.pitches[1];
1311             offset[1] = obj_image->image.offsets[1];
1312             width[2] = obj_image->image.width / 2;
1313             height[2] = obj_image->image.height / 2;
1314             pitch[2] = obj_image->image.pitches[2];
1315             offset[2] = obj_image->image.offsets[2];
1316         }
1317     }
1318
1319     /* Y surface */
1320     i965_pp_set_surface_state(ctx, pp_context,
1321                               bo, offset[Y],
1322                               width[Y] / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
1323                               base_index, is_target);
1324
1325     if (!packed_yuv) {
1326         if (interleaved_uv) {
1327             i965_pp_set_surface_state(ctx, pp_context,
1328                                       bo, offset[UV],
1329                                       width[UV] / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
1330                                       base_index + 1, is_target);
1331         } else {
1332             /* U surface */
1333             i965_pp_set_surface_state(ctx, pp_context,
1334                                       bo, offset[U],
1335                                       width[U] / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
1336                                       base_index + 1, is_target);
1337
1338             /* V surface */
1339             i965_pp_set_surface_state(ctx, pp_context,
1340                                       bo, offset[V],
1341                                       width[V] / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
1342                                       base_index + 2, is_target);
1343         }
1344     }
1345
1346 }
1347
1348 static void 
1349 gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1350                                      const struct i965_surface *surface, 
1351                                      int base_index, int is_target,
1352                                      int *width, int *height, int *pitch, int *offset)
1353 {
1354     struct i965_driver_data *i965 = i965_driver_data(ctx);
1355     struct object_surface *obj_surface;
1356     struct object_image *obj_image;
1357     dri_bo *bo;
1358     int fourcc = pp_get_surface_fourcc(ctx, surface);
1359     const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1360                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1;
1361     const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1362                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 1 : 2;
1363     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1364
1365     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1366         obj_surface = SURFACE(surface->id);
1367         bo = obj_surface->bo;
1368         width[0] = obj_surface->orig_width;
1369         height[0] = obj_surface->orig_height;
1370         pitch[0] = obj_surface->width;
1371         offset[0] = 0;
1372
1373         width[1] = obj_surface->cb_cr_width;
1374         height[1] = obj_surface->cb_cr_height;
1375         pitch[1] = obj_surface->cb_cr_pitch;
1376         offset[1] = obj_surface->y_cb_offset * obj_surface->width;
1377
1378         width[2] = obj_surface->cb_cr_width;
1379         height[2] = obj_surface->cb_cr_height;
1380         pitch[2] = obj_surface->cb_cr_pitch;
1381         offset[2] = obj_surface->y_cr_offset * obj_surface->width;
1382     } else {
1383         obj_image = IMAGE(surface->id);
1384         bo = obj_image->bo;
1385         width[0] = obj_image->image.width;
1386         height[0] = obj_image->image.height;
1387         pitch[0] = obj_image->image.pitches[0];
1388         offset[0] = obj_image->image.offsets[0];
1389
1390         if (interleaved_uv) {
1391             width[1] = obj_image->image.width / 2;
1392             height[1] = obj_image->image.height / 2;
1393             pitch[1] = obj_image->image.pitches[1];
1394             offset[1] = obj_image->image.offsets[1];
1395         } else {
1396             width[1] = obj_image->image.width / 2;
1397             height[1] = obj_image->image.height / 2;
1398             pitch[1] = obj_image->image.pitches[U];
1399             offset[1] = obj_image->image.offsets[U];
1400             width[2] = obj_image->image.width / 2;
1401             height[2] = obj_image->image.height / 2;
1402             pitch[2] = obj_image->image.pitches[V];
1403             offset[2] = obj_image->image.offsets[V];
1404         }
1405     }
1406
1407     if (is_target) {
1408         gen7_pp_set_surface_state(ctx, pp_context,
1409                                   bo, 0,
1410                                   width[0] / 4, height[0], pitch[0],
1411                                   I965_SURFACEFORMAT_R8_SINT,
1412                                   base_index, 1);
1413
1414         if (interleaved_uv) {
1415             gen7_pp_set_surface_state(ctx, pp_context,
1416                                       bo, offset[1],
1417                                       width[1] / 2, height[1], pitch[1],
1418                                       I965_SURFACEFORMAT_R8G8_SINT,
1419                                       base_index + 1, 1);
1420         } else {
1421             gen7_pp_set_surface_state(ctx, pp_context,
1422                                       bo, offset[1],
1423                                       width[1] / 4, height[1], pitch[1],
1424                                       I965_SURFACEFORMAT_R8_SINT,
1425                                       base_index + 1, 1);
1426             gen7_pp_set_surface_state(ctx, pp_context,
1427                                       bo, offset[2],
1428                                       width[2] / 4, height[2], pitch[2],
1429                                       I965_SURFACEFORMAT_R8_SINT,
1430                                       base_index + 2, 1);
1431         }
1432     } else {
1433         gen7_pp_set_surface2_state(ctx, pp_context,
1434                                    bo, offset[0],
1435                                    width[0], height[0], pitch[0],
1436                                    0, 0,
1437                                    SURFACE_FORMAT_Y8_UNORM, 0,
1438                                    base_index);
1439
1440         if (interleaved_uv) {
1441             gen7_pp_set_surface2_state(ctx, pp_context,
1442                                        bo, offset[1],
1443                                        width[1], height[1], pitch[1],
1444                                        0, 0,
1445                                        SURFACE_FORMAT_R8B8_UNORM, 0,
1446                                        base_index + 1);
1447         } else {
1448             gen7_pp_set_surface2_state(ctx, pp_context,
1449                                        bo, offset[1],
1450                                        width[1], height[1], pitch[1],
1451                                        0, 0,
1452                                        SURFACE_FORMAT_R8_UNORM, 0,
1453                                        base_index + 1);
1454             gen7_pp_set_surface2_state(ctx, pp_context,
1455                                        bo, offset[2],
1456                                        width[2], height[2], pitch[2],
1457                                        0, 0,
1458                                        SURFACE_FORMAT_R8_UNORM, 0,
1459                                        base_index + 2);
1460         }
1461     }
1462 }
1463
/* Horizontal step count for the null kernel: always a single step. */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context;  /* unused */

    return 1;
}
1469
/* Vertical step count for the null kernel: always a single step. */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context;  /* unused */

    return 1;
}
1475
/*
 * Per-block callback for the null kernel: nothing to configure.
 * Always returns 0, which the object walker treats as "emit this block".
 */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;
    (void)x;
    (void)y;

    return 0;
}
1481
1482 static VAStatus
1483 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1484                    const struct i965_surface *src_surface,
1485                    const VARectangle *src_rect,
1486                    struct i965_surface *dst_surface,
1487                    const VARectangle *dst_rect,
1488                    void *filter_param)
1489 {
1490     /* private function & data */
1491     pp_context->pp_x_steps = pp_null_x_steps;
1492     pp_context->pp_y_steps = pp_null_y_steps;
1493     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
1494
1495     dst_surface->flags = src_surface->flags;
1496
1497     return VA_STATUS_SUCCESS;
1498 }
1499
/* Horizontal step count for the load/save kernel: always a single step. */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context;  /* unused */

    return 1;
}
1505
1506 static int
1507 pp_load_save_y_steps(void *private_context)
1508 {
1509     struct pp_load_save_context *pp_load_save_context = private_context;
1510
1511     return pp_load_save_context->dest_h / 8;
1512 }
1513
1514 static int
1515 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1516 {
1517     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1518
1519     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
1520     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
1521     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
1522     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
1523
1524     return 0;
1525 }
1526
1527 static VAStatus
1528 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1529                                 const struct i965_surface *src_surface,
1530                                 const VARectangle *src_rect,
1531                                 struct i965_surface *dst_surface,
1532                                 const VARectangle *dst_rect,
1533                                 void *filter_param)
1534 {
1535     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
1536     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1537     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1538     int width[3], height[3], pitch[3], offset[3];
1539     const int Y = 0;
1540
1541     /* source surface */
1542     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
1543                                     width, height, pitch, offset);
1544
1545     /* destination surface */
1546     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
1547                                     width, height, pitch, offset);
1548
1549     /* private function & data */
1550     pp_context->pp_x_steps = pp_load_save_x_steps;
1551     pp_context->pp_y_steps = pp_load_save_y_steps;
1552     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
1553     pp_load_save_context->dest_h = ALIGN(height[Y], 16);
1554     pp_load_save_context->dest_w = ALIGN(width[Y], 16);
1555
1556     pp_inline_parameter->grf5.block_count_x = ALIGN(width[Y], 16) / 16;   /* 1 x N */
1557     pp_inline_parameter->grf5.number_blocks = ALIGN(width[Y], 16) / 16;
1558
1559     pp_static_parameter->grf3.horizontal_origin_offset = src_rect->x;
1560     pp_static_parameter->grf3.vertical_origin_offset = src_rect->y;
1561
1562     // update u/v offset for packed yuv
1563     i965_update_src_surface_uv_offset (ctx, pp_context, src_surface);
1564     i965_update_dst_surface_uv_offset (ctx, pp_context, dst_surface);
1565
1566     dst_surface->flags = src_surface->flags;
1567
1568     return VA_STATUS_SUCCESS;
1569 }
1570
/* Horizontal step count for the scaling kernel: always a single step. */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context;  /* unused */

    return 1;
}
1576
1577 static int
1578 pp_scaling_y_steps(void *private_context)
1579 {
1580     struct pp_scaling_context *pp_scaling_context = private_context;
1581
1582     return pp_scaling_context->dest_h / 8;
1583 }
1584
1585 static int
1586 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1587 {
1588     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
1589     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1590     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1591     float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1592     float src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
1593
1594     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
1595     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
1596     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
1597     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
1598     
1599     return 0;
1600 }
1601
1602 static VAStatus
1603 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1604                            const struct i965_surface *src_surface,
1605                            const VARectangle *src_rect,
1606                            struct i965_surface *dst_surface,
1607                            const VARectangle *dst_rect,
1608                            void *filter_param)
1609 {
1610     struct i965_driver_data *i965 = i965_driver_data(ctx);
1611     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
1612     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1613     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1614     struct object_surface *obj_surface;
1615     struct i965_sampler_state *sampler_state;
1616     int in_w, in_h, in_wpitch, in_hpitch;
1617     int out_w, out_h, out_wpitch, out_hpitch;
1618
1619     /* source surface */
1620     obj_surface = SURFACE(src_surface->id);
1621     in_w = obj_surface->orig_width;
1622     in_h = obj_surface->orig_height;
1623     in_wpitch = obj_surface->width;
1624     in_hpitch = obj_surface->height;
1625
1626     /* source Y surface index 1 */
1627     i965_pp_set_surface_state(ctx, pp_context,
1628                               obj_surface->bo, 0,
1629                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1630                               1, 0);
1631
1632     /* source UV surface index 2 */
1633     i965_pp_set_surface_state(ctx, pp_context,
1634                               obj_surface->bo, in_wpitch * in_hpitch,
1635                               in_w / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1636                               2, 0);
1637
1638     /* destination surface */
1639     obj_surface = SURFACE(dst_surface->id);
1640     out_w = obj_surface->orig_width;
1641     out_h = obj_surface->orig_height;
1642     out_wpitch = obj_surface->width;
1643     out_hpitch = obj_surface->height;
1644
1645     /* destination Y surface index 7 */
1646     i965_pp_set_surface_state(ctx, pp_context,
1647                               obj_surface->bo, 0,
1648                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1649                               7, 1);
1650
1651     /* destination UV surface index 8 */
1652     i965_pp_set_surface_state(ctx, pp_context,
1653                               obj_surface->bo, out_wpitch * out_hpitch,
1654                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1655                               8, 1);
1656
1657     /* sampler state */
1658     dri_bo_map(pp_context->sampler_state_table.bo, True);
1659     assert(pp_context->sampler_state_table.bo->virtual);
1660     sampler_state = pp_context->sampler_state_table.bo->virtual;
1661
1662     /* SIMD16 Y index 1 */
1663     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
1664     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1665     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1666     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1667     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1668
1669     /* SIMD16 UV index 2 */
1670     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
1671     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1672     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1673     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1674     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1675
1676     dri_bo_unmap(pp_context->sampler_state_table.bo);
1677
1678     /* private function & data */
1679     pp_context->pp_x_steps = pp_scaling_x_steps;
1680     pp_context->pp_y_steps = pp_scaling_y_steps;
1681     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
1682
1683     pp_scaling_context->dest_x = dst_rect->x;
1684     pp_scaling_context->dest_y = dst_rect->y;
1685     pp_scaling_context->dest_w = ALIGN(dst_rect->width, 16);
1686     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 16);
1687     pp_scaling_context->src_normalized_x = (float)src_rect->x / in_w;
1688     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
1689
1690     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
1691
1692     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / dst_rect->width;
1693     pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
1694     pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
1695     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
1696     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
1697
1698     dst_surface->flags = src_surface->flags;
1699
1700     return VA_STATUS_SUCCESS;
1701 }
1702
1703 static int
1704 pp_avs_x_steps(void *private_context)
1705 {
1706     struct pp_avs_context *pp_avs_context = private_context;
1707
1708     return pp_avs_context->dest_w / 16;
1709 }
1710
/*
 * Vertical step count for the AVS walker. Always 1; the private context
 * is not consulted.
 */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1716
1717 static int
1718 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1719 {
1720     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1721     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1722     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1723     float src_x_steping, src_y_steping, video_step_delta;
1724     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
1725
1726     if (pp_static_parameter->grf4.r4_2.avs.nlas == 0) {
1727         src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1728         pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_avs_context->src_normalized_x;
1729     } else if (tmp_w >= pp_avs_context->dest_w) {
1730         pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1731         pp_inline_parameter->grf6.video_step_delta = 0;
1732         
1733         if (x == 0) {
1734             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
1735                 pp_avs_context->src_normalized_x;
1736         } else {
1737             src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1738             video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1739             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1740                 16 * 15 * video_step_delta / 2;
1741         }
1742     } else {
1743         int n0, n1, n2, nls_left, nls_right;
1744         int factor_a = 5, factor_b = 4;
1745         float f;
1746
1747         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
1748         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
1749         n2 = tmp_w / (16 * factor_a);
1750         nls_left = n0 + n2;
1751         nls_right = n1 + n2;
1752         f = (float) n2 * 16 / tmp_w;
1753         
1754         if (n0 < 5) {
1755             pp_inline_parameter->grf6.video_step_delta = 0.0;
1756
1757             if (x == 0) {
1758                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
1759                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1760             } else {
1761                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1762                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1763                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1764                     16 * 15 * video_step_delta / 2;
1765             }
1766         } else {
1767             if (x < nls_left) {
1768                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
1769                 float a = f / (nls_left * 16 * factor_b);
1770                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
1771                 
1772                 pp_inline_parameter->grf6.video_step_delta = b;
1773
1774                 if (x == 0) {
1775                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1776                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a;
1777                 } else {
1778                     src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1779                     video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1780                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1781                         16 * 15 * video_step_delta / 2;
1782                     pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
1783                 }
1784             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
1785                 /* scale the center linearly */
1786                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1787                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1788                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1789                     16 * 15 * video_step_delta / 2;
1790                 pp_inline_parameter->grf6.video_step_delta = 0.0;
1791                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1792             } else {
1793                 float a = f / (nls_right * 16 * factor_b);
1794                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
1795
1796                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1797                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1798                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1799                     16 * 15 * video_step_delta / 2;
1800                 pp_inline_parameter->grf6.video_step_delta = -b;
1801
1802                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
1803                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
1804                 else
1805                     pp_inline_parameter->grf5.normalized_video_x_scaling_step -= b * 16;
1806             }
1807         }
1808     }
1809
1810     src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
1811     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
1812     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
1813     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
1814
1815     return 0;
1816 }
1817
1818 static VAStatus
1819 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1820                        const struct i965_surface *src_surface,
1821                        const VARectangle *src_rect,
1822                        struct i965_surface *dst_surface,
1823                        const VARectangle *dst_rect,
1824                        void *filter_param,
1825                        int nlas)
1826 {
1827     struct i965_driver_data *i965 = i965_driver_data(ctx);
1828     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1829     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1830     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1831     struct object_surface *obj_surface;
1832     struct i965_sampler_8x8 *sampler_8x8;
1833     struct i965_sampler_8x8_state *sampler_8x8_state;
1834     int index;
1835     int in_w, in_h, in_wpitch, in_hpitch;
1836     int out_w, out_h, out_wpitch, out_hpitch;
1837     int i;
1838
1839     /* surface */
1840     obj_surface = SURFACE(src_surface->id);
1841     in_w = obj_surface->orig_width;
1842     in_h = obj_surface->orig_height;
1843     in_wpitch = obj_surface->width;
1844     in_hpitch = obj_surface->height;
1845
1846     /* source Y surface index 1 */
1847     i965_pp_set_surface2_state(ctx, pp_context,
1848                                obj_surface->bo, 0,
1849                                in_w, in_h, in_wpitch,
1850                                0, 0,
1851                                SURFACE_FORMAT_Y8_UNORM, 0,
1852                                1);
1853
1854     /* source UV surface index 2 */
1855     i965_pp_set_surface2_state(ctx, pp_context,
1856                                obj_surface->bo, in_wpitch * in_hpitch,
1857                                in_w / 2, in_h / 2, in_wpitch,
1858                                0, 0,
1859                                SURFACE_FORMAT_R8B8_UNORM, 0,
1860                                2);
1861
1862     /* destination surface */
1863     obj_surface = SURFACE(dst_surface->id);
1864     out_w = obj_surface->orig_width;
1865     out_h = obj_surface->orig_height;
1866     out_wpitch = obj_surface->width;
1867     out_hpitch = obj_surface->height;
1868     assert(out_w <= out_wpitch && out_h <= out_hpitch);
1869
1870     /* destination Y surface index 7 */
1871     i965_pp_set_surface_state(ctx, pp_context,
1872                               obj_surface->bo, 0,
1873                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1874                               7, 1);
1875
1876     /* destination UV surface index 8 */
1877     i965_pp_set_surface_state(ctx, pp_context,
1878                               obj_surface->bo, out_wpitch * out_hpitch,
1879                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1880                               8, 1);
1881
1882     /* sampler 8x8 state */
1883     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
1884     assert(pp_context->sampler_state_table.bo_8x8->virtual);
1885     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1886     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
1887     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1888
1889     for (i = 0; i < 17; i++) {
1890         /* for Y channel, currently ignore */
1891         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x00;
1892         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x00;
1893         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x08;
1894         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x18;
1895         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x18;
1896         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x08;
1897         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x00;
1898         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x00;
1899         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x00;
1900         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x00;
1901         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x10;
1902         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x10;
1903         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x10;
1904         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x10;
1905         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x00;
1906         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x00;
1907         /* for U/V channel, 0.25 */
1908         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
1909         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
1910         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
1911         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
1912         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
1913         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
1914         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
1915         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
1916         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
1917         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
1918         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
1919         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
1920         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
1921         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
1922         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
1923         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
1924     }
1925
1926     sampler_8x8_state->dw136.default_sharpness_level = 0;
1927     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
1928     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1929     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1930     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
1931
1932     /* sampler 8x8 */
1933     dri_bo_map(pp_context->sampler_state_table.bo, True);
1934     assert(pp_context->sampler_state_table.bo->virtual);
1935     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
1936     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
1937
1938     /* sample_8x8 Y index 1 */
1939     index = 1;
1940     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1941     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
1942     sampler_8x8[index].dw0.ief_bypass = 1;
1943     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1944     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1945     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
1946     sampler_8x8[index].dw2.global_noise_estimation = 22;
1947     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1948     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1949     sampler_8x8[index].dw3.strong_edge_weight = 7;
1950     sampler_8x8[index].dw3.regular_weight = 2;
1951     sampler_8x8[index].dw3.non_edge_weight = 0;
1952     sampler_8x8[index].dw3.gain_factor = 40;
1953     sampler_8x8[index].dw4.steepness_boost = 0;
1954     sampler_8x8[index].dw4.steepness_threshold = 0;
1955     sampler_8x8[index].dw4.mr_boost = 0;
1956     sampler_8x8[index].dw4.mr_threshold = 5;
1957     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1958     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1959     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1960     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1961     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1962     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1963     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1964     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1965     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1966     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1967     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1968     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1969     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1970     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1971     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1972     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1973     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1974     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1975     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1976     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1977     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1978     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1979     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1980     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1981     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1982     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1983     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1984     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1985     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1986     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1987     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1988     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1989     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1990     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1991     sampler_8x8[index].dw13.limiter_boost = 0;
1992     sampler_8x8[index].dw13.minimum_limiter = 10;
1993     sampler_8x8[index].dw13.maximum_limiter = 11;
1994     sampler_8x8[index].dw14.clip_limiter = 130;
1995     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1996                       I915_GEM_DOMAIN_RENDER, 
1997                       0,
1998                       0,
1999                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2000                       pp_context->sampler_state_table.bo_8x8);
2001
2002     /* sample_8x8 UV index 2 */
2003     index = 2;
2004     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2005     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2006     sampler_8x8[index].dw0.ief_bypass = 1;
2007     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2008     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2009     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2010     sampler_8x8[index].dw2.global_noise_estimation = 22;
2011     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2012     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2013     sampler_8x8[index].dw3.strong_edge_weight = 7;
2014     sampler_8x8[index].dw3.regular_weight = 2;
2015     sampler_8x8[index].dw3.non_edge_weight = 0;
2016     sampler_8x8[index].dw3.gain_factor = 40;
2017     sampler_8x8[index].dw4.steepness_boost = 0;
2018     sampler_8x8[index].dw4.steepness_threshold = 0;
2019     sampler_8x8[index].dw4.mr_boost = 0;
2020     sampler_8x8[index].dw4.mr_threshold = 5;
2021     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2022     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2023     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2024     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2025     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2026     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2027     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2028     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2029     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2030     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2031     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2032     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2033     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2034     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2035     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2036     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2037     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2038     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2039     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2040     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2041     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2042     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2043     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2044     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2045     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2046     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2047     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2048     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2049     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2050     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2051     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2052     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2053     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2054     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2055     sampler_8x8[index].dw13.limiter_boost = 0;
2056     sampler_8x8[index].dw13.minimum_limiter = 10;
2057     sampler_8x8[index].dw13.maximum_limiter = 11;
2058     sampler_8x8[index].dw14.clip_limiter = 130;
2059     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2060                       I915_GEM_DOMAIN_RENDER, 
2061                       0,
2062                       0,
2063                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2064                       pp_context->sampler_state_table.bo_8x8);
2065
2066     dri_bo_unmap(pp_context->sampler_state_table.bo);
2067
2068     /* private function & data */
2069     pp_context->pp_x_steps = pp_avs_x_steps;
2070     pp_context->pp_y_steps = pp_avs_y_steps;
2071     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
2072
2073     pp_avs_context->dest_x = dst_rect->x;
2074     pp_avs_context->dest_y = dst_rect->y;
2075     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
2076     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
2077     pp_avs_context->src_normalized_x = (float)src_rect->x / in_w;
2078     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
2079     pp_avs_context->src_w = src_rect->width;
2080     pp_avs_context->src_h = src_rect->height;
2081
2082     pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
2083     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2084
2085     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / dst_rect->width;
2086     pp_inline_parameter->grf5.block_count_x = 1;        /* M x 1 */
2087     pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
2088     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
2089     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
2090     pp_inline_parameter->grf6.video_step_delta = 0.0;
2091
2092     dst_surface->flags = src_surface->flags;
2093
2094     return VA_STATUS_SUCCESS;
2095 }
2096
2097 static VAStatus
2098 pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2099                             const struct i965_surface *src_surface,
2100                             const VARectangle *src_rect,
2101                             struct i965_surface *dst_surface,
2102                             const VARectangle *dst_rect,
2103                             void *filter_param)
2104 {
2105     return pp_nv12_avs_initialize(ctx, pp_context,
2106                                   src_surface,
2107                                   src_rect,
2108                                   dst_surface,
2109                                   dst_rect,
2110                                   filter_param,
2111                                   1);
2112 }
2113
2114 static VAStatus
2115 gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2116                              const struct i965_surface *src_surface,
2117                              const VARectangle *src_rect,
2118                              struct i965_surface *dst_surface,
2119                              const VARectangle *dst_rect,
2120                              void *filter_param)
2121 {
2122     return pp_nv12_avs_initialize(ctx, pp_context,
2123                                   src_surface,
2124                                   src_rect,
2125                                   dst_surface,
2126                                   dst_rect,
2127                                   filter_param,
2128                                   0);    
2129 }
2130
2131 static int
2132 gen7_pp_avs_x_steps(void *private_context)
2133 {
2134     struct pp_avs_context *pp_avs_context = private_context;
2135
2136     return pp_avs_context->dest_w / 16;
2137 }
2138
2139 static int
2140 gen7_pp_avs_y_steps(void *private_context)
2141 {
2142     struct pp_avs_context *pp_avs_context = private_context;
2143
2144     return pp_avs_context->dest_h / 16;
2145 }
2146
2147 static int
2148 gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2149 {
2150     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2151     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2152
2153     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2154     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
2155     pp_inline_parameter->grf7.constant_0 = 0xffffffff;
2156     pp_inline_parameter->grf7.sampler_load_main_video_x_scaling_step = 1.0 / pp_avs_context->src_w;
2157
2158     return 0;
2159 }
2160
2161 static VAStatus
2162 gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2163                            const struct i965_surface *src_surface,
2164                            const VARectangle *src_rect,
2165                            struct i965_surface *dst_surface,
2166                            const VARectangle *dst_rect,
2167                            void *filter_param)
2168 {
2169     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2170     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2171     struct gen7_sampler_8x8 *sampler_8x8;
2172     struct i965_sampler_8x8_state *sampler_8x8_state;
2173     int index, i;
2174     int width[3], height[3], pitch[3], offset[3];
2175
2176     /* source surface */
2177     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
2178                                          width, height, pitch, offset);
2179
2180     /* destination surface */
2181     gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
2182                                          width, height, pitch, offset);
2183
2184     /* sampler 8x8 state */
2185     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2186     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2187     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2188     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2189     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2190
2191     for (i = 0; i < 17; i++) {
2192         /* for Y channel, currently ignore */
2193         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0;
2194         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0;
2195         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0;
2196         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x0;
2197         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x0;
2198         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0;
2199         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0;
2200         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0;
2201         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0;
2202         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0;
2203         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0;
2204         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x0;
2205         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x0;
2206         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0;
2207         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0;
2208         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0;
2209         /* for U/V channel, 0.25 */
2210         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2211         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2212         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
2213         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
2214         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
2215         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
2216         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2217         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2218         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2219         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2220         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
2221         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
2222         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
2223         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
2224         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2225         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2226     }
2227
2228     sampler_8x8_state->dw136.default_sharpness_level = 0;
2229     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
2230     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2231     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2232     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2233
2234     /* sampler 8x8 */
2235     dri_bo_map(pp_context->sampler_state_table.bo, True);
2236     assert(pp_context->sampler_state_table.bo->virtual);
2237     assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
2238     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2239
2240     /* sample_8x8 Y index 4 */
2241     index = 4;
2242     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2243     sampler_8x8[index].dw0.global_noise_estimation = 255;
2244     sampler_8x8[index].dw0.ief_bypass = 1;
2245
2246     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2247
2248     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2249     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2250     sampler_8x8[index].dw2.r5x_coefficient = 9;
2251     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2252     sampler_8x8[index].dw2.r5c_coefficient = 3;
2253
2254     sampler_8x8[index].dw3.r3x_coefficient = 27;
2255     sampler_8x8[index].dw3.r3c_coefficient = 5;
2256     sampler_8x8[index].dw3.gain_factor = 40;
2257     sampler_8x8[index].dw3.non_edge_weight = 1;
2258     sampler_8x8[index].dw3.regular_weight = 2;
2259     sampler_8x8[index].dw3.strong_edge_weight = 7;
2260     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2261
2262     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2263                       I915_GEM_DOMAIN_RENDER, 
2264                       0,
2265                       0,
2266                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2267                       pp_context->sampler_state_table.bo_8x8);
2268
2269     /* sample_8x8 UV index 8 */
2270     index = 8;
2271     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2272     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2273     sampler_8x8[index].dw0.global_noise_estimation = 255;
2274     sampler_8x8[index].dw0.ief_bypass = 1;
2275     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2276     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2277     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2278     sampler_8x8[index].dw2.r5x_coefficient = 9;
2279     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2280     sampler_8x8[index].dw2.r5c_coefficient = 3;
2281     sampler_8x8[index].dw3.r3x_coefficient = 27;
2282     sampler_8x8[index].dw3.r3c_coefficient = 5;
2283     sampler_8x8[index].dw3.gain_factor = 40;
2284     sampler_8x8[index].dw3.non_edge_weight = 1;
2285     sampler_8x8[index].dw3.regular_weight = 2;
2286     sampler_8x8[index].dw3.strong_edge_weight = 7;
2287     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2288
2289     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2290                       I915_GEM_DOMAIN_RENDER, 
2291                       0,
2292                       0,
2293                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2294                       pp_context->sampler_state_table.bo_8x8);
2295
2296     /* sampler_8x8 V, index 12 */
2297     index = 12;
2298     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2299     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2300     sampler_8x8[index].dw0.global_noise_estimation = 255;
2301     sampler_8x8[index].dw0.ief_bypass = 1;
2302     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2303     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2304     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2305     sampler_8x8[index].dw2.r5x_coefficient = 9;
2306     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2307     sampler_8x8[index].dw2.r5c_coefficient = 3;
2308     sampler_8x8[index].dw3.r3x_coefficient = 27;
2309     sampler_8x8[index].dw3.r3c_coefficient = 5;
2310     sampler_8x8[index].dw3.gain_factor = 40;
2311     sampler_8x8[index].dw3.non_edge_weight = 1;
2312     sampler_8x8[index].dw3.regular_weight = 2;
2313     sampler_8x8[index].dw3.strong_edge_weight = 7;
2314     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2315
2316     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2317                       I915_GEM_DOMAIN_RENDER, 
2318                       0,
2319                       0,
2320                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2321                       pp_context->sampler_state_table.bo_8x8);
2322
2323     dri_bo_unmap(pp_context->sampler_state_table.bo);
2324
2325     /* private function & data */
2326     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
2327     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
2328     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
2329
2330     pp_avs_context->dest_x = dst_rect->x;
2331     pp_avs_context->dest_y = dst_rect->y;
2332     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
2333     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
2334     pp_avs_context->src_w = src_rect->width;
2335     pp_avs_context->src_h = src_rect->height;
2336
2337     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
2338     dw = MAX(dw, pp_avs_context->dest_w);
2339
2340     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
2341     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
2342     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) 1.0 / pp_avs_context->dest_h;
2343     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = -(float)pp_avs_context->dest_y / pp_avs_context->dest_h;
2344     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = -(float)pp_avs_context->dest_x / dw;
2345
2346     dst_surface->flags = src_surface->flags;
2347
2348     return VA_STATUS_SUCCESS;
2349 }
2350
/*
 * Horizontal step count reported for the DNDI pass (installed as
 * pp_context->pp_x_steps by pp_nv12_dndi_initialize).
 *
 * The private context is not consulted: the answer is always one step.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    const int step_count = 1;

    (void)private_context;

    return step_count;
}
2356
2357 static int
2358 pp_dndi_y_steps(void *private_context)
2359 {
2360     struct pp_dndi_context *pp_dndi_context = private_context;
2361
2362     return pp_dndi_context->dest_h / 4;
2363 }
2364
2365 static int
2366 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2367 {
2368     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2369
2370     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2371     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
2372
2373     return 0;
2374 }
2375
2376 static VAStatus
2377 pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2378                         const struct i965_surface *src_surface,
2379                         const VARectangle *src_rect,
2380                         struct i965_surface *dst_surface,
2381                         const VARectangle *dst_rect,
2382                         void *filter_param)
2383 {
2384     struct i965_driver_data *i965 = i965_driver_data(ctx);
2385     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
2386     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2387     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2388     struct object_surface *obj_surface;
2389     struct i965_sampler_dndi *sampler_dndi;
2390     int index;
2391     int w, h;
2392     int orig_w, orig_h;
2393     int dndi_top_first = 1;
2394
2395     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
2396         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
2397
2398     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
2399         dndi_top_first = 1;
2400     else
2401         dndi_top_first = 0;
2402
2403     /* surface */
2404     obj_surface = SURFACE(src_surface->id);
2405     orig_w = obj_surface->orig_width;
2406     orig_h = obj_surface->orig_height;
2407     w = obj_surface->width;
2408     h = obj_surface->height;
2409
2410     if (pp_context->stmm.bo == NULL) {
2411         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2412                                            "STMM surface",
2413                                            w * h,
2414                                            4096);
2415         assert(pp_context->stmm.bo);
2416     }
2417
2418     /* source UV surface index 2 */
2419     i965_pp_set_surface_state(ctx, pp_context,
2420                               obj_surface->bo, w * h,
2421                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2422                               2, 0);
2423
2424     /* source YUV surface index 4 */
2425     i965_pp_set_surface2_state(ctx, pp_context,
2426                                obj_surface->bo, 0,
2427                                orig_w, orig_h, w,
2428                                0, h,
2429                                SURFACE_FORMAT_PLANAR_420_8, 1,
2430                                4);
2431
2432     /* source STMM surface index 20 */
2433     i965_pp_set_surface_state(ctx, pp_context,
2434                               pp_context->stmm.bo, 0,
2435                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2436                               20, 1);
2437
2438     /* destination surface */
2439     obj_surface = SURFACE(dst_surface->id);
2440     orig_w = obj_surface->orig_width;
2441     orig_h = obj_surface->orig_height;
2442     w = obj_surface->width;
2443     h = obj_surface->height;
2444
2445     /* destination Y surface index 7 */
2446     i965_pp_set_surface_state(ctx, pp_context,
2447                               obj_surface->bo, 0,
2448                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2449                               7, 1);
2450
2451     /* destination UV surface index 8 */
2452     i965_pp_set_surface_state(ctx, pp_context,
2453                               obj_surface->bo, w * h,
2454                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2455                               8, 1);
2456     /* sampler dndi */
2457     dri_bo_map(pp_context->sampler_state_table.bo, True);
2458     assert(pp_context->sampler_state_table.bo->virtual);
2459     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2460     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2461
2462     /* sample dndi index 1 */
2463     index = 0;
2464     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2465     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
2466     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2467     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2468
2469     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2470     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 4;
2471     sampler_dndi[index].dw1.stmm_c2 = 1;
2472     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2473     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2474
2475     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
2476     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
2477     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
2478     sampler_dndi[index].dw2.good_neighbor_threshold = 4;                // 0-63
2479
2480     sampler_dndi[index].dw3.maximum_stmm = 128;
2481     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2482     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2483     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2484     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2485
2486     sampler_dndi[index].dw4.sdi_delta = 8;
2487     sampler_dndi[index].dw4.sdi_threshold = 128;
2488     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2489     sampler_dndi[index].dw4.stmm_shift_up = 0;
2490     sampler_dndi[index].dw4.stmm_shift_down = 0;
2491     sampler_dndi[index].dw4.minimum_stmm = 0;
2492
2493     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 8;
2494     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 32;
2495     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 64;
2496     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 32;
2497
2498     sampler_dndi[index].dw6.dn_enable = 1;
2499     sampler_dndi[index].dw6.di_enable = 1;
2500     sampler_dndi[index].dw6.di_partial = 0;
2501     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2502     sampler_dndi[index].dw6.dndi_stream_id = 0;
2503     sampler_dndi[index].dw6.dndi_first_frame = 1;
2504     sampler_dndi[index].dw6.progressive_dn = 0;
2505     sampler_dndi[index].dw6.fmd_tear_threshold = 63;
2506     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2507     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2508
2509     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
2510     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
2511     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2512     sampler_dndi[index].dw7.column_width_minus1 = 0;
2513
2514     dri_bo_unmap(pp_context->sampler_state_table.bo);
2515
2516     /* private function & data */
2517     pp_context->pp_x_steps = pp_dndi_x_steps;
2518     pp_context->pp_y_steps = pp_dndi_y_steps;
2519     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
2520
2521     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
2522     pp_static_parameter->grf1.r1_6.di.top_field_first = dndi_top_first;
2523     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 0;
2524     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 0;
2525
2526     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
2527     pp_inline_parameter->grf5.number_blocks = w / 16;
2528     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
2529     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
2530
2531     pp_dndi_context->dest_w = w;
2532     pp_dndi_context->dest_h = h;
2533
2534     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
2535
2536     return VA_STATUS_SUCCESS;
2537 }
2538
/*
 * Horizontal step count reported for the DN pass (installed as
 * pp_context->pp_x_steps by pp_nv12_dn_initialize).
 *
 * The private context is ignored; the result is always 1.
 */
static int
pp_dn_x_steps(void *private_context)
{
    const int step_count = 1;

    (void)private_context;

    return step_count;
}
2544
2545 static int
2546 pp_dn_y_steps(void *private_context)
2547 {
2548     struct pp_dn_context *pp_dn_context = private_context;
2549
2550     return pp_dn_context->dest_h / 8;
2551 }
2552
2553 static int
2554 pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2555 {
2556     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2557
2558     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2559     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
2560
2561     return 0;
2562 }
2563
2564 static VAStatus
2565 pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2566                       const struct i965_surface *src_surface,
2567                       const VARectangle *src_rect,
2568                       struct i965_surface *dst_surface,
2569                       const VARectangle *dst_rect,
2570                       void *filter_param)
2571 {
2572     struct i965_driver_data *i965 = i965_driver_data(ctx);
2573     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
2574     struct object_surface *obj_surface;
2575     struct i965_sampler_dndi *sampler_dndi;
2576     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2577     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2578     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
2579     int index;
2580     int w, h;
2581     int orig_w, orig_h;
2582     int dn_strength = 15;
2583     int dndi_top_first = 1;
2584     int dn_progressive = 0;
2585
2586     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
2587         dndi_top_first = 1;
2588         dn_progressive = 1;
2589     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
2590         dndi_top_first = 1;
2591         dn_progressive = 0;
2592     } else {
2593         dndi_top_first = 0;
2594         dn_progressive = 0;
2595     }
2596
2597     if (dn_filter_param) {
2598         float value = dn_filter_param->value;
2599         
2600         if (value > 1.0)
2601             value = 1.0;
2602         
2603         if (value < 0.0)
2604             value = 0.0;
2605
2606         dn_strength = (int)(value * 31.0F);
2607     }
2608
2609     /* surface */
2610     obj_surface = SURFACE(src_surface->id);
2611     orig_w = obj_surface->orig_width;
2612     orig_h = obj_surface->orig_height;
2613     w = obj_surface->width;
2614     h = obj_surface->height;
2615
2616     if (pp_context->stmm.bo == NULL) {
2617         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2618                                            "STMM surface",
2619                                            w * h,
2620                                            4096);
2621         assert(pp_context->stmm.bo);
2622     }
2623
2624     /* source UV surface index 2 */
2625     i965_pp_set_surface_state(ctx, pp_context,
2626                               obj_surface->bo, w * h,
2627                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2628                               2, 0);
2629
2630     /* source YUV surface index 4 */
2631     i965_pp_set_surface2_state(ctx, pp_context,
2632                                obj_surface->bo, 0,
2633                                orig_w, orig_h, w,
2634                                0, h,
2635                                SURFACE_FORMAT_PLANAR_420_8, 1,
2636                                4);
2637
2638     /* source STMM surface index 20 */
2639     i965_pp_set_surface_state(ctx, pp_context,
2640                               pp_context->stmm.bo, 0,
2641                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2642                               20, 1);
2643
2644     /* destination surface */
2645     obj_surface = SURFACE(dst_surface->id);
2646     orig_w = obj_surface->orig_width;
2647     orig_h = obj_surface->orig_height;
2648     w = obj_surface->width;
2649     h = obj_surface->height;
2650
2651     /* destination Y surface index 7 */
2652     i965_pp_set_surface_state(ctx, pp_context,
2653                               obj_surface->bo, 0,
2654                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2655                               7, 1);
2656
2657     /* destination UV surface index 8 */
2658     i965_pp_set_surface_state(ctx, pp_context,
2659                               obj_surface->bo, w * h,
2660                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2661                               8, 1);
2662     /* sampler dn */
2663     dri_bo_map(pp_context->sampler_state_table.bo, True);
2664     assert(pp_context->sampler_state_table.bo->virtual);
2665     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2666     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2667
2668     /* sample dndi index 1 */
2669     index = 0;
2670     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2671     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
2672     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2673     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2674
2675     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2676     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
2677     sampler_dndi[index].dw1.stmm_c2 = 0;
2678     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2679     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2680
2681     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
2682     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
2683     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
2684     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
2685
2686     sampler_dndi[index].dw3.maximum_stmm = 128;
2687     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2688     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2689     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2690     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2691
2692     sampler_dndi[index].dw4.sdi_delta = 8;
2693     sampler_dndi[index].dw4.sdi_threshold = 128;
2694     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2695     sampler_dndi[index].dw4.stmm_shift_up = 0;
2696     sampler_dndi[index].dw4.stmm_shift_down = 0;
2697     sampler_dndi[index].dw4.minimum_stmm = 0;
2698
2699     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
2700     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
2701     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
2702     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
2703
2704     sampler_dndi[index].dw6.dn_enable = 1;
2705     sampler_dndi[index].dw6.di_enable = 0;
2706     sampler_dndi[index].dw6.di_partial = 0;
2707     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2708     sampler_dndi[index].dw6.dndi_stream_id = 1;
2709     sampler_dndi[index].dw6.dndi_first_frame = 1;
2710     sampler_dndi[index].dw6.progressive_dn = dn_progressive;
2711     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
2712     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2713     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2714
2715     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
2716     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
2717     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2718     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
2719
2720     dri_bo_unmap(pp_context->sampler_state_table.bo);
2721
2722     /* private function & data */
2723     pp_context->pp_x_steps = pp_dn_x_steps;
2724     pp_context->pp_y_steps = pp_dn_y_steps;
2725     pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
2726
2727     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
2728     pp_static_parameter->grf1.r1_6.di.top_field_first = 0;
2729     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 64;
2730     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 192;
2731
2732     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
2733     pp_inline_parameter->grf5.number_blocks = w / 16;
2734     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
2735     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
2736
2737     pp_dn_context->dest_w = w;
2738     pp_dn_context->dest_h = h;
2739
2740     dst_surface->flags = src_surface->flags;
2741     
2742     return VA_STATUS_SUCCESS;
2743 }
2744
2745 static int
2746 gen7_pp_dndi_x_steps(void *private_context)
2747 {
2748     struct pp_dndi_context *pp_dndi_context = private_context;
2749
2750     return pp_dndi_context->dest_w / 16;
2751 }
2752
2753 static int
2754 gen7_pp_dndi_y_steps(void *private_context)
2755 {
2756     struct pp_dndi_context *pp_dndi_context = private_context;
2757
2758     return pp_dndi_context->dest_h / 4;
2759 }
2760
2761 static int
2762 gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2763 {
2764     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2765
2766     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16;
2767     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 4;
2768
2769     return 0;
2770 }
2771
2772 static VAStatus
2773 gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2774                              const struct i965_surface *src_surface,
2775                              const VARectangle *src_rect,
2776                              struct i965_surface *dst_surface,
2777                              const VARectangle *dst_rect,
2778                              void *filter_param)
2779 {
2780     struct i965_driver_data *i965 = i965_driver_data(ctx);
2781     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
2782     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2783     struct object_surface *obj_surface;
2784     struct gen7_sampler_dndi *sampler_dndi;
2785     int index;
2786     int w, h;
2787     int orig_w, orig_h;
2788     int dndi_top_first = 1;
2789
2790     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
2791         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
2792
2793     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
2794         dndi_top_first = 1;
2795     else
2796         dndi_top_first = 0;
2797
2798     /* surface */
2799     obj_surface = SURFACE(src_surface->id);
2800     orig_w = obj_surface->orig_width;
2801     orig_h = obj_surface->orig_height;
2802     w = obj_surface->width;
2803     h = obj_surface->height;
2804
2805     if (pp_context->stmm.bo == NULL) {
2806         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2807                                            "STMM surface",
2808                                            w * h,
2809                                            4096);
2810         assert(pp_context->stmm.bo);
2811     }
2812
2813     /* source UV surface index 1 */
2814     gen7_pp_set_surface_state(ctx, pp_context,
2815                               obj_surface->bo, w * h,
2816                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2817                               1, 0);
2818
2819     /* source YUV surface index 3 */
2820     gen7_pp_set_surface2_state(ctx, pp_context,
2821                                obj_surface->bo, 0,
2822                                orig_w, orig_h, w,
2823                                0, h,
2824                                SURFACE_FORMAT_PLANAR_420_8, 1,
2825                                3);
2826
2827     /* source (temporal reference) YUV surface index 4 */
2828     gen7_pp_set_surface2_state(ctx, pp_context,
2829                                obj_surface->bo, 0,
2830                                orig_w, orig_h, w,
2831                                0, h,
2832                                SURFACE_FORMAT_PLANAR_420_8, 1,
2833                                4);
2834
2835     /* STMM / History Statistics input surface, index 5 */
2836     gen7_pp_set_surface_state(ctx, pp_context,
2837                               pp_context->stmm.bo, 0,
2838                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2839                               5, 1);
2840
2841     /* destination surface */
2842     obj_surface = SURFACE(dst_surface->id);
2843     orig_w = obj_surface->orig_width;
2844     orig_h = obj_surface->orig_height;
2845     w = obj_surface->width;
2846     h = obj_surface->height;
2847
2848     /* destination(Previous frame) Y surface index 27 */
2849     gen7_pp_set_surface_state(ctx, pp_context,
2850                               obj_surface->bo, 0,
2851                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2852                               27, 1);
2853
2854     /* destination(Previous frame) UV surface index 28 */
2855     gen7_pp_set_surface_state(ctx, pp_context,
2856                               obj_surface->bo, w * h,
2857                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2858                               28, 1);
2859
2860     /* destination(Current frame) Y surface index 30 */
2861     gen7_pp_set_surface_state(ctx, pp_context,
2862                               obj_surface->bo, 0,
2863                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2864                               30, 1);
2865
2866     /* destination(Current frame) UV surface index 31 */
2867     gen7_pp_set_surface_state(ctx, pp_context,
2868                               obj_surface->bo, w * h,
2869                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2870                               31, 1);
2871
2872     /* STMM output surface, index 33 */
2873     gen7_pp_set_surface_state(ctx, pp_context,
2874                               pp_context->stmm.bo, 0,
2875                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2876                               33, 1);
2877
2878
2879     /* sampler dndi */
2880     dri_bo_map(pp_context->sampler_state_table.bo, True);
2881     assert(pp_context->sampler_state_table.bo->virtual);
2882     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2883     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2884
2885     /* sample dndi index 0 */
2886     index = 0;
2887     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2888     sampler_dndi[index].dw0.dnmh_delt = 8;
2889     sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
2890     sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
2891     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2892     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2893
2894     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2895     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
2896     sampler_dndi[index].dw1.stmm_c2 = 0;
2897     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2898     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2899
2900     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
2901     sampler_dndi[index].dw2.bne_edge_th = 1;
2902     sampler_dndi[index].dw2.smooth_mv_th = 0;
2903     sampler_dndi[index].dw2.sad_tight_th = 5;
2904     sampler_dndi[index].dw2.cat_slope_minus1 = 9;
2905     sampler_dndi[index].dw2.good_neighbor_th = 4;
2906
2907     sampler_dndi[index].dw3.maximum_stmm = 128;
2908     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2909     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2910     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2911     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2912
2913     sampler_dndi[index].dw4.sdi_delta = 8;
2914     sampler_dndi[index].dw4.sdi_threshold = 128;
2915     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2916     sampler_dndi[index].dw4.stmm_shift_up = 0;
2917     sampler_dndi[index].dw4.stmm_shift_down = 0;
2918     sampler_dndi[index].dw4.minimum_stmm = 0;
2919
2920     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
2921     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
2922     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
2923     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
2924
2925     sampler_dndi[index].dw6.dn_enable = 0;
2926     sampler_dndi[index].dw6.di_enable = 1;
2927     sampler_dndi[index].dw6.di_partial = 0;
2928     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2929     sampler_dndi[index].dw6.dndi_stream_id = 1;
2930     sampler_dndi[index].dw6.dndi_first_frame = 1;
2931     sampler_dndi[index].dw6.progressive_dn = 0;
2932     sampler_dndi[index].dw6.mcdi_enable = 0;
2933     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
2934     sampler_dndi[index].dw6.cat_th1 = 0;
2935     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2936     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2937
2938     sampler_dndi[index].dw7.sad_tha = 5;
2939     sampler_dndi[index].dw7.sad_thb = 10;
2940     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
2941     sampler_dndi[index].dw7.mc_pixel_consistency_th = 25;
2942     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
2943     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2944     sampler_dndi[index].dw7.neighborpixel_th = 10;
2945     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
2946
2947     dri_bo_unmap(pp_context->sampler_state_table.bo);
2948
2949     /* private function & data */
2950     pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
2951     pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
2952     pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
2953
2954     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
2955     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
2956     pp_static_parameter->grf1.di_top_field_first = 0;
2957     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
2958
2959     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
2960     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
2961     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
2962
2963     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
2964     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
2965
2966     pp_dndi_context->dest_w = w;
2967     pp_dndi_context->dest_h = h;
2968
2969     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
2970
2971     return VA_STATUS_SUCCESS;
2972 }
2973
/*
 * Horizontal step count for the GEN7 denoise walker.
 *
 * The private context is not consulted: the function always reports
 * a single horizontal step.
 */
static int
gen7_pp_dn_x_steps(void *private_context)
{
    (void)private_context;      /* unused */

    return 1;
}
2979
2980 static int
2981 gen7_pp_dn_y_steps(void *private_context)
2982 {
2983     struct pp_dn_context *pp_dn_context = private_context;
2984
2985     return pp_dn_context->dest_h / 4;
2986 }
2987
2988 static int
2989 gen7_pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2990 {
2991     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2992
2993     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2994     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
2995
2996     return 0;
2997 }
2998
2999 static VAStatus
3000 gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3001                            const struct i965_surface *src_surface,
3002                            const VARectangle *src_rect,
3003                            struct i965_surface *dst_surface,
3004                            const VARectangle *dst_rect,
3005                            void *filter_param)
3006 {
3007     struct i965_driver_data *i965 = i965_driver_data(ctx);
3008     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
3009     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3010     struct object_surface *obj_surface;
3011     struct gen7_sampler_dndi *sampler_dn;
3012     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3013     int index;
3014     int w, h;
3015     int orig_w, orig_h;
3016     int dn_strength = 15;
3017     int dndi_top_first = 1;
3018     int dn_progressive = 0;
3019
3020     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3021         dndi_top_first = 1;
3022         dn_progressive = 1;
3023     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3024         dndi_top_first = 1;
3025         dn_progressive = 0;
3026     } else {
3027         dndi_top_first = 0;
3028         dn_progressive = 0;
3029     }
3030
3031     if (dn_filter_param) {
3032         float value = dn_filter_param->value;
3033         
3034         if (value > 1.0)
3035             value = 1.0;
3036         
3037         if (value < 0.0)
3038             value = 0.0;
3039
3040         dn_strength = (int)(value * 31.0F);
3041     }
3042
3043     /* surface */
3044     obj_surface = SURFACE(src_surface->id);
3045     orig_w = obj_surface->orig_width;
3046     orig_h = obj_surface->orig_height;
3047     w = obj_surface->width;
3048     h = obj_surface->height;
3049
3050     if (pp_context->stmm.bo == NULL) {
3051         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
3052                                            "STMM surface",
3053                                            w * h,
3054                                            4096);
3055         assert(pp_context->stmm.bo);
3056     }
3057
3058     /* source UV surface index 1 */
3059     gen7_pp_set_surface_state(ctx, pp_context,
3060                               obj_surface->bo, w * h,
3061                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3062                               1, 0);
3063
3064     /* source YUV surface index 3 */
3065     gen7_pp_set_surface2_state(ctx, pp_context,
3066                                obj_surface->bo, 0,
3067                                orig_w, orig_h, w,
3068                                0, h,
3069                                SURFACE_FORMAT_PLANAR_420_8, 1,
3070                                3);
3071
3072     /* source STMM surface index 5 */
3073     gen7_pp_set_surface_state(ctx, pp_context,
3074                               pp_context->stmm.bo, 0,
3075                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3076                               5, 1);
3077
3078     /* destination surface */
3079     obj_surface = SURFACE(dst_surface->id);
3080     orig_w = obj_surface->orig_width;
3081     orig_h = obj_surface->orig_height;
3082     w = obj_surface->width;
3083     h = obj_surface->height;
3084
3085     /* destination Y surface index 7 */
3086     gen7_pp_set_surface_state(ctx, pp_context,
3087                               obj_surface->bo, 0,
3088                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3089                               7, 1);
3090
3091     /* destination UV surface index 8 */
3092     gen7_pp_set_surface_state(ctx, pp_context,
3093                               obj_surface->bo, w * h,
3094                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3095                               8, 1);
3096     /* sampler dn */
3097     dri_bo_map(pp_context->sampler_state_table.bo, True);
3098     assert(pp_context->sampler_state_table.bo->virtual);
3099     assert(sizeof(*sampler_dn) == sizeof(int) * 8);
3100     sampler_dn = pp_context->sampler_state_table.bo->virtual;
3101
3102     /* sample dn index 1 */
3103     index = 0;
3104     sampler_dn[index].dw0.denoise_asd_threshold = 0;
3105     sampler_dn[index].dw0.dnmh_delt = 8;
3106     sampler_dn[index].dw0.vdi_walker_y_stride = 0;
3107     sampler_dn[index].dw0.vdi_walker_frame_sharing_enable = 0;
3108     sampler_dn[index].dw0.denoise_maximum_history = 128;      // 128-240
3109     sampler_dn[index].dw0.denoise_stad_threshold = 0;
3110
3111     sampler_dn[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3112     sampler_dn[index].dw1.denoise_moving_pixel_threshold = 0;
3113     sampler_dn[index].dw1.stmm_c2 = 0;
3114     sampler_dn[index].dw1.low_temporal_difference_threshold = 8;
3115     sampler_dn[index].dw1.temporal_difference_threshold = 16;
3116
3117     sampler_dn[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
3118     sampler_dn[index].dw2.bne_edge_th = 1;
3119     sampler_dn[index].dw2.smooth_mv_th = 0;
3120     sampler_dn[index].dw2.sad_tight_th = 5;
3121     sampler_dn[index].dw2.cat_slope_minus1 = 9;
3122     sampler_dn[index].dw2.good_neighbor_th = 4;
3123
3124     sampler_dn[index].dw3.maximum_stmm = 128;
3125     sampler_dn[index].dw3.multipler_for_vecm = 2;
3126     sampler_dn[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3127     sampler_dn[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3128     sampler_dn[index].dw3.stmm_blending_constant_select = 0;
3129
3130     sampler_dn[index].dw4.sdi_delta = 8;
3131     sampler_dn[index].dw4.sdi_threshold = 128;
3132     sampler_dn[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3133     sampler_dn[index].dw4.stmm_shift_up = 0;
3134     sampler_dn[index].dw4.stmm_shift_down = 0;
3135     sampler_dn[index].dw4.minimum_stmm = 0;
3136
3137     sampler_dn[index].dw5.fmd_temporal_difference_threshold = 0;
3138     sampler_dn[index].dw5.sdi_fallback_mode_2_constant = 0;
3139     sampler_dn[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3140     sampler_dn[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3141
3142     sampler_dn[index].dw6.dn_enable = 1;
3143     sampler_dn[index].dw6.di_enable = 0;
3144     sampler_dn[index].dw6.di_partial = 0;
3145     sampler_dn[index].dw6.dndi_top_first = dndi_top_first;
3146     sampler_dn[index].dw6.dndi_stream_id = 1;
3147     sampler_dn[index].dw6.dndi_first_frame = 1;
3148     sampler_dn[index].dw6.progressive_dn = dn_progressive;
3149     sampler_dn[index].dw6.mcdi_enable = 0;
3150     sampler_dn[index].dw6.fmd_tear_threshold = 32;
3151     sampler_dn[index].dw6.cat_th1 = 0;
3152     sampler_dn[index].dw6.fmd2_vertical_difference_threshold = 32;
3153     sampler_dn[index].dw6.fmd1_vertical_difference_threshold = 32;
3154
3155     sampler_dn[index].dw7.sad_tha = 5;
3156     sampler_dn[index].dw7.sad_thb = 10;
3157     sampler_dn[index].dw7.fmd_for_1st_field_of_current_frame = 2;
3158     sampler_dn[index].dw7.mc_pixel_consistency_th = 25;
3159     sampler_dn[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
3160     sampler_dn[index].dw7.vdi_walker_enable = 0;
3161     sampler_dn[index].dw7.neighborpixel_th = 10;
3162     sampler_dn[index].dw7.column_width_minus1 = w / 16;
3163
3164     dri_bo_unmap(pp_context->sampler_state_table.bo);
3165
3166     /* private function & data */
3167     pp_context->pp_x_steps = gen7_pp_dn_x_steps;
3168     pp_context->pp_y_steps = gen7_pp_dn_y_steps;
3169     pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;
3170
3171     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3172     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3173     pp_static_parameter->grf1.di_top_field_first = 0;
3174     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3175
3176     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3177     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3178     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3179
3180     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3181     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3182
3183     pp_dn_context->dest_w = w;
3184     pp_dn_context->dest_h = h;
3185
3186     dst_surface->flags = src_surface->flags;
3187
3188     return VA_STATUS_SUCCESS;
3189 }
3190
3191 static VAStatus
3192 ironlake_pp_initialize(
3193     VADriverContextP   ctx,
3194     struct i965_post_processing_context *pp_context,
3195     const struct i965_surface *src_surface,
3196     const VARectangle *src_rect,
3197     struct i965_surface *dst_surface,
3198     const VARectangle *dst_rect,
3199     int                pp_index,
3200     void *filter_param
3201 )
3202 {
3203     VAStatus va_status;
3204     struct i965_driver_data *i965 = i965_driver_data(ctx);
3205     struct pp_module *pp_module;
3206     dri_bo *bo;
3207     int static_param_size, inline_param_size;
3208
3209     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3210     bo = dri_bo_alloc(i965->intel.bufmgr,
3211                       "surface state & binding table",
3212                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
3213                       4096);
3214     assert(bo);
3215     pp_context->surface_state_binding_table.bo = bo;
3216
3217     dri_bo_unreference(pp_context->curbe.bo);
3218     bo = dri_bo_alloc(i965->intel.bufmgr,
3219                       "constant buffer",
3220                       4096, 
3221                       4096);
3222     assert(bo);
3223     pp_context->curbe.bo = bo;
3224
3225     dri_bo_unreference(pp_context->idrt.bo);
3226     bo = dri_bo_alloc(i965->intel.bufmgr, 
3227                       "interface discriptor", 
3228                       sizeof(struct i965_interface_descriptor), 
3229                       4096);
3230     assert(bo);
3231     pp_context->idrt.bo = bo;
3232     pp_context->idrt.num_interface_descriptors = 0;
3233
3234     dri_bo_unreference(pp_context->sampler_state_table.bo);
3235     bo = dri_bo_alloc(i965->intel.bufmgr, 
3236                       "sampler state table", 
3237                       4096,
3238                       4096);
3239     assert(bo);
3240     dri_bo_map(bo, True);
3241     memset(bo->virtual, 0, bo->size);
3242     dri_bo_unmap(bo);
3243     pp_context->sampler_state_table.bo = bo;
3244
3245     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3246     bo = dri_bo_alloc(i965->intel.bufmgr, 
3247                       "sampler 8x8 state ",
3248                       4096,
3249                       4096);
3250     assert(bo);
3251     pp_context->sampler_state_table.bo_8x8 = bo;
3252
3253     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3254     bo = dri_bo_alloc(i965->intel.bufmgr, 
3255                       "sampler 8x8 state ",
3256                       4096,
3257                       4096);
3258     assert(bo);
3259     pp_context->sampler_state_table.bo_8x8_uv = bo;
3260
3261     dri_bo_unreference(pp_context->vfe_state.bo);
3262     bo = dri_bo_alloc(i965->intel.bufmgr, 
3263                       "vfe state", 
3264                       sizeof(struct i965_vfe_state), 
3265                       4096);
3266     assert(bo);
3267     pp_context->vfe_state.bo = bo;
3268
3269     static_param_size = sizeof(struct pp_static_parameter);
3270     inline_param_size = sizeof(struct pp_inline_parameter);
3271
3272     memset(pp_context->pp_static_parameter, 0, static_param_size);
3273     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
3274     
3275     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
3276     pp_context->current_pp = pp_index;
3277     pp_module = &pp_context->pp_modules[pp_index];
3278     
3279     if (pp_module->initialize)
3280         va_status = pp_module->initialize(ctx, pp_context,
3281                                           src_surface,
3282                                           src_rect,
3283                                           dst_surface,
3284                                           dst_rect,
3285                                           filter_param);
3286     else
3287         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
3288
3289     return va_status;
3290 }
3291
3292 static VAStatus
3293 ironlake_post_processing(
3294     VADriverContextP   ctx,
3295     struct i965_post_processing_context *pp_context,
3296     const struct i965_surface *src_surface,
3297     const VARectangle *src_rect,
3298     struct i965_surface *dst_surface,
3299     const VARectangle *dst_rect,
3300     int                pp_index,
3301     void *filter_param
3302 )
3303 {
3304     VAStatus va_status;
3305
3306     va_status = ironlake_pp_initialize(ctx, pp_context,
3307                                        src_surface,
3308                                        src_rect,
3309                                        dst_surface,
3310                                        dst_rect,
3311                                        pp_index,
3312                                        filter_param);
3313
3314     if (va_status == VA_STATUS_SUCCESS) {
3315         ironlake_pp_states_setup(ctx, pp_context);
3316         ironlake_pp_pipeline_setup(ctx, pp_context);
3317     }
3318
3319     return va_status;
3320 }
3321
3322 static VAStatus
3323 gen6_pp_initialize(
3324     VADriverContextP   ctx,
3325     struct i965_post_processing_context *pp_context,
3326     const struct i965_surface *src_surface,
3327     const VARectangle *src_rect,
3328     struct i965_surface *dst_surface,
3329     const VARectangle *dst_rect,
3330     int                pp_index,
3331     void *filter_param
3332 )
3333 {
3334     VAStatus va_status;
3335     struct i965_driver_data *i965 = i965_driver_data(ctx);
3336     struct pp_module *pp_module;
3337     dri_bo *bo;
3338     int static_param_size, inline_param_size;
3339
3340     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3341     bo = dri_bo_alloc(i965->intel.bufmgr,
3342                       "surface state & binding table",
3343                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
3344                       4096);
3345     assert(bo);
3346     pp_context->surface_state_binding_table.bo = bo;
3347
3348     dri_bo_unreference(pp_context->curbe.bo);
3349     bo = dri_bo_alloc(i965->intel.bufmgr,
3350                       "constant buffer",
3351                       4096, 
3352                       4096);
3353     assert(bo);
3354     pp_context->curbe.bo = bo;
3355
3356     dri_bo_unreference(pp_context->idrt.bo);
3357     bo = dri_bo_alloc(i965->intel.bufmgr, 
3358                       "interface discriptor", 
3359                       sizeof(struct gen6_interface_descriptor_data), 
3360                       4096);
3361     assert(bo);
3362     pp_context->idrt.bo = bo;
3363     pp_context->idrt.num_interface_descriptors = 0;
3364
3365     dri_bo_unreference(pp_context->sampler_state_table.bo);
3366     bo = dri_bo_alloc(i965->intel.bufmgr, 
3367                       "sampler state table", 
3368                       4096,
3369                       4096);
3370     assert(bo);
3371     dri_bo_map(bo, True);
3372     memset(bo->virtual, 0, bo->size);
3373     dri_bo_unmap(bo);
3374     pp_context->sampler_state_table.bo = bo;
3375
3376     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3377     bo = dri_bo_alloc(i965->intel.bufmgr, 
3378                       "sampler 8x8 state ",
3379                       4096,
3380                       4096);
3381     assert(bo);
3382     pp_context->sampler_state_table.bo_8x8 = bo;
3383
3384     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3385     bo = dri_bo_alloc(i965->intel.bufmgr, 
3386                       "sampler 8x8 state ",
3387                       4096,
3388                       4096);
3389     assert(bo);
3390     pp_context->sampler_state_table.bo_8x8_uv = bo;
3391
3392     dri_bo_unreference(pp_context->vfe_state.bo);
3393     bo = dri_bo_alloc(i965->intel.bufmgr, 
3394                       "vfe state", 
3395                       sizeof(struct i965_vfe_state), 
3396                       4096);
3397     assert(bo);
3398     pp_context->vfe_state.bo = bo;
3399     
3400     if (IS_GEN7(i965->intel.device_id)) {
3401         static_param_size = sizeof(struct gen7_pp_static_parameter);
3402         inline_param_size = sizeof(struct gen7_pp_inline_parameter);
3403     } else {
3404         static_param_size = sizeof(struct pp_static_parameter);
3405         inline_param_size = sizeof(struct pp_inline_parameter);
3406     }
3407
3408     memset(pp_context->pp_static_parameter, 0, static_param_size);
3409     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
3410
3411     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
3412     pp_context->current_pp = pp_index;
3413     pp_module = &pp_context->pp_modules[pp_index];
3414     
3415     if (pp_module->initialize)
3416         va_status = pp_module->initialize(ctx, pp_context,
3417                                           src_surface,
3418                                           src_rect,
3419                                           dst_surface,
3420                                           dst_rect,
3421                                           filter_param);
3422     else
3423         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
3424
3425     return va_status;
3426 }
3427
3428 static void
3429 gen6_pp_interface_descriptor_table(VADriverContextP   ctx,
3430                                    struct i965_post_processing_context *pp_context)
3431 {
3432     struct i965_driver_data *i965 = i965_driver_data(ctx);
3433     struct gen6_interface_descriptor_data *desc;
3434     dri_bo *bo;
3435     int pp_index = pp_context->current_pp;
3436
3437     bo = pp_context->idrt.bo;
3438     dri_bo_map(bo, True);
3439     assert(bo->virtual);
3440     desc = bo->virtual;
3441     memset(desc, 0, sizeof(*desc));
3442     desc->desc0.kernel_start_pointer = 
3443         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
3444     desc->desc1.single_program_flow = 1;
3445     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
3446     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
3447     desc->desc2.sampler_state_pointer = 
3448         pp_context->sampler_state_table.bo->offset >> 5;
3449     desc->desc3.binding_table_entry_count = 0;
3450     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
3451     desc->desc4.constant_urb_entry_read_offset = 0;
3452
3453     if (IS_GEN7(i965->intel.device_id))
3454         desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */
3455     else
3456         desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
3457
3458     dri_bo_emit_reloc(bo,
3459                       I915_GEM_DOMAIN_INSTRUCTION, 0,
3460                       0,
3461                       offsetof(struct gen6_interface_descriptor_data, desc0),
3462                       pp_context->pp_modules[pp_index].kernel.bo);
3463
3464     dri_bo_emit_reloc(bo,
3465                       I915_GEM_DOMAIN_INSTRUCTION, 0,
3466                       desc->desc2.sampler_count << 2,
3467                       offsetof(struct gen6_interface_descriptor_data, desc2),
3468                       pp_context->sampler_state_table.bo);
3469
3470     dri_bo_unmap(bo);
3471     pp_context->idrt.num_interface_descriptors++;
3472 }
3473
3474 static void
3475 gen6_pp_upload_constants(VADriverContextP ctx,
3476                          struct i965_post_processing_context *pp_context)
3477 {
3478     struct i965_driver_data *i965 = i965_driver_data(ctx);
3479     unsigned char *constant_buffer;
3480     int param_size;
3481
3482     assert(sizeof(struct pp_static_parameter) == 128);
3483     assert(sizeof(struct gen7_pp_static_parameter) == 192);
3484
3485     if (IS_GEN7(i965->intel.device_id))
3486         param_size = sizeof(struct gen7_pp_static_parameter);
3487     else
3488         param_size = sizeof(struct pp_static_parameter);
3489
3490     dri_bo_map(pp_context->curbe.bo, 1);
3491     assert(pp_context->curbe.bo->virtual);
3492     constant_buffer = pp_context->curbe.bo->virtual;
3493     memcpy(constant_buffer, pp_context->pp_static_parameter, param_size);
3494     dri_bo_unmap(pp_context->curbe.bo);
3495 }
3496
3497 static void
3498 gen6_pp_states_setup(VADriverContextP ctx,
3499                      struct i965_post_processing_context *pp_context)
3500 {
3501     gen6_pp_interface_descriptor_table(ctx, pp_context);
3502     gen6_pp_upload_constants(ctx, pp_context);
3503 }
3504
3505 static void
3506 gen6_pp_pipeline_select(VADriverContextP ctx,
3507                         struct i965_post_processing_context *pp_context)
3508 {
3509     struct intel_batchbuffer *batch = pp_context->batch;
3510
3511     BEGIN_BATCH(batch, 1);
3512     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
3513     ADVANCE_BATCH(batch);
3514 }
3515
3516 static void
3517 gen6_pp_state_base_address(VADriverContextP ctx,
3518                            struct i965_post_processing_context *pp_context)
3519 {
3520     struct intel_batchbuffer *batch = pp_context->batch;
3521
3522     BEGIN_BATCH(batch, 10);
3523     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
3524     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3525     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
3526     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3527     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3528     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3529     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3530     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3531     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3532     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3533     ADVANCE_BATCH(batch);
3534 }
3535
3536 static void
3537 gen6_pp_vfe_state(VADriverContextP ctx,
3538                   struct i965_post_processing_context *pp_context)
3539 {
3540     struct intel_batchbuffer *batch = pp_context->batch;
3541
3542     BEGIN_BATCH(batch, 8);
3543     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
3544     OUT_BATCH(batch, 0);
3545     OUT_BATCH(batch,
3546               (pp_context->urb.num_vfe_entries - 1) << 16 |
3547               pp_context->urb.num_vfe_entries << 8);
3548     OUT_BATCH(batch, 0);
3549     OUT_BATCH(batch,
3550               (pp_context->urb.size_vfe_entry * 2) << 16 |  /* URB Entry Allocation Size, in 256 bits unit */
3551               (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2)); /* CURBE Allocation Size, in 256 bits unit */
3552     OUT_BATCH(batch, 0);
3553     OUT_BATCH(batch, 0);
3554     OUT_BATCH(batch, 0);
3555     ADVANCE_BATCH(batch);
3556 }
3557
3558 static void
3559 gen6_pp_curbe_load(VADriverContextP ctx,
3560                    struct i965_post_processing_context *pp_context)
3561 {
3562     struct intel_batchbuffer *batch = pp_context->batch;
3563
3564     assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32 <= pp_context->curbe.bo->size);
3565
3566     BEGIN_BATCH(batch, 4);
3567     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
3568     OUT_BATCH(batch, 0);
3569     OUT_BATCH(batch,
3570               pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32);
3571     OUT_RELOC(batch, 
3572               pp_context->curbe.bo,
3573               I915_GEM_DOMAIN_INSTRUCTION, 0,
3574               0);
3575     ADVANCE_BATCH(batch);
3576 }
3577
3578 static void
3579 gen6_interface_descriptor_load(VADriverContextP ctx,
3580                                struct i965_post_processing_context *pp_context)
3581 {
3582     struct intel_batchbuffer *batch = pp_context->batch;
3583
3584     BEGIN_BATCH(batch, 4);
3585     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
3586     OUT_BATCH(batch, 0);
3587     OUT_BATCH(batch,
3588               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
3589     OUT_RELOC(batch, 
3590               pp_context->idrt.bo,
3591               I915_GEM_DOMAIN_INSTRUCTION, 0,
3592               0);
3593     ADVANCE_BATCH(batch);
3594 }
3595
3596 static void
3597 gen6_pp_object_walker(VADriverContextP ctx,
3598                       struct i965_post_processing_context *pp_context)
3599 {
3600     struct i965_driver_data *i965 = i965_driver_data(ctx);
3601     struct intel_batchbuffer *batch = pp_context->batch;
3602     int x, x_steps, y, y_steps;
3603     int param_size, command_length_in_dws;
3604     dri_bo *command_buffer;
3605     unsigned int *command_ptr;
3606
3607     if (IS_GEN7(i965->intel.device_id))
3608         param_size = sizeof(struct gen7_pp_inline_parameter);
3609     else
3610         param_size = sizeof(struct pp_inline_parameter);
3611
3612     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
3613     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
3614     command_length_in_dws = 6 + (param_size >> 2);
3615     command_buffer = dri_bo_alloc(i965->intel.bufmgr,
3616                                   "command objects buffer",
3617                                   command_length_in_dws * 4 * x_steps * y_steps + 8,
3618                                   4096);
3619
3620     dri_bo_map(command_buffer, 1);
3621     command_ptr = command_buffer->virtual;
3622
3623     for (y = 0; y < y_steps; y++) {
3624         for (x = 0; x < x_steps; x++) {
3625             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
3626                 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
3627                 *command_ptr++ = 0;
3628                 *command_ptr++ = 0;
3629                 *command_ptr++ = 0;
3630                 *command_ptr++ = 0;
3631                 *command_ptr++ = 0;
3632                 memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
3633                 command_ptr += (param_size >> 2);
3634             }
3635         }
3636     }
3637
3638     if (command_length_in_dws * x_steps * y_steps % 2 == 0)
3639         *command_ptr++ = 0;
3640
3641     *command_ptr = MI_BATCH_BUFFER_END;
3642
3643     dri_bo_unmap(command_buffer);
3644
3645     BEGIN_BATCH(batch, 2);
3646     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
3647     OUT_RELOC(batch, command_buffer, 
3648               I915_GEM_DOMAIN_COMMAND, 0, 
3649               0);
3650     ADVANCE_BATCH(batch);
3651     
3652     dri_bo_unreference(command_buffer);
3653
3654     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
3655      * will cause control to pass back to ring buffer 
3656      */
3657     intel_batchbuffer_end_atomic(batch);
3658     intel_batchbuffer_flush(batch);
3659     intel_batchbuffer_start_atomic(batch, 0x1000);
3660 }
3661
3662 static void
3663 gen6_pp_pipeline_setup(VADriverContextP ctx,
3664                        struct i965_post_processing_context *pp_context)
3665 {
3666     struct intel_batchbuffer *batch = pp_context->batch;
3667
3668     intel_batchbuffer_start_atomic(batch, 0x1000);
3669     intel_batchbuffer_emit_mi_flush(batch);
3670     gen6_pp_pipeline_select(ctx, pp_context);
3671     gen6_pp_state_base_address(ctx, pp_context);
3672     gen6_pp_vfe_state(ctx, pp_context);
3673     gen6_pp_curbe_load(ctx, pp_context);
3674     gen6_interface_descriptor_load(ctx, pp_context);
3675     gen6_pp_object_walker(ctx, pp_context);
3676     intel_batchbuffer_end_atomic(batch);
3677 }
3678
3679 static VAStatus
3680 gen6_post_processing(
3681     VADriverContextP   ctx,
3682     struct i965_post_processing_context *pp_context,
3683     const struct i965_surface *src_surface,
3684     const VARectangle *src_rect,
3685     struct i965_surface *dst_surface,
3686     const VARectangle *dst_rect,
3687     int                pp_index,
3688     void * filter_param
3689 )
3690 {
3691     VAStatus va_status;
3692     
3693     va_status = gen6_pp_initialize(ctx, pp_context,
3694                                    src_surface,
3695                                    src_rect,
3696                                    dst_surface,
3697                                    dst_rect,
3698                                    pp_index,
3699                                    filter_param);
3700
3701     if (va_status == VA_STATUS_SUCCESS) {
3702         gen6_pp_states_setup(ctx, pp_context);
3703         gen6_pp_pipeline_setup(ctx, pp_context);
3704     }
3705
3706     return va_status;
3707 }
3708
3709 static VAStatus
3710 i965_post_processing_internal(
3711     VADriverContextP   ctx,
3712     struct i965_post_processing_context *pp_context,
3713     const struct i965_surface *src_surface,
3714     const VARectangle *src_rect,
3715     struct i965_surface *dst_surface,
3716     const VARectangle *dst_rect,
3717     int                pp_index,
3718     void *filter_param
3719 )
3720 {
3721     struct i965_driver_data *i965 = i965_driver_data(ctx);
3722     VAStatus va_status;
3723
3724     if (IS_GEN6(i965->intel.device_id) ||
3725         IS_GEN7(i965->intel.device_id))
3726         va_status = gen6_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
3727     else
3728         va_status = ironlake_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
3729     
3730     return va_status;
3731 }
3732
3733 VAStatus 
3734 i965_DestroySurfaces(VADriverContextP ctx,
3735                      VASurfaceID *surface_list,
3736                      int num_surfaces);
3737 VAStatus 
3738 i965_CreateSurfaces(VADriverContextP ctx,
3739                     int width,
3740                     int height,
3741                     int format,
3742                     int num_surfaces,
3743                     VASurfaceID *surfaces);
3744
/*
 * Split a packed 32-bit ARGB value (alpha in bits 31-24, red in 23-16,
 * green in 15-8, blue in 7-0) into Y, U, V and alpha bytes.
 *
 * The colour conversion uses integer coefficients scaled by 1000 with
 * truncating division; Y is offset by 16 and U/V by 128.  Alpha is
 * passed through unchanged.
 */
static void
rgb_to_yuv(unsigned int argb,
           unsigned char *y,
           unsigned char *u,
           unsigned char *v,
           unsigned char *a)
{
    const int alpha = (argb >> 24) & 0xff;
    const int red   = (argb >> 16) & 0xff;
    const int green = (argb >> 8) & 0xff;
    const int blue  = argb & 0xff;

    const int luma      = (257 * red + 504 * green + 98 * blue) / 1000 + 16;
    const int chroma_v  = (439 * red - 368 * green - 71 * blue) / 1000 + 128;
    const int chroma_u  = (-148 * red - 291 * green + 439 * blue) / 1000 + 128;

    *y = luma;
    *v = chroma_v;
    *u = chroma_u;
    *a = alpha;
}
3761
3762 static void 
3763 i965_vpp_clear_surface(VADriverContextP ctx,
3764                        struct i965_post_processing_context *pp_context,
3765                        VASurfaceID surface,
3766                        unsigned int color)
3767 {
3768     struct i965_driver_data *i965 = i965_driver_data(ctx);
3769     struct intel_batchbuffer *batch = pp_context->batch;
3770     struct object_surface *obj_surface = SURFACE(surface);
3771     unsigned int blt_cmd, br13;
3772     unsigned int tiling = 0, swizzle = 0;
3773     int pitch;
3774     unsigned char y, u, v, a = 0;
3775
3776     /* Currently only support NV12 surface */
3777     if (!obj_surface || obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
3778         return;
3779
3780     rgb_to_yuv(color, &y, &u, &v, &a);
3781
3782     if (a == 0)
3783         return;
3784
3785     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
3786     blt_cmd = XY_COLOR_BLT_CMD;
3787     pitch = obj_surface->width;
3788
3789     if (tiling != I915_TILING_NONE) {
3790         blt_cmd |= XY_COLOR_BLT_DST_TILED;
3791         pitch >>= 2;
3792     }
3793
3794     br13 = 0xf0 << 16;
3795     br13 |= BR13_8;
3796     br13 |= pitch;
3797
3798     if (IS_GEN6(i965->intel.device_id) ||
3799         IS_GEN7(i965->intel.device_id)) {
3800         intel_batchbuffer_start_atomic_blt(batch, 48);
3801         BEGIN_BLT_BATCH(batch, 12);
3802     } else {
3803         intel_batchbuffer_start_atomic(batch, 48);
3804         BEGIN_BATCH(batch, 12);
3805     }
3806
3807     OUT_BATCH(batch, blt_cmd);
3808     OUT_BATCH(batch, br13);
3809     OUT_BATCH(batch,
3810               0 << 16 |
3811               0);
3812     OUT_BATCH(batch,
3813               obj_surface->height << 16 |
3814               obj_surface->width);
3815     OUT_RELOC(batch, obj_surface->bo, 
3816               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
3817               0);
3818     OUT_BATCH(batch, y);
3819
3820     br13 = 0xf0 << 16;
3821     br13 |= BR13_565;
3822     br13 |= pitch;
3823
3824     OUT_BATCH(batch, blt_cmd);
3825     OUT_BATCH(batch, br13);
3826     OUT_BATCH(batch,
3827               0 << 16 |
3828               0);
3829     OUT_BATCH(batch,
3830               obj_surface->height / 2 << 16 |
3831               obj_surface->width / 2);
3832     OUT_RELOC(batch, obj_surface->bo, 
3833               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
3834               obj_surface->width * obj_surface->y_cb_offset);
3835     OUT_BATCH(batch, v << 8 | u);
3836
3837     ADVANCE_BATCH(batch);
3838     intel_batchbuffer_end_atomic(batch);
3839 }
3840
3841 VASurfaceID
3842 i965_post_processing(
3843     VADriverContextP   ctx,
3844     VASurfaceID        surface,
3845     const VARectangle *src_rect,
3846     const VARectangle *dst_rect,
3847     unsigned int       flags,
3848     int               *has_done_scaling  
3849 )
3850 {
3851     struct i965_driver_data *i965 = i965_driver_data(ctx);
3852     VASurfaceID in_surface_id = surface;
3853     VASurfaceID out_surface_id = VA_INVALID_ID;
3854     
3855     *has_done_scaling = 0;
3856
3857     if (HAS_PP(i965)) {
3858         struct object_surface *obj_surface;
3859         VAStatus status;
3860         struct i965_surface src_surface;
3861         struct i965_surface dst_surface;
3862
3863         obj_surface = SURFACE(in_surface_id);
3864
3865         /* Currently only support post processing for NV12 surface */
3866         if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
3867             return out_surface_id;
3868
3869         _i965LockMutex(&i965->pp_mutex);
3870
3871         if (flags & I965_PP_FLAG_MCDI) {
3872             status = i965_CreateSurfaces(ctx,
3873                                          obj_surface->orig_width,
3874                                          obj_surface->orig_height,
3875                                          VA_RT_FORMAT_YUV420,
3876                                          1,
3877                                          &out_surface_id);
3878             assert(status == VA_STATUS_SUCCESS);
3879             obj_surface = SURFACE(out_surface_id);
3880             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
3881             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
3882             src_surface.id = in_surface_id;
3883             src_surface.type = I965_SURFACE_TYPE_SURFACE;
3884             src_surface.flags = (flags & I965_PP_FLAG_TOP_FIELD) ? 
3885                 I965_SURFACE_FLAG_TOP_FIELD_FIRST : I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST;
3886             dst_surface.id = out_surface_id;
3887             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
3888             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
3889
3890             i965_post_processing_internal(ctx, i965->pp_context,
3891                                           &src_surface,
3892                                           src_rect,
3893                                           &dst_surface,
3894                                           dst_rect,
3895                                           PP_NV12_DNDI,
3896                                           NULL);
3897         }
3898
3899         if (flags & I965_PP_FLAG_AVS) {
3900             struct i965_render_state *render_state = &i965->render_state;
3901             struct intel_region *dest_region = render_state->draw_region;
3902
3903             if (out_surface_id != VA_INVALID_ID)
3904                 in_surface_id = out_surface_id;
3905
3906             status = i965_CreateSurfaces(ctx,
3907                                          dest_region->width,
3908                                          dest_region->height,
3909                                          VA_RT_FORMAT_YUV420,
3910                                          1,
3911                                          &out_surface_id);
3912             assert(status == VA_STATUS_SUCCESS);
3913             obj_surface = SURFACE(out_surface_id);
3914             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
3915             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
3916             src_surface.id = in_surface_id;
3917             src_surface.type = I965_SURFACE_TYPE_SURFACE;
3918             src_surface.flags = I965_SURFACE_FLAG_FRAME;
3919             dst_surface.id = out_surface_id;
3920             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
3921             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
3922
3923             i965_post_processing_internal(ctx, i965->pp_context,
3924                                           &src_surface,
3925                                           src_rect,
3926                                           &dst_surface,
3927                                           dst_rect,
3928                                           PP_NV12_AVS,
3929                                           NULL);
3930
3931             if (in_surface_id != surface)
3932                 i965_DestroySurfaces(ctx, &in_surface_id, 1);
3933                 
3934             *has_done_scaling = 1;
3935         }
3936
3937         _i965UnlockMutex(&i965->pp_mutex);
3938     }
3939
3940     return out_surface_id;
3941 }       
3942
3943 static VAStatus
3944 i965_image_pl3_processing(VADriverContextP ctx,
3945                           const struct i965_surface *src_surface,
3946                           const VARectangle *src_rect,
3947                           struct i965_surface *dst_surface,
3948                           const VARectangle *dst_rect)
3949 {
3950     struct i965_driver_data *i965 = i965_driver_data(ctx);
3951     struct i965_post_processing_context *pp_context = i965->pp_context;
3952     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
3953     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
3954
3955     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
3956         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
3957                                                  src_surface,
3958                                                  src_rect,
3959                                                  dst_surface,
3960                                                  dst_rect,
3961                                                  PP_PL3_LOAD_SAVE_N12,
3962                                                  NULL);
3963     } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || 
3964                fourcc == VA_FOURCC('I', 'M', 'C', '3') || 
3965                fourcc == VA_FOURCC('Y', 'V', '1', '2') || 
3966                fourcc == VA_FOURCC('I', '4', '2', '0')) {
3967         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
3968                                                  src_surface,
3969                                                  src_rect,
3970                                                  dst_surface,
3971                                                  dst_rect,
3972                                                  PP_PL3_LOAD_SAVE_PL3,
3973                                                  NULL);
3974     } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) {
3975         if (IS_GEN6(i965->intel.device_id))
3976             vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
3977                                                      src_surface,
3978                                                      src_rect,
3979                                                      dst_surface,
3980                                                      dst_rect,
3981                                                      PP_PL3_LOAD_SAVE_PA,
3982                                                      NULL);
3983     }
3984     else {
3985         assert(0);
3986     }
3987
3988     intel_batchbuffer_flush(pp_context->batch);
3989
3990     return vaStatus;
3991 }
3992
3993 static VAStatus
3994 i965_image_pl2_processing(VADriverContextP ctx,
3995                           const struct i965_surface *src_surface,
3996                           const VARectangle *src_rect,
3997                           struct i965_surface *dst_surface,
3998                           const VARectangle *dst_rect)
3999 {
4000     struct i965_driver_data *i965 = i965_driver_data(ctx);
4001     struct i965_post_processing_context *pp_context = i965->pp_context;
4002     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4003     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
4004
4005     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4006         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4007                                                  src_surface,
4008                                                  src_rect,
4009                                                  dst_surface,
4010                                                  dst_rect,
4011                                                  PP_NV12_LOAD_SAVE_N12,
4012                                                  NULL);
4013     } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || 
4014                fourcc == VA_FOURCC('I', 'M', 'C', '3') || 
4015                fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
4016                fourcc == VA_FOURCC('I', '4', '2', '0') ) {
4017         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4018                                                  src_surface,
4019                                                  src_rect,
4020                                                  dst_surface,
4021                                                  dst_rect,
4022                                                  PP_NV12_LOAD_SAVE_PL3,
4023                                                  NULL);
4024     } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) {
4025         if (IS_GEN6(i965->intel.device_id))
4026             vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4027                                                      src_surface,
4028                                                      src_rect,
4029                                                      dst_surface,
4030                                                      dst_rect,
4031                                                      PP_NV12_LOAD_SAVE_PA,
4032                                                      NULL);
4033     }
4034
4035     intel_batchbuffer_flush(pp_context->batch);
4036
4037     return vaStatus;
4038 }
4039
4040 static VAStatus
4041 i965_image_pl1_processing(VADriverContextP ctx,
4042                           const struct i965_surface *src_surface,
4043                           const VARectangle *src_rect,
4044                           struct i965_surface *dst_surface,
4045                           const VARectangle *dst_rect)
4046 {
4047     struct i965_driver_data *i965 = i965_driver_data(ctx);
4048     struct i965_post_processing_context *pp_context = i965->pp_context;
4049     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4050
4051     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4052         i965_post_processing_internal(ctx, i965->pp_context,
4053                                       src_surface,
4054                                       src_rect,
4055                                       dst_surface,
4056                                       dst_rect,
4057                                       PP_PA_LOAD_SAVE_NV12,
4058                                       NULL);
4059     }
4060     else if (fourcc == VA_FOURCC_YV12) {
4061         i965_post_processing_internal(ctx, i965->pp_context,
4062                                       src_surface,
4063                                       src_rect,
4064                                       dst_surface,
4065                                       dst_rect,
4066                                       PP_PA_LOAD_SAVE_PL3,
4067                                       NULL);
4068
4069     }
4070     else {
4071         return VA_STATUS_ERROR_UNKNOWN;
4072     }
4073
4074     intel_batchbuffer_flush(pp_context->batch);
4075
4076     return VA_STATUS_SUCCESS;
4077 }
4078
4079 VAStatus
4080 i965_image_processing(VADriverContextP ctx,
4081                       const struct i965_surface *src_surface,
4082                       const VARectangle *src_rect,
4083                       struct i965_surface *dst_surface,
4084                       const VARectangle *dst_rect)
4085 {
4086     struct i965_driver_data *i965 = i965_driver_data(ctx);
4087     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
4088
4089     if (HAS_PP(i965)) {
4090         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
4091
4092         _i965LockMutex(&i965->pp_mutex);
4093
4094         switch (fourcc) {
4095         case VA_FOURCC('Y', 'V', '1', '2'):
4096         case VA_FOURCC('I', '4', '2', '0'):
4097         case VA_FOURCC('I', 'M', 'C', '1'):
4098         case VA_FOURCC('I', 'M', 'C', '3'):
4099             status = i965_image_pl3_processing(ctx,
4100                                                src_surface,
4101                                                src_rect,
4102                                                dst_surface,
4103                                                dst_rect);
4104             break;
4105
4106         case  VA_FOURCC('N', 'V', '1', '2'):
4107             status = i965_image_pl2_processing(ctx,
4108                                                src_surface,
4109                                                src_rect,
4110                                                dst_surface,
4111                                                dst_rect);
4112             break;
4113         case  VA_FOURCC('Y', 'U', 'Y', '2'):
4114             if (IS_GEN6(i965->intel.device_id))
4115                 status = i965_image_pl1_processing(ctx,
4116                                                    src_surface,
4117                                                    src_rect,
4118                                                    dst_surface,
4119                                                    dst_rect);
4120             break;
4121
4122         default:
4123             status = VA_STATUS_ERROR_UNIMPLEMENTED;
4124             break;
4125         }
4126         
4127         _i965UnlockMutex(&i965->pp_mutex);
4128     }
4129
4130     return status;
4131 }       
4132
4133 static void
4134 i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
4135 {
4136     int i;
4137
4138     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4139     pp_context->surface_state_binding_table.bo = NULL;
4140
4141     dri_bo_unreference(pp_context->curbe.bo);
4142     pp_context->curbe.bo = NULL;
4143
4144     dri_bo_unreference(pp_context->sampler_state_table.bo);
4145     pp_context->sampler_state_table.bo = NULL;
4146
4147     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4148     pp_context->sampler_state_table.bo_8x8 = NULL;
4149
4150     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4151     pp_context->sampler_state_table.bo_8x8_uv = NULL;
4152
4153     dri_bo_unreference(pp_context->idrt.bo);
4154     pp_context->idrt.bo = NULL;
4155     pp_context->idrt.num_interface_descriptors = 0;
4156
4157     dri_bo_unreference(pp_context->vfe_state.bo);
4158     pp_context->vfe_state.bo = NULL;
4159
4160     dri_bo_unreference(pp_context->stmm.bo);
4161     pp_context->stmm.bo = NULL;
4162
4163     for (i = 0; i < NUM_PP_MODULES; i++) {
4164         struct pp_module *pp_module = &pp_context->pp_modules[i];
4165
4166         dri_bo_unreference(pp_module->kernel.bo);
4167         pp_module->kernel.bo = NULL;
4168     }
4169
4170     free(pp_context->pp_static_parameter);
4171     free(pp_context->pp_inline_parameter);
4172     pp_context->pp_static_parameter = NULL;
4173     pp_context->pp_inline_parameter = NULL;
4174 }
4175
4176 Bool
4177 i965_post_processing_terminate(VADriverContextP ctx)
4178 {
4179     struct i965_driver_data *i965 = i965_driver_data(ctx);
4180     struct i965_post_processing_context *pp_context = i965->pp_context;
4181
4182     if (pp_context) {
4183         i965_post_processing_context_finalize(pp_context);
4184         free(pp_context);
4185     }
4186
4187     i965->pp_context = NULL;
4188
4189     return True;
4190 }
4191
4192 static void
4193 i965_post_processing_context_init(VADriverContextP ctx,
4194                                   struct i965_post_processing_context *pp_context,
4195                                   struct intel_batchbuffer *batch)
4196 {
4197     struct i965_driver_data *i965 = i965_driver_data(ctx);
4198     int i;
4199
4200     pp_context->urb.size = URB_SIZE((&i965->intel));
4201     pp_context->urb.num_vfe_entries = 32;
4202     pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
4203     pp_context->urb.num_cs_entries = 1;
4204     
4205     if (IS_GEN7(i965->intel.device_id))
4206         pp_context->urb.size_cs_entry = 4;      /* in 512 bits unit */
4207     else
4208         pp_context->urb.size_cs_entry = 2;
4209
4210     pp_context->urb.vfe_start = 0;
4211     pp_context->urb.cs_start = pp_context->urb.vfe_start + 
4212         pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
4213     assert(pp_context->urb.cs_start + 
4214            pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
4215
4216     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
4217     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
4218     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
4219
4220     if (IS_GEN7(i965->intel.device_id))
4221         memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
4222     else if (IS_GEN6(i965->intel.device_id))
4223         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
4224     else if (IS_IRONLAKE(i965->intel.device_id))
4225         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
4226
4227     for (i = 0; i < NUM_PP_MODULES; i++) {
4228         struct pp_module *pp_module = &pp_context->pp_modules[i];
4229         dri_bo_unreference(pp_module->kernel.bo);
4230         if (pp_module->kernel.bin && pp_module->kernel.size) {
4231             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
4232                                                 pp_module->kernel.name,
4233                                                 pp_module->kernel.size,
4234                                                 4096);
4235             assert(pp_module->kernel.bo);
4236             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
4237         } else {
4238             pp_module->kernel.bo = NULL;
4239         }
4240     }
4241
4242     /* static & inline parameters */
4243     if (IS_GEN7(i965->intel.device_id)) {
4244         pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
4245         pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
4246     } else {
4247         pp_context->pp_static_parameter = calloc(sizeof(struct pp_static_parameter), 1);
4248         pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
4249     }
4250
4251     pp_context->batch = batch;
4252 }
4253
4254 Bool
4255 i965_post_processing_init(VADriverContextP ctx)
4256 {
4257     struct i965_driver_data *i965 = i965_driver_data(ctx);
4258     struct i965_post_processing_context *pp_context = i965->pp_context;
4259
4260     if (HAS_PP(i965)) {
4261         if (pp_context == NULL) {
4262             pp_context = calloc(1, sizeof(*pp_context));
4263             i965_post_processing_context_init(ctx, pp_context, i965->batch);
4264             i965->pp_context = pp_context;
4265         }
4266     }
4267
4268     return True;
4269 }
4270
4271 static const int procfilter_to_pp_flag[VAProcFilterCount] = {
4272     PP_NULL,    /* VAProcFilterNone */
4273     PP_NV12_DN, /* VAProcFilterNoiseReduction */
4274     PP_NULL,    /* VAProcFilterDeblocking */
4275     PP_NV12_DNDI, /* VAProcFilterDeinterlacing */
4276     PP_NULL,    /* VAProcFilterSharpening */
4277     PP_NULL,    /* VAProcFilterColorBalance */
4278     PP_NULL,    /* VAProcFilterColorStandard */
4279     PP_NULL,    /* VAProcFilterFrameRateConversion */
4280 };
4281
4282 static const int proc_frame_to_pp_frame[3] = {
4283     I965_SURFACE_FLAG_FRAME,
4284     I965_SURFACE_FLAG_TOP_FIELD_FIRST,
4285     I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST
4286 };
4287
4288 static void 
4289 i965_proc_picture(VADriverContextP ctx, 
4290                   VAProfile profile, 
4291                   union codec_state *codec_state,
4292                   struct hw_context *hw_context)
4293 {
4294     struct i965_driver_data *i965 = i965_driver_data(ctx);
4295     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
4296     struct proc_state *proc_state = &codec_state->proc;
4297     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
4298     struct object_surface *obj_surface;
4299     struct i965_surface src_surface, dst_surface;
4300     VARectangle src_rect, dst_rect;
4301     VAStatus status;
4302     int i;
4303     VASurfaceID tmp_surfaces[VAProcFilterCount + 4];
4304     int num_tmp_surfaces = 0;
4305     unsigned int tiling = 0, swizzle = 0;
4306     int in_width, in_height;
4307
4308     assert(pipeline_param->surface != VA_INVALID_ID);
4309     assert(proc_state->current_render_target != VA_INVALID_ID);
4310
4311     obj_surface = SURFACE(pipeline_param->surface);
4312     in_width = obj_surface->orig_width;
4313     in_height = obj_surface->orig_height;
4314     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
4315
4316     src_surface.id = pipeline_param->surface;
4317     src_surface.type = I965_SURFACE_TYPE_SURFACE;
4318     src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
4319
4320     if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2')) {
4321         VASurfaceID out_surface_id = VA_INVALID_ID;
4322
4323         src_surface.id = pipeline_param->surface;
4324         src_surface.type = I965_SURFACE_TYPE_SURFACE;
4325         src_surface.flags = I965_SURFACE_FLAG_FRAME;
4326         src_rect.x = 0;
4327         src_rect.y = 0;
4328         src_rect.width = in_width;
4329         src_rect.height = in_height;
4330
4331         status = i965_CreateSurfaces(ctx,
4332                                      in_width,
4333                                      in_height,
4334                                      VA_RT_FORMAT_YUV420,
4335                                      1,
4336                                      &out_surface_id);
4337         assert(status == VA_STATUS_SUCCESS);
4338         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
4339         obj_surface = SURFACE(out_surface_id);
4340         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
4341
4342         dst_surface.id = out_surface_id;
4343         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4344         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4345         dst_rect.x = 0;
4346         dst_rect.y = 0;
4347         dst_rect.width = in_width;
4348         dst_rect.height = in_height;
4349
4350         status = i965_image_processing(ctx,
4351                                        &src_surface,
4352                                        &src_rect,
4353                                        &dst_surface,
4354                                        &dst_rect);
4355         assert(status == VA_STATUS_SUCCESS);
4356
4357         src_surface.id = out_surface_id;
4358         src_surface.type = I965_SURFACE_TYPE_SURFACE;
4359         src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
4360     }
4361
4362     if (pipeline_param->surface_region) {
4363         src_rect.x = pipeline_param->surface_region->x;
4364         src_rect.y = pipeline_param->surface_region->y;
4365         src_rect.width = pipeline_param->surface_region->width;
4366         src_rect.height = pipeline_param->surface_region->height;
4367     } else {
4368         src_rect.x = 0;
4369         src_rect.y = 0;
4370         src_rect.width = in_width;
4371         src_rect.height = in_height;
4372     }
4373
4374     if (pipeline_param->output_region) {
4375         dst_rect.x = pipeline_param->output_region->x;
4376         dst_rect.y = pipeline_param->output_region->y;
4377         dst_rect.width = pipeline_param->output_region->width;
4378         dst_rect.height = pipeline_param->output_region->height;
4379     } else {
4380         dst_rect.x = 0;
4381         dst_rect.y = 0;
4382         dst_rect.width = in_width;
4383         dst_rect.height = in_height;
4384     }
4385
4386     obj_surface = SURFACE(proc_state->current_render_target);
4387     i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4388     i965_vpp_clear_surface(ctx, &proc_context->pp_context, proc_state->current_render_target, pipeline_param->output_background_color); 
4389     
4390     for (i = 0; i < pipeline_param->num_filters; i++) {
4391         struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]);
4392         VAProcFilterParameterBufferBase *filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer;
4393         VAProcFilterType filter_type = filter_param->type;
4394         VASurfaceID out_surface_id = VA_INVALID_ID;
4395         int kernel_index = procfilter_to_pp_flag[filter_type];
4396
4397         if (kernel_index != PP_NULL &&
4398             proc_context->pp_context.pp_modules[kernel_index].kernel.bo != NULL) {
4399             status = i965_CreateSurfaces(ctx,
4400                                          in_width,
4401                                          in_height,
4402                                          VA_RT_FORMAT_YUV420,
4403                                          1,
4404                                          &out_surface_id);
4405             assert(status == VA_STATUS_SUCCESS);
4406             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
4407             obj_surface = SURFACE(out_surface_id);
4408             i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4409             dst_surface.id = out_surface_id;
4410             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4411             status = i965_post_processing_internal(ctx, &proc_context->pp_context,
4412                                                    &src_surface,
4413                                                    &src_rect,
4414                                                    &dst_surface,
4415                                                    &src_rect,
4416                                                    kernel_index,
4417                                                    filter_param);
4418
4419             if (status == VA_STATUS_SUCCESS) {
4420                 src_surface.id = dst_surface.id;
4421                 src_surface.type = dst_surface.type;
4422                 src_surface.flags = dst_surface.flags;
4423             }
4424         }
4425     }
4426
4427     dst_surface.id = proc_state->current_render_target;
4428     dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4429
4430     if (src_rect.width == dst_rect.width &&
4431         src_rect.height == dst_rect.height) {
4432         i965_post_processing_internal(ctx, &proc_context->pp_context,
4433                                       &src_surface,
4434                                       &src_rect,
4435                                       &dst_surface,
4436                                       &dst_rect,
4437                                       PP_NV12_LOAD_SAVE_N12,
4438                                       NULL);
4439     } else {
4440
4441         i965_post_processing_internal(ctx, &proc_context->pp_context,
4442                                       &src_surface,
4443                                       &src_rect,
4444                                       &dst_surface,
4445                                       &dst_rect,
4446                                       (pipeline_param->filter_flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC ?
4447                                       PP_NV12_AVS : PP_NV12_SCALING,
4448                                       NULL);
4449     }
4450
4451     if (num_tmp_surfaces)
4452         i965_DestroySurfaces(ctx,
4453                              tmp_surfaces,
4454                              num_tmp_surfaces);
4455
4456     intel_batchbuffer_flush(hw_context->batch);
4457 }
4458
4459 static void
4460 i965_proc_context_destroy(void *hw_context)
4461 {
4462     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
4463
4464     i965_post_processing_context_finalize(&proc_context->pp_context);
4465     intel_batchbuffer_free(proc_context->base.batch);
4466     free(proc_context);
4467 }
4468
4469 struct hw_context *
4470 i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config)
4471 {
4472     struct intel_driver_data *intel = intel_driver_data(ctx);
4473     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
4474
4475     proc_context->base.destroy = i965_proc_context_destroy;
4476     proc_context->base.run = i965_proc_picture;
4477     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
4478     i965_post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
4479
4480     return (struct hw_context *)proc_context;
4481 }