color conversion between planar and packed formats on IVB
[profile/ivi/vaapi-intel-driver.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_post_processing.h"
40 #include "i965_render.h"
41
/*
 * Evaluates to non-zero when the device id stored in the driver context
 * identifies an Ironlake, Gen6 or Gen7 part.
 */
#define HAS_PP(ctx)                                     \
    (IS_IRONLAKE((ctx)->intel.device_id) ||             \
     IS_GEN6((ctx)->intel.device_id) ||                 \
     IS_GEN7((ctx)->intel.device_id))
45
/*
 * Layout of the combined surface-state / binding-table buffer.
 *
 * Every surface-state slot is padded to one common size: the largest of the
 * four surface-state structures (i965 and gen7, variants 1 and 2), each
 * rounded up to a multiple of 32 bytes.  Slot N starts at
 * SURFACE_STATE_OFFSET(N); the binding table follows the last slot, i.e. it
 * starts at SURFACE_STATE_OFFSET(MAX_PP_SURFACES).
 */
#define SURFACE_STATE_PADDED_SIZE_0_I965        ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_I965        ALIGN(sizeof(struct i965_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_I965          MAX(SURFACE_STATE_PADDED_SIZE_0_I965, SURFACE_STATE_PADDED_SIZE_1_I965)

#define SURFACE_STATE_PADDED_SIZE_0_GEN7        ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_GEN7        ALIGN(sizeof(struct gen7_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN7          MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)

#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
/* The argument is parenthesized so that expressions such as `i + 1` scale as a whole. */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)
57
58 static const uint32_t pp_null_gen5[][4] = {
59 #include "shaders/post_processing/gen5_6/null.g4b.gen5"
60 };
61
62 static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
63 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
64 };
65
66 static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
67 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
68 };
69
70 static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
71 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
72 };
73
74 static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
75 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
76 };
77
78 static const uint32_t pp_nv12_scaling_gen5[][4] = {
79 #include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
80 };
81
82 static const uint32_t pp_nv12_avs_gen5[][4] = {
83 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
84 };
85
86 static const uint32_t pp_nv12_dndi_gen5[][4] = {
87 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
88 };
89
90 static const uint32_t pp_nv12_dn_gen5[][4] = {
91 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
92 };
93
94 static const uint32_t pp_nv12_load_save_pa_gen5[][4] = {
95 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5"
96 };
97
98 static const uint32_t pp_pl3_load_save_pa_gen5[][4] = {
99 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5"
100 };
101
102 static const uint32_t pp_pa_load_save_nv12_gen5[][4] = {
103 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5"
104 };
105
106 static const uint32_t pp_pa_load_save_pl3_gen5[][4] = {
107 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5"
108 };
109
110 static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
111                                    const struct i965_surface *src_surface,
112                                    const VARectangle *src_rect,
113                                    struct i965_surface *dst_surface,
114                                    const VARectangle *dst_rect,
115                                    void *filter_param);
116 static VAStatus pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
117                                             const struct i965_surface *src_surface,
118                                             const VARectangle *src_rect,
119                                             struct i965_surface *dst_surface,
120                                             const VARectangle *dst_rect,
121                                             void *filter_param);
122 static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
123                                            const struct i965_surface *src_surface,
124                                            const VARectangle *src_rect,
125                                            struct i965_surface *dst_surface,
126                                            const VARectangle *dst_rect,
127                                            void *filter_param);
128 static VAStatus gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
129                                              const struct i965_surface *src_surface,
130                                              const VARectangle *src_rect,
131                                              struct i965_surface *dst_surface,
132                                              const VARectangle *dst_rect,
133                                              void *filter_param);
134 static VAStatus pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
135                                                 const struct i965_surface *src_surface,
136                                                 const VARectangle *src_rect,
137                                                 struct i965_surface *dst_surface,
138                                                 const VARectangle *dst_rect,
139                                                 void *filter_param);
140 static VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
141                                         const struct i965_surface *src_surface,
142                                         const VARectangle *src_rect,
143                                         struct i965_surface *dst_surface,
144                                         const VARectangle *dst_rect,
145                                         void *filter_param);
146 static VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
147                                       const struct i965_surface *src_surface,
148                                       const VARectangle *src_rect,
149                                       struct i965_surface *dst_surface,
150                                       const VARectangle *dst_rect,
151                                       void *filter_param);
152
153 static struct pp_module pp_modules_gen5[] = {
154     {
155         {
156             "NULL module (for testing)",
157             PP_NULL,
158             pp_null_gen5,
159             sizeof(pp_null_gen5),
160             NULL,
161         },
162
163         pp_null_initialize,
164     },
165
166     {
167         {
168             "NV12_NV12",
169             PP_NV12_LOAD_SAVE_N12,
170             pp_nv12_load_save_nv12_gen5,
171             sizeof(pp_nv12_load_save_nv12_gen5),
172             NULL,
173         },
174
175         pp_plx_load_save_plx_initialize,
176     },
177
178     {
179         {
180             "NV12_PL3",
181             PP_NV12_LOAD_SAVE_PL3,
182             pp_nv12_load_save_pl3_gen5,
183             sizeof(pp_nv12_load_save_pl3_gen5),
184             NULL,
185         },
186
187         pp_plx_load_save_plx_initialize,
188     },
189
190     {
191         {
192             "PL3_NV12",
193             PP_PL3_LOAD_SAVE_N12,
194             pp_pl3_load_save_nv12_gen5,
195             sizeof(pp_pl3_load_save_nv12_gen5),
196             NULL,
197         },
198
199         pp_plx_load_save_plx_initialize,
200     },
201
202     {
203         {
204             "PL3_PL3",
205             PP_PL3_LOAD_SAVE_N12,
206             pp_pl3_load_save_pl3_gen5,
207             sizeof(pp_pl3_load_save_pl3_gen5),
208             NULL,
209         },
210
211         pp_plx_load_save_plx_initialize
212     },
213
214     {
215         {
216             "NV12 Scaling module",
217             PP_NV12_SCALING,
218             pp_nv12_scaling_gen5,
219             sizeof(pp_nv12_scaling_gen5),
220             NULL,
221         },
222
223         pp_nv12_scaling_initialize,
224     },
225
226     {
227         {
228             "NV12 AVS module",
229             PP_NV12_AVS,
230             pp_nv12_avs_gen5,
231             sizeof(pp_nv12_avs_gen5),
232             NULL,
233         },
234
235         pp_nv12_avs_initialize_nlas,
236     },
237
238     {
239         {
240             "NV12 DNDI module",
241             PP_NV12_DNDI,
242             pp_nv12_dndi_gen5,
243             sizeof(pp_nv12_dndi_gen5),
244             NULL,
245         },
246
247         pp_nv12_dndi_initialize,
248     },
249
250     {
251         {
252             "NV12 DN module",
253             PP_NV12_DN,
254             pp_nv12_dn_gen5,
255             sizeof(pp_nv12_dn_gen5),
256             NULL,
257         },
258
259         pp_nv12_dn_initialize,
260     },
261
262     {
263         {
264             "NV12_PA module",
265             PP_NV12_LOAD_SAVE_PA,
266             pp_nv12_load_save_pa_gen5,
267             sizeof(pp_nv12_load_save_pa_gen5),
268             NULL,
269         },
270     
271         pp_plx_load_save_plx_initialize,
272     },
273
274     {
275         {
276             "PL3_PA module",
277             PP_PL3_LOAD_SAVE_PA,
278             pp_pl3_load_save_pa_gen5,
279             sizeof(pp_pl3_load_save_pa_gen5),
280             NULL,
281         },
282     
283         pp_plx_load_save_plx_initialize,
284     },
285
286     {
287         {
288             "PA_NV12 module",
289             PP_PA_LOAD_SAVE_NV12,
290             pp_pa_load_save_nv12_gen5,
291             sizeof(pp_pa_load_save_nv12_gen5),
292             NULL,
293         },
294     
295         pp_plx_load_save_plx_initialize,
296     },
297
298     {
299         {
300             "PA_PL3 module",
301             PP_PA_LOAD_SAVE_PL3,
302             pp_pa_load_save_pl3_gen5,
303             sizeof(pp_pa_load_save_pl3_gen5),
304             NULL,
305         },
306     
307         pp_plx_load_save_plx_initialize,
308     },
309
310 };
311
312 static const uint32_t pp_null_gen6[][4] = {
313 #include "shaders/post_processing/gen5_6/null.g6b"
314 };
315
316 static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
317 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
318 };
319
320 static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
321 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
322 };
323
324 static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
325 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
326 };
327
328 static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
329 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
330 };
331
332 static const uint32_t pp_nv12_scaling_gen6[][4] = {
333 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
334 };
335
336 static const uint32_t pp_nv12_avs_gen6[][4] = {
337 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
338 };
339
340 static const uint32_t pp_nv12_dndi_gen6[][4] = {
341 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
342 };
343
344 static const uint32_t pp_nv12_dn_gen6[][4] = {
345 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
346 };
347
348 static const uint32_t pp_nv12_load_save_pa_gen6[][4] = {
349 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g6b"
350 };
351
352 static const uint32_t pp_pl3_load_save_pa_gen6[][4] = {
353 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g6b"
354 };
355
356 static const uint32_t pp_pa_load_save_nv12_gen6[][4] = {
357 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g6b"
358 };
359
360 static const uint32_t pp_pa_load_save_pl3_gen6[][4] = {
361 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g6b"
362 };
363
364 static struct pp_module pp_modules_gen6[] = {
365     {
366         {
367             "NULL module (for testing)",
368             PP_NULL,
369             pp_null_gen6,
370             sizeof(pp_null_gen6),
371             NULL,
372         },
373
374         pp_null_initialize,
375     },
376
377     {
378         {
379             "NV12_NV12",
380             PP_NV12_LOAD_SAVE_N12,
381             pp_nv12_load_save_nv12_gen6,
382             sizeof(pp_nv12_load_save_nv12_gen6),
383             NULL,
384         },
385
386         pp_plx_load_save_plx_initialize,
387     },
388
389     {
390         {
391             "NV12_PL3",
392             PP_NV12_LOAD_SAVE_PL3,
393             pp_nv12_load_save_pl3_gen6,
394             sizeof(pp_nv12_load_save_pl3_gen6),
395             NULL,
396         },
397         
398         pp_plx_load_save_plx_initialize,
399     },
400
401     {
402         {
403             "PL3_NV12",
404             PP_PL3_LOAD_SAVE_N12,
405             pp_pl3_load_save_nv12_gen6,
406             sizeof(pp_pl3_load_save_nv12_gen6),
407             NULL,
408         },
409
410         pp_plx_load_save_plx_initialize,
411     },
412
413     {
414         {
415             "PL3_PL3",
416             PP_PL3_LOAD_SAVE_N12,
417             pp_pl3_load_save_pl3_gen6,
418             sizeof(pp_pl3_load_save_pl3_gen6),
419             NULL,
420         },
421
422         pp_plx_load_save_plx_initialize,
423     },
424
425     {
426         {
427             "NV12 Scaling module",
428             PP_NV12_SCALING,
429             pp_nv12_scaling_gen6,
430             sizeof(pp_nv12_scaling_gen6),
431             NULL,
432         },
433
434         gen6_nv12_scaling_initialize,
435     },
436
437     {
438         {
439             "NV12 AVS module",
440             PP_NV12_AVS,
441             pp_nv12_avs_gen6,
442             sizeof(pp_nv12_avs_gen6),
443             NULL,
444         },
445
446         pp_nv12_avs_initialize_nlas,
447     },
448
449     {
450         {
451             "NV12 DNDI module",
452             PP_NV12_DNDI,
453             pp_nv12_dndi_gen6,
454             sizeof(pp_nv12_dndi_gen6),
455             NULL,
456         },
457
458         pp_nv12_dndi_initialize,
459     },
460
461     {
462         {
463             "NV12 DN module",
464             PP_NV12_DN,
465             pp_nv12_dn_gen6,
466             sizeof(pp_nv12_dn_gen6),
467             NULL,
468         },
469
470         pp_nv12_dn_initialize,
471     },
472     {
473         {
474             "NV12_PA module",
475             PP_NV12_LOAD_SAVE_PA,
476             pp_nv12_load_save_pa_gen6,
477             sizeof(pp_nv12_load_save_pa_gen6),
478             NULL,
479         },
480     
481         pp_plx_load_save_plx_initialize,
482     },
483     
484     {
485         {
486             "PL3_PA module",
487             PP_PL3_LOAD_SAVE_PA,
488             pp_pl3_load_save_pa_gen6,
489             sizeof(pp_pl3_load_save_pa_gen6),
490             NULL,
491         },
492     
493         pp_plx_load_save_plx_initialize,
494     },
495     
496     {
497         {
498             "PA_NV12 module",
499             PP_PA_LOAD_SAVE_NV12,
500             pp_pa_load_save_nv12_gen6,
501             sizeof(pp_pa_load_save_nv12_gen6),
502             NULL,
503         },
504     
505         pp_plx_load_save_plx_initialize,
506     },
507
508     {
509         {
510             "PA_PL3 module",
511             PP_PA_LOAD_SAVE_PL3,
512             pp_pa_load_save_pl3_gen6,
513             sizeof(pp_pa_load_save_pl3_gen6),
514             NULL,
515         },
516     
517         pp_plx_load_save_plx_initialize,
518     },
519     
520 };
521
522 static const uint32_t pp_null_gen7[][4] = {
523 };
524
525 static const uint32_t pp_nv12_load_save_nv12_gen7[][4] = {
526 #include "shaders/post_processing/gen7/pl2_to_pl2.g7b"
527 };
528
529 static const uint32_t pp_nv12_load_save_pl3_gen7[][4] = {
530 #include "shaders/post_processing/gen7/pl2_to_pl3.g7b"
531 };
532
533 static const uint32_t pp_pl3_load_save_nv12_gen7[][4] = {
534 #include "shaders/post_processing/gen7/pl3_to_pl2.g7b"
535 };
536
537 static const uint32_t pp_pl3_load_save_pl3_gen7[][4] = {
538 #include "shaders/post_processing/gen7/pl3_to_pl3.g7b"
539 };
540
541 static const uint32_t pp_nv12_scaling_gen7[][4] = {
542 #include "shaders/post_processing/gen7/avs.g7b"
543 };
544
545 static const uint32_t pp_nv12_avs_gen7[][4] = {
546 #include "shaders/post_processing/gen7/avs.g7b"
547 };
548
549 static const uint32_t pp_nv12_dndi_gen7[][4] = {
550 // #include "shaders/post_processing/gen7/dndi.g7b"
551 };
552
553 static const uint32_t pp_nv12_dn_gen7[][4] = {
554 };
555 static const uint32_t pp_nv12_load_save_pa_gen7[][4] = {
556 #include "shaders/post_processing/gen7/pl2_to_pa.g7b"
557 };
558 static const uint32_t pp_pl3_load_save_pa_gen7[][4] = {
559 #include "shaders/post_processing/gen7/pl3_to_pa.g7b"
560 };
561 static const uint32_t pp_pa_load_save_nv12_gen7[][4] = {
562 #include "shaders/post_processing/gen7/pa_to_pl2.g7b"
563 };
564 static const uint32_t pp_pa_load_save_pl3_gen7[][4] = {
565 #include "shaders/post_processing/gen7/pa_to_pl3.g7b"
566 };
567
568 static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
569                                            const struct i965_surface *src_surface,
570                                            const VARectangle *src_rect,
571                                            struct i965_surface *dst_surface,
572                                            const VARectangle *dst_rect,
573                                            void *filter_param);
574 static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
575                                              const struct i965_surface *src_surface,
576                                              const VARectangle *src_rect,
577                                              struct i965_surface *dst_surface,
578                                              const VARectangle *dst_rect,
579                                              void *filter_param);
580 static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
581                                            const struct i965_surface *src_surface,
582                                            const VARectangle *src_rect,
583                                            struct i965_surface *dst_surface,
584                                            const VARectangle *dst_rect,
585                                            void *filter_param);
586
587 static struct pp_module pp_modules_gen7[] = {
588     {
589         {
590             "NULL module (for testing)",
591             PP_NULL,
592             pp_null_gen7,
593             sizeof(pp_null_gen7),
594             NULL,
595         },
596
597         pp_null_initialize,
598     },
599
600     {
601         {
602             "NV12_NV12",
603             PP_NV12_LOAD_SAVE_N12,
604             pp_nv12_load_save_nv12_gen7,
605             sizeof(pp_nv12_load_save_nv12_gen7),
606             NULL,
607         },
608
609         gen7_pp_plx_avs_initialize,
610     },
611
612     {
613         {
614             "NV12_PL3",
615             PP_NV12_LOAD_SAVE_PL3,
616             pp_nv12_load_save_pl3_gen7,
617             sizeof(pp_nv12_load_save_pl3_gen7),
618             NULL,
619         },
620         
621         gen7_pp_plx_avs_initialize,
622     },
623
624     {
625         {
626             "PL3_NV12",
627             PP_PL3_LOAD_SAVE_N12,
628             pp_pl3_load_save_nv12_gen7,
629             sizeof(pp_pl3_load_save_nv12_gen7),
630             NULL,
631         },
632
633         gen7_pp_plx_avs_initialize,
634     },
635
636     {
637         {
638             "PL3_PL3",
639             PP_PL3_LOAD_SAVE_N12,
640             pp_pl3_load_save_pl3_gen7,
641             sizeof(pp_pl3_load_save_pl3_gen7),
642             NULL,
643         },
644
645         gen7_pp_plx_avs_initialize,
646     },
647
648     {
649         {
650             "NV12 Scaling module",
651             PP_NV12_SCALING,
652             pp_nv12_scaling_gen7,
653             sizeof(pp_nv12_scaling_gen7),
654             NULL,
655         },
656
657         gen7_pp_plx_avs_initialize,
658     },
659
660     {
661         {
662             "NV12 AVS module",
663             PP_NV12_AVS,
664             pp_nv12_avs_gen7,
665             sizeof(pp_nv12_avs_gen7),
666             NULL,
667         },
668
669         gen7_pp_plx_avs_initialize,
670     },
671
672     {
673         {
674             "NV12 DNDI module",
675             PP_NV12_DNDI,
676             pp_nv12_dndi_gen7,
677             sizeof(pp_nv12_dndi_gen7),
678             NULL,
679         },
680
681         gen7_pp_nv12_dndi_initialize,
682     },
683
684     {
685         {
686             "NV12 DN module",
687             PP_NV12_DN,
688             pp_nv12_dn_gen7,
689             sizeof(pp_nv12_dn_gen7),
690             NULL,
691         },
692
693         gen7_pp_nv12_dn_initialize,
694     },
695     {
696         {
697             "NV12_PA module",
698             PP_NV12_LOAD_SAVE_PA,
699             pp_nv12_load_save_pa_gen7,
700             sizeof(pp_nv12_load_save_pa_gen7),
701             NULL,
702         },
703     
704         gen7_pp_plx_avs_initialize,
705     },
706
707     {
708         {
709             "PL3_PA module",
710             PP_PL3_LOAD_SAVE_PA,
711             pp_pl3_load_save_pa_gen7,
712             sizeof(pp_pl3_load_save_pa_gen7),
713             NULL,
714         },
715     
716         gen7_pp_plx_avs_initialize,
717     },
718
719     {
720         {
721             "PA_NV12 module",
722             PP_PA_LOAD_SAVE_NV12,
723             pp_pa_load_save_nv12_gen7,
724             sizeof(pp_pa_load_save_nv12_gen7),
725             NULL,
726         },
727     
728         gen7_pp_plx_avs_initialize,
729     },
730
731     {
732         {
733             "PA_PL3 module",
734             PP_PA_LOAD_SAVE_PL3,
735             pp_pa_load_save_pl3_gen7,
736             sizeof(pp_pa_load_save_pl3_gen7),
737             NULL,
738         },
739     
740         gen7_pp_plx_avs_initialize,
741     },
742     
743 };
744
745 static int
746 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
747 {
748     struct i965_driver_data *i965 = i965_driver_data(ctx);
749     int fourcc;
750
751     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
752         struct object_image *obj_image = IMAGE(surface->id);
753         fourcc = obj_image->image.format.fourcc;
754     } else {
755         struct object_surface *obj_surface = SURFACE(surface->id);
756         fourcc = obj_surface->fourcc;
757     }
758
759     return fourcc;
760 }
761
762 static void
763 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
764 {
765     switch (tiling) {
766     case I915_TILING_NONE:
767         ss->ss3.tiled_surface = 0;
768         ss->ss3.tile_walk = 0;
769         break;
770     case I915_TILING_X:
771         ss->ss3.tiled_surface = 1;
772         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
773         break;
774     case I915_TILING_Y:
775         ss->ss3.tiled_surface = 1;
776         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
777         break;
778     }
779 }
780
781 static void
782 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
783 {
784     switch (tiling) {
785     case I915_TILING_NONE:
786         ss->ss2.tiled_surface = 0;
787         ss->ss2.tile_walk = 0;
788         break;
789     case I915_TILING_X:
790         ss->ss2.tiled_surface = 1;
791         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
792         break;
793     case I915_TILING_Y:
794         ss->ss2.tiled_surface = 1;
795         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
796         break;
797     }
798 }
799
800 static void
801 gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
802 {
803     switch (tiling) {
804     case I915_TILING_NONE:
805         ss->ss0.tiled_surface = 0;
806         ss->ss0.tile_walk = 0;
807         break;
808     case I915_TILING_X:
809         ss->ss0.tiled_surface = 1;
810         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
811         break;
812     case I915_TILING_Y:
813         ss->ss0.tiled_surface = 1;
814         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
815         break;
816     }
817 }
818
819 static void
820 gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
821 {
822     switch (tiling) {
823     case I915_TILING_NONE:
824         ss->ss2.tiled_surface = 0;
825         ss->ss2.tile_walk = 0;
826         break;
827     case I915_TILING_X:
828         ss->ss2.tiled_surface = 1;
829         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
830         break;
831     case I915_TILING_Y:
832         ss->ss2.tiled_surface = 1;
833         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
834         break;
835     }
836 }
837
838 static void
839 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
840 {
841     struct i965_interface_descriptor *desc;
842     dri_bo *bo;
843     int pp_index = pp_context->current_pp;
844
845     bo = pp_context->idrt.bo;
846     dri_bo_map(bo, 1);
847     assert(bo->virtual);
848     desc = bo->virtual;
849     memset(desc, 0, sizeof(*desc));
850     desc->desc0.grf_reg_blocks = 10;
851     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
852     desc->desc1.const_urb_entry_read_offset = 0;
853     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
854     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
855     desc->desc2.sampler_count = 0;
856     desc->desc3.binding_table_entry_count = 0;
857     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
858
859     dri_bo_emit_reloc(bo,
860                       I915_GEM_DOMAIN_INSTRUCTION, 0,
861                       desc->desc0.grf_reg_blocks,
862                       offsetof(struct i965_interface_descriptor, desc0),
863                       pp_context->pp_modules[pp_index].kernel.bo);
864
865     dri_bo_emit_reloc(bo,
866                       I915_GEM_DOMAIN_INSTRUCTION, 0,
867                       desc->desc2.sampler_count << 2,
868                       offsetof(struct i965_interface_descriptor, desc2),
869                       pp_context->sampler_state_table.bo);
870
871     dri_bo_unmap(bo);
872     pp_context->idrt.num_interface_descriptors++;
873 }
874
875 static void
876 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
877 {
878     struct i965_vfe_state *vfe_state;
879     dri_bo *bo;
880
881     bo = pp_context->vfe_state.bo;
882     dri_bo_map(bo, 1);
883     assert(bo->virtual);
884     vfe_state = bo->virtual;
885     memset(vfe_state, 0, sizeof(*vfe_state));
886     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
887     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
888     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
889     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
890     vfe_state->vfe1.children_present = 0;
891     vfe_state->vfe2.interface_descriptor_base = 
892         pp_context->idrt.bo->offset >> 4; /* reloc */
893     dri_bo_emit_reloc(bo,
894                       I915_GEM_DOMAIN_INSTRUCTION, 0,
895                       0,
896                       offsetof(struct i965_vfe_state, vfe2),
897                       pp_context->idrt.bo);
898     dri_bo_unmap(bo);
899 }
900
901 static void
902 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
903 {
904     unsigned char *constant_buffer;
905     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
906
907     assert(sizeof(*pp_static_parameter) == 128);
908     dri_bo_map(pp_context->curbe.bo, 1);
909     assert(pp_context->curbe.bo->virtual);
910     constant_buffer = pp_context->curbe.bo->virtual;
911     memcpy(constant_buffer, pp_static_parameter, sizeof(*pp_static_parameter));
912     dri_bo_unmap(pp_context->curbe.bo);
913 }
914
915 static void
916 ironlake_pp_states_setup(VADriverContextP ctx,
917                          struct i965_post_processing_context *pp_context)
918 {
919     ironlake_pp_interface_descriptor_table(pp_context);
920     ironlake_pp_vfe_state(pp_context);
921     ironlake_pp_upload_constants(pp_context);
922 }
923
924 static void
925 ironlake_pp_pipeline_select(VADriverContextP ctx,
926                             struct i965_post_processing_context *pp_context)
927 {
928     struct intel_batchbuffer *batch = pp_context->batch;
929
930     BEGIN_BATCH(batch, 1);
931     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
932     ADVANCE_BATCH(batch);
933 }
934
935 static void
936 ironlake_pp_urb_layout(VADriverContextP ctx,
937                        struct i965_post_processing_context *pp_context)
938 {
939     struct intel_batchbuffer *batch = pp_context->batch;
940     unsigned int vfe_fence, cs_fence;
941
942     vfe_fence = pp_context->urb.cs_start;
943     cs_fence = pp_context->urb.size;
944
945     BEGIN_BATCH(batch, 3);
946     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
947     OUT_BATCH(batch, 0);
948     OUT_BATCH(batch, 
949               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
950               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
951     ADVANCE_BATCH(batch);
952 }
953
954 static void
955 ironlake_pp_state_base_address(VADriverContextP ctx,
956                                struct i965_post_processing_context *pp_context)
957 {
958     struct intel_batchbuffer *batch = pp_context->batch;
959
960     BEGIN_BATCH(batch, 8);
961     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
962     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
963     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
964     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
965     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
966     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
967     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
968     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
969     ADVANCE_BATCH(batch);
970 }
971
972 static void
973 ironlake_pp_state_pointers(VADriverContextP ctx,
974                            struct i965_post_processing_context *pp_context)
975 {
976     struct intel_batchbuffer *batch = pp_context->batch;
977
978     BEGIN_BATCH(batch, 3);
979     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
980     OUT_BATCH(batch, 0);
981     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
982     ADVANCE_BATCH(batch);
983 }
984
985 static void 
986 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
987                           struct i965_post_processing_context *pp_context)
988 {
989     struct intel_batchbuffer *batch = pp_context->batch;
990
991     BEGIN_BATCH(batch, 2);
992     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
993     OUT_BATCH(batch,
994               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
995               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
996     ADVANCE_BATCH(batch);
997 }
998
999 static void
1000 ironlake_pp_constant_buffer(VADriverContextP ctx,
1001                             struct i965_post_processing_context *pp_context)
1002 {
1003     struct intel_batchbuffer *batch = pp_context->batch;
1004
1005     BEGIN_BATCH(batch, 2);
1006     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1007     OUT_RELOC(batch, pp_context->curbe.bo,
1008               I915_GEM_DOMAIN_INSTRUCTION, 0,
1009               pp_context->urb.size_cs_entry - 1);
1010     ADVANCE_BATCH(batch);    
1011 }
1012
1013 static void
1014 ironlake_pp_object_walker(VADriverContextP ctx,
1015                           struct i965_post_processing_context *pp_context)
1016 {
1017     struct intel_batchbuffer *batch = pp_context->batch;
1018     int x, x_steps, y, y_steps;
1019     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1020
1021     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
1022     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
1023
1024     for (y = 0; y < y_steps; y++) {
1025         for (x = 0; x < x_steps; x++) {
1026             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
1027                 BEGIN_BATCH(batch, 20);
1028                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
1029                 OUT_BATCH(batch, 0);
1030                 OUT_BATCH(batch, 0); /* no indirect data */
1031                 OUT_BATCH(batch, 0);
1032
1033                 /* inline data grf 5-6 */
1034                 assert(sizeof(*pp_inline_parameter) == 64);
1035                 intel_batchbuffer_data(batch, pp_inline_parameter, sizeof(*pp_inline_parameter));
1036
1037                 ADVANCE_BATCH(batch);
1038             }
1039         }
1040     }
1041 }
1042
1043 static void
1044 ironlake_pp_pipeline_setup(VADriverContextP ctx,
1045                            struct i965_post_processing_context *pp_context)
1046 {
1047     struct intel_batchbuffer *batch = pp_context->batch;
1048
1049     intel_batchbuffer_start_atomic(batch, 0x1000);
1050     intel_batchbuffer_emit_mi_flush(batch);
1051     ironlake_pp_pipeline_select(ctx, pp_context);
1052     ironlake_pp_state_base_address(ctx, pp_context);
1053     ironlake_pp_state_pointers(ctx, pp_context);
1054     ironlake_pp_urb_layout(ctx, pp_context);
1055     ironlake_pp_cs_urb_layout(ctx, pp_context);
1056     ironlake_pp_constant_buffer(ctx, pp_context);
1057     ironlake_pp_object_walker(ctx, pp_context);
1058     intel_batchbuffer_end_atomic(batch);
1059 }
1060
1061 // update u/v offset when the surface format are packed yuv
1062 static void i965_update_src_surface_uv_offset(
1063     VADriverContextP    ctx, 
1064     struct i965_post_processing_context *pp_context,
1065     const struct i965_surface *surface)
1066 {
1067     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1068     int fourcc = pp_get_surface_fourcc(ctx, surface);
1069     
1070     if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) {
1071         pp_static_parameter->grf1.source_packed_u_offset = 1;
1072         pp_static_parameter->grf1.source_packed_v_offset = 3;
1073     } 
1074     else if (fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
1075         pp_static_parameter->grf1.source_packed_y_offset = 1;
1076         pp_static_parameter->grf1.source_packed_v_offset = 2;
1077     }
1078     
1079 }
1080
1081 static void i965_update_dst_surface_uv_offset(
1082     VADriverContextP    ctx, 
1083     struct i965_post_processing_context *pp_context,
1084     const struct i965_surface *surface)
1085 {
1086     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1087     int fourcc = pp_get_surface_fourcc(ctx, surface);
1088     
1089     if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) {
1090         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_u_offset = 1;
1091         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 3;
1092     } 
1093     else if (fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
1094         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_y_offset = 1;
1095         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 2;
1096     }
1097     
1098 }
1099
1100 static void
1101 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1102                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1103                           int width, int height, int pitch, int format, 
1104                           int index, int is_target)
1105 {
1106     struct i965_surface_state *ss;
1107     dri_bo *ss_bo;
1108     unsigned int tiling;
1109     unsigned int swizzle;
1110
1111     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1112     ss_bo = pp_context->surface_state_binding_table.bo;
1113     assert(ss_bo);
1114
1115     dri_bo_map(ss_bo, True);
1116     assert(ss_bo->virtual);
1117     ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1118     memset(ss, 0, sizeof(*ss));
1119     ss->ss0.surface_type = I965_SURFACE_2D;
1120     ss->ss0.surface_format = format;
1121     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1122     ss->ss2.width = width - 1;
1123     ss->ss2.height = height - 1;
1124     ss->ss3.pitch = pitch - 1;
1125     pp_set_surface_tiling(ss, tiling);
1126     dri_bo_emit_reloc(ss_bo,
1127                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1128                       surf_bo_offset,
1129                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
1130                       surf_bo);
1131     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1132     dri_bo_unmap(ss_bo);
1133 }
1134
1135 static void
1136 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1137                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1138                            int width, int height, int wpitch,
1139                            int xoffset, int yoffset,
1140                            int format, int interleave_chroma,
1141                            int index)
1142 {
1143     struct i965_surface_state2 *ss2;
1144     dri_bo *ss2_bo;
1145     unsigned int tiling;
1146     unsigned int swizzle;
1147
1148     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1149     ss2_bo = pp_context->surface_state_binding_table.bo;
1150     assert(ss2_bo);
1151
1152     dri_bo_map(ss2_bo, True);
1153     assert(ss2_bo->virtual);
1154     ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1155     memset(ss2, 0, sizeof(*ss2));
1156     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1157     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1158     ss2->ss1.width = width - 1;
1159     ss2->ss1.height = height - 1;
1160     ss2->ss2.pitch = wpitch - 1;
1161     ss2->ss2.interleave_chroma = interleave_chroma;
1162     ss2->ss2.surface_format = format;
1163     ss2->ss3.x_offset_for_cb = xoffset;
1164     ss2->ss3.y_offset_for_cb = yoffset;
1165     pp_set_surface2_tiling(ss2, tiling);
1166     dri_bo_emit_reloc(ss2_bo,
1167                       I915_GEM_DOMAIN_RENDER, 0,
1168                       surf_bo_offset,
1169                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
1170                       surf_bo);
1171     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1172     dri_bo_unmap(ss2_bo);
1173 }
1174
1175 static void
1176 gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1177                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1178                           int width, int height, int pitch, int format, 
1179                           int index, int is_target)
1180 {
1181     struct gen7_surface_state *ss;
1182     dri_bo *ss_bo;
1183     unsigned int tiling;
1184     unsigned int swizzle;
1185
1186     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1187     ss_bo = pp_context->surface_state_binding_table.bo;
1188     assert(ss_bo);
1189
1190     dri_bo_map(ss_bo, True);
1191     assert(ss_bo->virtual);
1192     ss = (struct gen7_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1193     memset(ss, 0, sizeof(*ss));
1194     ss->ss0.surface_type = I965_SURFACE_2D;
1195     ss->ss0.surface_format = format;
1196     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1197     ss->ss2.width = width - 1;
1198     ss->ss2.height = height - 1;
1199     ss->ss3.pitch = pitch - 1;
1200     gen7_pp_set_surface_tiling(ss, tiling);
1201     dri_bo_emit_reloc(ss_bo,
1202                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1203                       surf_bo_offset,
1204                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
1205                       surf_bo);
1206     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1207     dri_bo_unmap(ss_bo);
1208 }
1209
1210 static void
1211 gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1212                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1213                            int width, int height, int wpitch,
1214                            int xoffset, int yoffset,
1215                            int format, int interleave_chroma,
1216                            int index)
1217 {
1218     struct gen7_surface_state2 *ss2;
1219     dri_bo *ss2_bo;
1220     unsigned int tiling;
1221     unsigned int swizzle;
1222
1223     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1224     ss2_bo = pp_context->surface_state_binding_table.bo;
1225     assert(ss2_bo);
1226
1227     dri_bo_map(ss2_bo, True);
1228     assert(ss2_bo->virtual);
1229     ss2 = (struct gen7_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1230     memset(ss2, 0, sizeof(*ss2));
1231     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1232     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1233     ss2->ss1.width = width - 1;
1234     ss2->ss1.height = height - 1;
1235     ss2->ss2.pitch = wpitch - 1;
1236     ss2->ss2.interleave_chroma = interleave_chroma;
1237     ss2->ss2.surface_format = format;
1238     ss2->ss3.x_offset_for_cb = xoffset;
1239     ss2->ss3.y_offset_for_cb = yoffset;
1240     gen7_pp_set_surface2_tiling(ss2, tiling);
1241     dri_bo_emit_reloc(ss2_bo,
1242                       I915_GEM_DOMAIN_RENDER, 0,
1243                       surf_bo_offset,
1244                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
1245                       surf_bo);
1246     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1247     dri_bo_unmap(ss2_bo);
1248 }
1249
1250 static void 
1251 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1252                                 const struct i965_surface *surface, 
1253                                 int base_index, int is_target,
1254                                 int *width, int *height, int *pitch, int *offset)
1255 {
1256     struct i965_driver_data *i965 = i965_driver_data(ctx);
1257     struct object_surface *obj_surface;
1258     struct object_image *obj_image;
1259     dri_bo *bo;
1260     int fourcc = pp_get_surface_fourcc(ctx, surface);
1261     const int Y = 0;
1262     const int U = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 2 : 1;
1263     const int V = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 1 : 2;
1264     const int UV = 1;
1265     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1266     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); 
1267
1268     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1269         obj_surface = SURFACE(surface->id);
1270         bo = obj_surface->bo;
1271         width[0] = obj_surface->orig_width;
1272         height[0] = obj_surface->orig_height;
1273         pitch[0] = obj_surface->width;
1274         offset[0] = 0;
1275
1276         if (packed_yuv ) {
1277             width[0] = obj_surface->orig_width * 2; 
1278             pitch[0] = obj_surface->width * 2;
1279         }
1280         else if (interleaved_uv) {
1281             width[1] = obj_surface->orig_width;
1282             height[1] = obj_surface->orig_height / 2;
1283             pitch[1] = obj_surface->width;
1284             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1285         } else {
1286             width[1] = obj_surface->orig_width / 2;
1287             height[1] = obj_surface->orig_height / 2;
1288             pitch[1] = obj_surface->width / 2;
1289             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1290             width[2] = obj_surface->orig_width / 2;
1291             height[2] = obj_surface->orig_height / 2;
1292             pitch[2] = obj_surface->width / 2;
1293             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
1294         }
1295     } else {
1296         obj_image = IMAGE(surface->id);
1297         bo = obj_image->bo;
1298         width[0] = obj_image->image.width;
1299         height[0] = obj_image->image.height;
1300         pitch[0] = obj_image->image.pitches[0];
1301         offset[0] = obj_image->image.offsets[0];
1302
1303         if (packed_yuv ) {
1304             width[0] = obj_image->image.width * 2;
1305         }
1306         else if (interleaved_uv) {
1307             width[1] = obj_image->image.width;
1308             height[1] = obj_image->image.height / 2;
1309             pitch[1] = obj_image->image.pitches[1];
1310             offset[1] = obj_image->image.offsets[1];
1311         } else {
1312             width[1] = obj_image->image.width / 2;
1313             height[1] = obj_image->image.height / 2;
1314             pitch[1] = obj_image->image.pitches[1];
1315             offset[1] = obj_image->image.offsets[1];
1316             width[2] = obj_image->image.width / 2;
1317             height[2] = obj_image->image.height / 2;
1318             pitch[2] = obj_image->image.pitches[2];
1319             offset[2] = obj_image->image.offsets[2];
1320         }
1321     }
1322
1323     /* Y surface */
1324     i965_pp_set_surface_state(ctx, pp_context,
1325                               bo, offset[Y],
1326                               width[Y] / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
1327                               base_index, is_target);
1328
1329     if (!packed_yuv) {
1330         if (interleaved_uv) {
1331             i965_pp_set_surface_state(ctx, pp_context,
1332                                       bo, offset[UV],
1333                                       width[UV] / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
1334                                       base_index + 1, is_target);
1335         } else {
1336             /* U surface */
1337             i965_pp_set_surface_state(ctx, pp_context,
1338                                       bo, offset[U],
1339                                       width[U] / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
1340                                       base_index + 1, is_target);
1341
1342             /* V surface */
1343             i965_pp_set_surface_state(ctx, pp_context,
1344                                       bo, offset[V],
1345                                       width[V] / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
1346                                       base_index + 2, is_target);
1347         }
1348     }
1349
1350 }
1351
1352 static void 
1353 gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1354                                      const struct i965_surface *surface, 
1355                                      int base_index, int is_target,
1356                                      int *width, int *height, int *pitch, int *offset)
1357 {
1358     struct i965_driver_data *i965 = i965_driver_data(ctx);
1359     struct object_surface *obj_surface;
1360     struct object_image *obj_image;
1361     dri_bo *bo;
1362     int fourcc = pp_get_surface_fourcc(ctx, surface);
1363     const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1364                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1;
1365     const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1366                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 1 : 2;
1367     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1368     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2'));
1369
1370     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1371         obj_surface = SURFACE(surface->id);
1372         bo = obj_surface->bo;
1373         width[0] = obj_surface->orig_width;
1374         height[0] = obj_surface->orig_height;
1375         pitch[0] = obj_surface->width;
1376         offset[0] = 0;
1377
1378         if (packed_yuv) {
1379             if (is_target)
1380                 width[0] = obj_surface->orig_width * 2; /* surface format is R8, so double the width */
1381             else
1382                 width[0] = obj_surface->orig_width;     /* surface foramt is YCBCR, width is specified in units of pixels */
1383
1384             pitch[0] = obj_surface->width * 2;
1385         }
1386
1387         width[1] = obj_surface->cb_cr_width;
1388         height[1] = obj_surface->cb_cr_height;
1389         pitch[1] = obj_surface->cb_cr_pitch;
1390         offset[1] = obj_surface->y_cb_offset * obj_surface->width;
1391
1392         width[2] = obj_surface->cb_cr_width;
1393         height[2] = obj_surface->cb_cr_height;
1394         pitch[2] = obj_surface->cb_cr_pitch;
1395         offset[2] = obj_surface->y_cr_offset * obj_surface->width;
1396     } else {
1397         obj_image = IMAGE(surface->id);
1398         bo = obj_image->bo;
1399         width[0] = obj_image->image.width;
1400         height[0] = obj_image->image.height;
1401         pitch[0] = obj_image->image.pitches[0];
1402         offset[0] = obj_image->image.offsets[0];
1403
1404         if (packed_yuv) {
1405             if (is_target)
1406                 width[0] = obj_image->image.width * 2;  /* surface format is R8, so double the width */
1407             else
1408                 width[0] = obj_image->image.width;      /* surface foramt is YCBCR, width is specified in units of pixels */
1409         } else if (interleaved_uv) {
1410             width[1] = obj_image->image.width / 2;
1411             height[1] = obj_image->image.height / 2;
1412             pitch[1] = obj_image->image.pitches[1];
1413             offset[1] = obj_image->image.offsets[1];
1414         } else {
1415             width[1] = obj_image->image.width / 2;
1416             height[1] = obj_image->image.height / 2;
1417             pitch[1] = obj_image->image.pitches[U];
1418             offset[1] = obj_image->image.offsets[U];
1419             width[2] = obj_image->image.width / 2;
1420             height[2] = obj_image->image.height / 2;
1421             pitch[2] = obj_image->image.pitches[V];
1422             offset[2] = obj_image->image.offsets[V];
1423         }
1424     }
1425
1426     if (is_target) {
1427         gen7_pp_set_surface_state(ctx, pp_context,
1428                                   bo, 0,
1429                                   width[0] / 4, height[0], pitch[0],
1430                                   I965_SURFACEFORMAT_R8_SINT,
1431                                   base_index, 1);
1432
1433         if (!packed_yuv) {
1434             if (interleaved_uv) {
1435                 gen7_pp_set_surface_state(ctx, pp_context,
1436                                           bo, offset[1],
1437                                           width[1] / 2, height[1], pitch[1],
1438                                           I965_SURFACEFORMAT_R8G8_SINT,
1439                                           base_index + 1, 1);
1440             } else {
1441                 gen7_pp_set_surface_state(ctx, pp_context,
1442                                           bo, offset[1],
1443                                           width[1] / 4, height[1], pitch[1],
1444                                           I965_SURFACEFORMAT_R8_SINT,
1445                                           base_index + 1, 1);
1446                 gen7_pp_set_surface_state(ctx, pp_context,
1447                                           bo, offset[2],
1448                                           width[2] / 4, height[2], pitch[2],
1449                                           I965_SURFACEFORMAT_R8_SINT,
1450                                           base_index + 2, 1);
1451             }
1452         }
1453     } else {
1454         int format0 = SURFACE_FORMAT_Y8_UNORM;
1455
1456         switch (fourcc) {
1457         case VA_FOURCC('Y', 'U', 'Y', '2'):
1458             format0 = SURFACE_FORMAT_YCRCB_NORMAL;
1459             break;
1460
1461         default:
1462             break;
1463         }
1464
1465         gen7_pp_set_surface2_state(ctx, pp_context,
1466                                    bo, offset[0],
1467                                    width[0], height[0], pitch[0],
1468                                    0, 0,
1469                                    format0, 0,
1470                                    base_index);
1471
1472         if (!packed_yuv) {
1473             if (interleaved_uv) {
1474                 gen7_pp_set_surface2_state(ctx, pp_context,
1475                                            bo, offset[1],
1476                                            width[1], height[1], pitch[1],
1477                                            0, 0,
1478                                            SURFACE_FORMAT_R8B8_UNORM, 0,
1479                                            base_index + 1);
1480             } else {
1481                 gen7_pp_set_surface2_state(ctx, pp_context,
1482                                            bo, offset[1],
1483                                            width[1], height[1], pitch[1],
1484                                            0, 0,
1485                                            SURFACE_FORMAT_R8_UNORM, 0,
1486                                            base_index + 1);
1487                 gen7_pp_set_surface2_state(ctx, pp_context,
1488                                            bo, offset[2],
1489                                            width[2], height[2], pitch[2],
1490                                            0, 0,
1491                                            SURFACE_FORMAT_R8_UNORM, 0,
1492                                            base_index + 2);
1493             }
1494         }
1495     }
1496 }
1497
/* Horizontal step count of the null module: always 1; the argument is ignored. */
static int
pp_null_x_steps(void *private_context)
{
    const int steps = 1;

    (void)private_context;
    return steps;
}
1503
/* Vertical step count of the null module: always 1; the argument is ignored. */
static int
pp_null_y_steps(void *private_context)
{
    const int steps = 1;

    (void)private_context;
    return steps;
}
1509
/*
 * Per-block hook of the null module: nothing is configured and 0 is
 * returned for every (x, y).
 */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;
    (void)x;
    (void)y;

    return 0;
}
1515
1516 static VAStatus
1517 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1518                    const struct i965_surface *src_surface,
1519                    const VARectangle *src_rect,
1520                    struct i965_surface *dst_surface,
1521                    const VARectangle *dst_rect,
1522                    void *filter_param)
1523 {
1524     /* private function & data */
1525     pp_context->pp_x_steps = pp_null_x_steps;
1526     pp_context->pp_y_steps = pp_null_y_steps;
1527     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
1528
1529     dst_surface->flags = src_surface->flags;
1530
1531     return VA_STATUS_SUCCESS;
1532 }
1533
/* Horizontal step count of the load/save module: always 1; the argument is ignored. */
static int
pp_load_save_x_steps(void *private_context)
{
    const int steps = 1;

    (void)private_context;
    return steps;
}
1539
1540 static int
1541 pp_load_save_y_steps(void *private_context)
1542 {
1543     struct pp_load_save_context *pp_load_save_context = private_context;
1544
1545     return pp_load_save_context->dest_h / 8;
1546 }
1547
1548 static int
1549 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1550 {
1551     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1552
1553     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
1554     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
1555     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
1556     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
1557
1558     return 0;
1559 }
1560
1561 static VAStatus
1562 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1563                                 const struct i965_surface *src_surface,
1564                                 const VARectangle *src_rect,
1565                                 struct i965_surface *dst_surface,
1566                                 const VARectangle *dst_rect,
1567                                 void *filter_param)
1568 {
1569     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
1570     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1571     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1572     int width[3], height[3], pitch[3], offset[3];
1573     const int Y = 0;
1574
1575     /* source surface */
1576     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
1577                                     width, height, pitch, offset);
1578
1579     /* destination surface */
1580     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
1581                                     width, height, pitch, offset);
1582
1583     /* private function & data */
1584     pp_context->pp_x_steps = pp_load_save_x_steps;
1585     pp_context->pp_y_steps = pp_load_save_y_steps;
1586     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
1587     pp_load_save_context->dest_h = ALIGN(height[Y], 16);
1588     pp_load_save_context->dest_w = ALIGN(width[Y], 16);
1589
1590     pp_inline_parameter->grf5.block_count_x = ALIGN(width[Y], 16) / 16;   /* 1 x N */
1591     pp_inline_parameter->grf5.number_blocks = ALIGN(width[Y], 16) / 16;
1592
1593     pp_static_parameter->grf3.horizontal_origin_offset = src_rect->x;
1594     pp_static_parameter->grf3.vertical_origin_offset = src_rect->y;
1595
1596     // update u/v offset for packed yuv
1597     i965_update_src_surface_uv_offset (ctx, pp_context, src_surface);
1598     i965_update_dst_surface_uv_offset (ctx, pp_context, dst_surface);
1599
1600     dst_surface->flags = src_surface->flags;
1601
1602     return VA_STATUS_SUCCESS;
1603 }
1604
/* Horizontal step count of the scaling module: always 1; the argument is ignored. */
static int
pp_scaling_x_steps(void *private_context)
{
    const int steps = 1;

    (void)private_context;
    return steps;
}
1610
1611 static int
1612 pp_scaling_y_steps(void *private_context)
1613 {
1614     struct pp_scaling_context *pp_scaling_context = private_context;
1615
1616     return pp_scaling_context->dest_h / 8;
1617 }
1618
1619 static int
1620 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1621 {
1622     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
1623     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1624     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1625     float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1626     float src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
1627
1628     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
1629     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
1630     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
1631     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
1632     
1633     return 0;
1634 }
1635
1636 static VAStatus
1637 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1638                            const struct i965_surface *src_surface,
1639                            const VARectangle *src_rect,
1640                            struct i965_surface *dst_surface,
1641                            const VARectangle *dst_rect,
1642                            void *filter_param)
1643 {
1644     struct i965_driver_data *i965 = i965_driver_data(ctx);
1645     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
1646     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1647     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1648     struct object_surface *obj_surface;
1649     struct i965_sampler_state *sampler_state;
1650     int in_w, in_h, in_wpitch, in_hpitch;
1651     int out_w, out_h, out_wpitch, out_hpitch;
1652
1653     /* source surface */
1654     obj_surface = SURFACE(src_surface->id);
1655     in_w = obj_surface->orig_width;
1656     in_h = obj_surface->orig_height;
1657     in_wpitch = obj_surface->width;
1658     in_hpitch = obj_surface->height;
1659
1660     /* source Y surface index 1 */
1661     i965_pp_set_surface_state(ctx, pp_context,
1662                               obj_surface->bo, 0,
1663                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1664                               1, 0);
1665
1666     /* source UV surface index 2 */
1667     i965_pp_set_surface_state(ctx, pp_context,
1668                               obj_surface->bo, in_wpitch * in_hpitch,
1669                               in_w / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1670                               2, 0);
1671
1672     /* destination surface */
1673     obj_surface = SURFACE(dst_surface->id);
1674     out_w = obj_surface->orig_width;
1675     out_h = obj_surface->orig_height;
1676     out_wpitch = obj_surface->width;
1677     out_hpitch = obj_surface->height;
1678
1679     /* destination Y surface index 7 */
1680     i965_pp_set_surface_state(ctx, pp_context,
1681                               obj_surface->bo, 0,
1682                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1683                               7, 1);
1684
1685     /* destination UV surface index 8 */
1686     i965_pp_set_surface_state(ctx, pp_context,
1687                               obj_surface->bo, out_wpitch * out_hpitch,
1688                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1689                               8, 1);
1690
1691     /* sampler state */
1692     dri_bo_map(pp_context->sampler_state_table.bo, True);
1693     assert(pp_context->sampler_state_table.bo->virtual);
1694     sampler_state = pp_context->sampler_state_table.bo->virtual;
1695
1696     /* SIMD16 Y index 1 */
1697     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
1698     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1699     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1700     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1701     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1702
1703     /* SIMD16 UV index 2 */
1704     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
1705     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1706     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1707     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1708     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1709
1710     dri_bo_unmap(pp_context->sampler_state_table.bo);
1711
1712     /* private function & data */
1713     pp_context->pp_x_steps = pp_scaling_x_steps;
1714     pp_context->pp_y_steps = pp_scaling_y_steps;
1715     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
1716
1717     pp_scaling_context->dest_x = dst_rect->x;
1718     pp_scaling_context->dest_y = dst_rect->y;
1719     pp_scaling_context->dest_w = ALIGN(dst_rect->width, 16);
1720     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 16);
1721     pp_scaling_context->src_normalized_x = (float)src_rect->x / in_w;
1722     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
1723
1724     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
1725
1726     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / dst_rect->width;
1727     pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
1728     pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
1729     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
1730     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
1731
1732     dst_surface->flags = src_surface->flags;
1733
1734     return VA_STATUS_SUCCESS;
1735 }
1736
1737 static int
1738 pp_avs_x_steps(void *private_context)
1739 {
1740     struct pp_avs_context *pp_avs_context = private_context;
1741
1742     return pp_avs_context->dest_w / 16;
1743 }
1744
/*
 * Number of vertical steps for the AVS scaling pass.  Always a single
 * step; the private context is not consulted.
 */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;      /* intentionally unused */

    return 1;
}
1750
1751 static int
1752 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1753 {
1754     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1755     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1756     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1757     float src_x_steping, src_y_steping, video_step_delta;
1758     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
1759
1760     if (pp_static_parameter->grf4.r4_2.avs.nlas == 0) {
1761         src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1762         pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_avs_context->src_normalized_x;
1763     } else if (tmp_w >= pp_avs_context->dest_w) {
1764         pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1765         pp_inline_parameter->grf6.video_step_delta = 0;
1766         
1767         if (x == 0) {
1768             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
1769                 pp_avs_context->src_normalized_x;
1770         } else {
1771             src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1772             video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1773             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1774                 16 * 15 * video_step_delta / 2;
1775         }
1776     } else {
1777         int n0, n1, n2, nls_left, nls_right;
1778         int factor_a = 5, factor_b = 4;
1779         float f;
1780
1781         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
1782         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
1783         n2 = tmp_w / (16 * factor_a);
1784         nls_left = n0 + n2;
1785         nls_right = n1 + n2;
1786         f = (float) n2 * 16 / tmp_w;
1787         
1788         if (n0 < 5) {
1789             pp_inline_parameter->grf6.video_step_delta = 0.0;
1790
1791             if (x == 0) {
1792                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
1793                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1794             } else {
1795                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1796                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1797                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1798                     16 * 15 * video_step_delta / 2;
1799             }
1800         } else {
1801             if (x < nls_left) {
1802                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
1803                 float a = f / (nls_left * 16 * factor_b);
1804                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
1805                 
1806                 pp_inline_parameter->grf6.video_step_delta = b;
1807
1808                 if (x == 0) {
1809                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1810                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a;
1811                 } else {
1812                     src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1813                     video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1814                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1815                         16 * 15 * video_step_delta / 2;
1816                     pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
1817                 }
1818             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
1819                 /* scale the center linearly */
1820                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1821                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1822                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1823                     16 * 15 * video_step_delta / 2;
1824                 pp_inline_parameter->grf6.video_step_delta = 0.0;
1825                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1826             } else {
1827                 float a = f / (nls_right * 16 * factor_b);
1828                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
1829
1830                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1831                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1832                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1833                     16 * 15 * video_step_delta / 2;
1834                 pp_inline_parameter->grf6.video_step_delta = -b;
1835
1836                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
1837                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
1838                 else
1839                     pp_inline_parameter->grf5.normalized_video_x_scaling_step -= b * 16;
1840             }
1841         }
1842     }
1843
1844     src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
1845     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
1846     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
1847     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
1848
1849     return 0;
1850 }
1851
1852 static VAStatus
1853 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1854                        const struct i965_surface *src_surface,
1855                        const VARectangle *src_rect,
1856                        struct i965_surface *dst_surface,
1857                        const VARectangle *dst_rect,
1858                        void *filter_param,
1859                        int nlas)
1860 {
1861     struct i965_driver_data *i965 = i965_driver_data(ctx);
1862     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1863     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1864     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1865     struct object_surface *obj_surface;
1866     struct i965_sampler_8x8 *sampler_8x8;
1867     struct i965_sampler_8x8_state *sampler_8x8_state;
1868     int index;
1869     int in_w, in_h, in_wpitch, in_hpitch;
1870     int out_w, out_h, out_wpitch, out_hpitch;
1871     int i;
1872
1873     /* surface */
1874     obj_surface = SURFACE(src_surface->id);
1875     in_w = obj_surface->orig_width;
1876     in_h = obj_surface->orig_height;
1877     in_wpitch = obj_surface->width;
1878     in_hpitch = obj_surface->height;
1879
1880     /* source Y surface index 1 */
1881     i965_pp_set_surface2_state(ctx, pp_context,
1882                                obj_surface->bo, 0,
1883                                in_w, in_h, in_wpitch,
1884                                0, 0,
1885                                SURFACE_FORMAT_Y8_UNORM, 0,
1886                                1);
1887
1888     /* source UV surface index 2 */
1889     i965_pp_set_surface2_state(ctx, pp_context,
1890                                obj_surface->bo, in_wpitch * in_hpitch,
1891                                in_w / 2, in_h / 2, in_wpitch,
1892                                0, 0,
1893                                SURFACE_FORMAT_R8B8_UNORM, 0,
1894                                2);
1895
1896     /* destination surface */
1897     obj_surface = SURFACE(dst_surface->id);
1898     out_w = obj_surface->orig_width;
1899     out_h = obj_surface->orig_height;
1900     out_wpitch = obj_surface->width;
1901     out_hpitch = obj_surface->height;
1902     assert(out_w <= out_wpitch && out_h <= out_hpitch);
1903
1904     /* destination Y surface index 7 */
1905     i965_pp_set_surface_state(ctx, pp_context,
1906                               obj_surface->bo, 0,
1907                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1908                               7, 1);
1909
1910     /* destination UV surface index 8 */
1911     i965_pp_set_surface_state(ctx, pp_context,
1912                               obj_surface->bo, out_wpitch * out_hpitch,
1913                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1914                               8, 1);
1915
1916     /* sampler 8x8 state */
1917     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
1918     assert(pp_context->sampler_state_table.bo_8x8->virtual);
1919     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1920     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
1921     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1922
1923     for (i = 0; i < 17; i++) {
1924         /* for Y channel, currently ignore */
1925         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x00;
1926         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x00;
1927         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x08;
1928         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x18;
1929         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x18;
1930         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x08;
1931         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x00;
1932         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x00;
1933         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x00;
1934         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x00;
1935         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x10;
1936         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x10;
1937         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x10;
1938         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x10;
1939         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x00;
1940         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x00;
1941         /* for U/V channel, 0.25 */
1942         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
1943         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
1944         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
1945         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
1946         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
1947         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
1948         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
1949         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
1950         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
1951         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
1952         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
1953         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
1954         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
1955         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
1956         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
1957         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
1958     }
1959
1960     sampler_8x8_state->dw136.default_sharpness_level = 0;
1961     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
1962     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1963     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1964     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
1965
1966     /* sampler 8x8 */
1967     dri_bo_map(pp_context->sampler_state_table.bo, True);
1968     assert(pp_context->sampler_state_table.bo->virtual);
1969     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
1970     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
1971
1972     /* sample_8x8 Y index 1 */
1973     index = 1;
1974     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1975     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
1976     sampler_8x8[index].dw0.ief_bypass = 1;
1977     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1978     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1979     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
1980     sampler_8x8[index].dw2.global_noise_estimation = 22;
1981     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1982     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1983     sampler_8x8[index].dw3.strong_edge_weight = 7;
1984     sampler_8x8[index].dw3.regular_weight = 2;
1985     sampler_8x8[index].dw3.non_edge_weight = 0;
1986     sampler_8x8[index].dw3.gain_factor = 40;
1987     sampler_8x8[index].dw4.steepness_boost = 0;
1988     sampler_8x8[index].dw4.steepness_threshold = 0;
1989     sampler_8x8[index].dw4.mr_boost = 0;
1990     sampler_8x8[index].dw4.mr_threshold = 5;
1991     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1992     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1993     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1994     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1995     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1996     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1997     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1998     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1999     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2000     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2001     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2002     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2003     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2004     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2005     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2006     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2007     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2008     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2009     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2010     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2011     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2012     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2013     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2014     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2015     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2016     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2017     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2018     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2019     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2020     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2021     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2022     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2023     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2024     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2025     sampler_8x8[index].dw13.limiter_boost = 0;
2026     sampler_8x8[index].dw13.minimum_limiter = 10;
2027     sampler_8x8[index].dw13.maximum_limiter = 11;
2028     sampler_8x8[index].dw14.clip_limiter = 130;
2029     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2030                       I915_GEM_DOMAIN_RENDER, 
2031                       0,
2032                       0,
2033                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2034                       pp_context->sampler_state_table.bo_8x8);
2035
2036     /* sample_8x8 UV index 2 */
2037     index = 2;
2038     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2039     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2040     sampler_8x8[index].dw0.ief_bypass = 1;
2041     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2042     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2043     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2044     sampler_8x8[index].dw2.global_noise_estimation = 22;
2045     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2046     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2047     sampler_8x8[index].dw3.strong_edge_weight = 7;
2048     sampler_8x8[index].dw3.regular_weight = 2;
2049     sampler_8x8[index].dw3.non_edge_weight = 0;
2050     sampler_8x8[index].dw3.gain_factor = 40;
2051     sampler_8x8[index].dw4.steepness_boost = 0;
2052     sampler_8x8[index].dw4.steepness_threshold = 0;
2053     sampler_8x8[index].dw4.mr_boost = 0;
2054     sampler_8x8[index].dw4.mr_threshold = 5;
2055     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2056     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2057     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2058     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2059     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2060     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2061     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2062     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2063     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2064     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2065     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2066     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2067     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2068     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2069     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2070     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2071     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2072     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2073     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2074     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2075     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2076     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2077     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2078     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2079     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2080     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2081     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2082     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2083     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2084     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2085     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2086     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2087     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2088     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2089     sampler_8x8[index].dw13.limiter_boost = 0;
2090     sampler_8x8[index].dw13.minimum_limiter = 10;
2091     sampler_8x8[index].dw13.maximum_limiter = 11;
2092     sampler_8x8[index].dw14.clip_limiter = 130;
2093     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2094                       I915_GEM_DOMAIN_RENDER, 
2095                       0,
2096                       0,
2097                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2098                       pp_context->sampler_state_table.bo_8x8);
2099
2100     dri_bo_unmap(pp_context->sampler_state_table.bo);
2101
2102     /* private function & data */
2103     pp_context->pp_x_steps = pp_avs_x_steps;
2104     pp_context->pp_y_steps = pp_avs_y_steps;
2105     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
2106
2107     pp_avs_context->dest_x = dst_rect->x;
2108     pp_avs_context->dest_y = dst_rect->y;
2109     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
2110     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
2111     pp_avs_context->src_normalized_x = (float)src_rect->x / in_w;
2112     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
2113     pp_avs_context->src_w = src_rect->width;
2114     pp_avs_context->src_h = src_rect->height;
2115
2116     pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
2117     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2118
2119     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / dst_rect->width;
2120     pp_inline_parameter->grf5.block_count_x = 1;        /* M x 1 */
2121     pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
2122     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
2123     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
2124     pp_inline_parameter->grf6.video_step_delta = 0.0;
2125
2126     dst_surface->flags = src_surface->flags;
2127
2128     return VA_STATUS_SUCCESS;
2129 }
2130
2131 static VAStatus
2132 pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2133                             const struct i965_surface *src_surface,
2134                             const VARectangle *src_rect,
2135                             struct i965_surface *dst_surface,
2136                             const VARectangle *dst_rect,
2137                             void *filter_param)
2138 {
2139     return pp_nv12_avs_initialize(ctx, pp_context,
2140                                   src_surface,
2141                                   src_rect,
2142                                   dst_surface,
2143                                   dst_rect,
2144                                   filter_param,
2145                                   1);
2146 }
2147
2148 static VAStatus
2149 gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2150                              const struct i965_surface *src_surface,
2151                              const VARectangle *src_rect,
2152                              struct i965_surface *dst_surface,
2153                              const VARectangle *dst_rect,
2154                              void *filter_param)
2155 {
2156     return pp_nv12_avs_initialize(ctx, pp_context,
2157                                   src_surface,
2158                                   src_rect,
2159                                   dst_surface,
2160                                   dst_rect,
2161                                   filter_param,
2162                                   0);    
2163 }
2164
2165 static int
2166 gen7_pp_avs_x_steps(void *private_context)
2167 {
2168     struct pp_avs_context *pp_avs_context = private_context;
2169
2170     return pp_avs_context->dest_w / 16;
2171 }
2172
2173 static int
2174 gen7_pp_avs_y_steps(void *private_context)
2175 {
2176     struct pp_avs_context *pp_avs_context = private_context;
2177
2178     return pp_avs_context->dest_h / 16;
2179 }
2180
2181 static int
2182 gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2183 {
2184     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2185     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2186
2187     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2188     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
2189     pp_inline_parameter->grf7.constant_0 = 0xffffffff;
2190     pp_inline_parameter->grf7.sampler_load_main_video_x_scaling_step = 1.0 / pp_avs_context->src_w;
2191
2192     return 0;
2193 }
2194
2195 static void gen7_update_src_surface_uv_offset(VADriverContextP    ctx, 
2196                                               struct i965_post_processing_context *pp_context,
2197                                               const struct i965_surface *surface)
2198 {
2199     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2200     int fourcc = pp_get_surface_fourcc(ctx, surface);
2201     
2202     if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) {
2203         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
2204         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
2205         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
2206     }
2207 }
2208
2209 static VAStatus
2210 gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2211                            const struct i965_surface *src_surface,
2212                            const VARectangle *src_rect,
2213                            struct i965_surface *dst_surface,
2214                            const VARectangle *dst_rect,
2215                            void *filter_param)
2216 {
2217     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2218     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2219     struct gen7_sampler_8x8 *sampler_8x8;
2220     struct i965_sampler_8x8_state *sampler_8x8_state;
2221     int index, i;
2222     int width[3], height[3], pitch[3], offset[3];
2223
2224     /* source surface */
2225     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
2226                                          width, height, pitch, offset);
2227
2228     /* destination surface */
2229     gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
2230                                          width, height, pitch, offset);
2231
2232     /* sampler 8x8 state */
2233     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2234     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2235     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2236     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2237     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2238
2239     for (i = 0; i < 17; i++) {
2240         /* for Y channel, currently ignore */
2241         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0;
2242         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0;
2243         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0;
2244         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x0;
2245         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x0;
2246         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0;
2247         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0;
2248         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0;
2249         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0;
2250         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0;
2251         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0;
2252         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x0;
2253         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x0;
2254         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0;
2255         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0;
2256         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0;
2257         /* for U/V channel, 0.25 */
2258         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2259         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2260         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
2261         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
2262         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
2263         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
2264         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2265         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2266         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2267         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2268         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
2269         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
2270         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
2271         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
2272         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2273         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2274     }
2275
2276     sampler_8x8_state->dw136.default_sharpness_level = 0;
2277     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
2278     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2279     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2280     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2281
2282     /* sampler 8x8 */
2283     dri_bo_map(pp_context->sampler_state_table.bo, True);
2284     assert(pp_context->sampler_state_table.bo->virtual);
2285     assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
2286     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2287
2288     /* sample_8x8 Y index 4 */
2289     index = 4;
2290     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2291     sampler_8x8[index].dw0.global_noise_estimation = 255;
2292     sampler_8x8[index].dw0.ief_bypass = 1;
2293
2294     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2295
2296     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2297     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2298     sampler_8x8[index].dw2.r5x_coefficient = 9;
2299     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2300     sampler_8x8[index].dw2.r5c_coefficient = 3;
2301
2302     sampler_8x8[index].dw3.r3x_coefficient = 27;
2303     sampler_8x8[index].dw3.r3c_coefficient = 5;
2304     sampler_8x8[index].dw3.gain_factor = 40;
2305     sampler_8x8[index].dw3.non_edge_weight = 1;
2306     sampler_8x8[index].dw3.regular_weight = 2;
2307     sampler_8x8[index].dw3.strong_edge_weight = 7;
2308     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2309
2310     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2311                       I915_GEM_DOMAIN_RENDER, 
2312                       0,
2313                       0,
2314                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2315                       pp_context->sampler_state_table.bo_8x8);
2316
2317     /* sample_8x8 UV index 8 */
2318     index = 8;
2319     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2320     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2321     sampler_8x8[index].dw0.global_noise_estimation = 255;
2322     sampler_8x8[index].dw0.ief_bypass = 1;
2323     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2324     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2325     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2326     sampler_8x8[index].dw2.r5x_coefficient = 9;
2327     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2328     sampler_8x8[index].dw2.r5c_coefficient = 3;
2329     sampler_8x8[index].dw3.r3x_coefficient = 27;
2330     sampler_8x8[index].dw3.r3c_coefficient = 5;
2331     sampler_8x8[index].dw3.gain_factor = 40;
2332     sampler_8x8[index].dw3.non_edge_weight = 1;
2333     sampler_8x8[index].dw3.regular_weight = 2;
2334     sampler_8x8[index].dw3.strong_edge_weight = 7;
2335     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2336
2337     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2338                       I915_GEM_DOMAIN_RENDER, 
2339                       0,
2340                       0,
2341                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2342                       pp_context->sampler_state_table.bo_8x8);
2343
2344     /* sampler_8x8 V, index 12 */
2345     index = 12;
2346     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2347     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2348     sampler_8x8[index].dw0.global_noise_estimation = 255;
2349     sampler_8x8[index].dw0.ief_bypass = 1;
2350     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2351     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2352     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2353     sampler_8x8[index].dw2.r5x_coefficient = 9;
2354     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2355     sampler_8x8[index].dw2.r5c_coefficient = 3;
2356     sampler_8x8[index].dw3.r3x_coefficient = 27;
2357     sampler_8x8[index].dw3.r3c_coefficient = 5;
2358     sampler_8x8[index].dw3.gain_factor = 40;
2359     sampler_8x8[index].dw3.non_edge_weight = 1;
2360     sampler_8x8[index].dw3.regular_weight = 2;
2361     sampler_8x8[index].dw3.strong_edge_weight = 7;
2362     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2363
2364     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2365                       I915_GEM_DOMAIN_RENDER, 
2366                       0,
2367                       0,
2368                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2369                       pp_context->sampler_state_table.bo_8x8);
2370
2371     dri_bo_unmap(pp_context->sampler_state_table.bo);
2372
2373     /* private function & data */
2374     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
2375     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
2376     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
2377
2378     pp_avs_context->dest_x = dst_rect->x;
2379     pp_avs_context->dest_y = dst_rect->y;
2380     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
2381     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
2382     pp_avs_context->src_w = src_rect->width;
2383     pp_avs_context->src_h = src_rect->height;
2384
2385     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
2386     dw = MAX(dw, pp_avs_context->dest_w);
2387
2388     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
2389     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
2390     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) 1.0 / pp_avs_context->dest_h;
2391     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = -(float)pp_avs_context->dest_y / pp_avs_context->dest_h;
2392     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = -(float)pp_avs_context->dest_x / dw;
2393
2394     gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
2395
2396     dst_surface->flags = src_surface->flags;
2397
2398     return VA_STATUS_SUCCESS;
2399 }
2400
/*
 * Horizontal step count hook for the DN/DI module.
 *
 * Always reports a single step; the context pointer is accepted only to
 * match the pp_x_steps hook signature and is never read.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2406
2407 static int
2408 pp_dndi_y_steps(void *private_context)
2409 {
2410     struct pp_dndi_context *pp_dndi_context = private_context;
2411
2412     return pp_dndi_context->dest_h / 4;
2413 }
2414
2415 static int
2416 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2417 {
2418     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2419
2420     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2421     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
2422
2423     return 0;
2424 }
2425
2426 static VAStatus
2427 pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2428                         const struct i965_surface *src_surface,
2429                         const VARectangle *src_rect,
2430                         struct i965_surface *dst_surface,
2431                         const VARectangle *dst_rect,
2432                         void *filter_param)
2433 {
2434     struct i965_driver_data *i965 = i965_driver_data(ctx);
2435     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
2436     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2437     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2438     struct object_surface *obj_surface;
2439     struct i965_sampler_dndi *sampler_dndi;
2440     int index;
2441     int w, h;
2442     int orig_w, orig_h;
2443     int dndi_top_first = 1;
2444
2445     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
2446         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
2447
2448     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
2449         dndi_top_first = 1;
2450     else
2451         dndi_top_first = 0;
2452
2453     /* surface */
2454     obj_surface = SURFACE(src_surface->id);
2455     orig_w = obj_surface->orig_width;
2456     orig_h = obj_surface->orig_height;
2457     w = obj_surface->width;
2458     h = obj_surface->height;
2459
2460     if (pp_context->stmm.bo == NULL) {
2461         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2462                                            "STMM surface",
2463                                            w * h,
2464                                            4096);
2465         assert(pp_context->stmm.bo);
2466     }
2467
2468     /* source UV surface index 2 */
2469     i965_pp_set_surface_state(ctx, pp_context,
2470                               obj_surface->bo, w * h,
2471                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2472                               2, 0);
2473
2474     /* source YUV surface index 4 */
2475     i965_pp_set_surface2_state(ctx, pp_context,
2476                                obj_surface->bo, 0,
2477                                orig_w, orig_h, w,
2478                                0, h,
2479                                SURFACE_FORMAT_PLANAR_420_8, 1,
2480                                4);
2481
2482     /* source STMM surface index 20 */
2483     i965_pp_set_surface_state(ctx, pp_context,
2484                               pp_context->stmm.bo, 0,
2485                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2486                               20, 1);
2487
2488     /* destination surface */
2489     obj_surface = SURFACE(dst_surface->id);
2490     orig_w = obj_surface->orig_width;
2491     orig_h = obj_surface->orig_height;
2492     w = obj_surface->width;
2493     h = obj_surface->height;
2494
2495     /* destination Y surface index 7 */
2496     i965_pp_set_surface_state(ctx, pp_context,
2497                               obj_surface->bo, 0,
2498                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2499                               7, 1);
2500
2501     /* destination UV surface index 8 */
2502     i965_pp_set_surface_state(ctx, pp_context,
2503                               obj_surface->bo, w * h,
2504                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2505                               8, 1);
2506     /* sampler dndi */
2507     dri_bo_map(pp_context->sampler_state_table.bo, True);
2508     assert(pp_context->sampler_state_table.bo->virtual);
2509     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2510     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2511
2512     /* sample dndi index 1 */
2513     index = 0;
2514     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2515     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
2516     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2517     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2518
2519     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2520     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 4;
2521     sampler_dndi[index].dw1.stmm_c2 = 1;
2522     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2523     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2524
2525     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
2526     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
2527     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
2528     sampler_dndi[index].dw2.good_neighbor_threshold = 4;                // 0-63
2529
2530     sampler_dndi[index].dw3.maximum_stmm = 128;
2531     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2532     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2533     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2534     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2535
2536     sampler_dndi[index].dw4.sdi_delta = 8;
2537     sampler_dndi[index].dw4.sdi_threshold = 128;
2538     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2539     sampler_dndi[index].dw4.stmm_shift_up = 0;
2540     sampler_dndi[index].dw4.stmm_shift_down = 0;
2541     sampler_dndi[index].dw4.minimum_stmm = 0;
2542
2543     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 8;
2544     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 32;
2545     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 64;
2546     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 32;
2547
2548     sampler_dndi[index].dw6.dn_enable = 1;
2549     sampler_dndi[index].dw6.di_enable = 1;
2550     sampler_dndi[index].dw6.di_partial = 0;
2551     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2552     sampler_dndi[index].dw6.dndi_stream_id = 0;
2553     sampler_dndi[index].dw6.dndi_first_frame = 1;
2554     sampler_dndi[index].dw6.progressive_dn = 0;
2555     sampler_dndi[index].dw6.fmd_tear_threshold = 63;
2556     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2557     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2558
2559     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
2560     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
2561     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2562     sampler_dndi[index].dw7.column_width_minus1 = 0;
2563
2564     dri_bo_unmap(pp_context->sampler_state_table.bo);
2565
2566     /* private function & data */
2567     pp_context->pp_x_steps = pp_dndi_x_steps;
2568     pp_context->pp_y_steps = pp_dndi_y_steps;
2569     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
2570
2571     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
2572     pp_static_parameter->grf1.r1_6.di.top_field_first = dndi_top_first;
2573     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 0;
2574     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 0;
2575
2576     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
2577     pp_inline_parameter->grf5.number_blocks = w / 16;
2578     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
2579     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
2580
2581     pp_dndi_context->dest_w = w;
2582     pp_dndi_context->dest_h = h;
2583
2584     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
2585
2586     return VA_STATUS_SUCCESS;
2587 }
2588
/*
 * Horizontal step count hook for the denoise (DN) module.
 *
 * Always reports a single step; the context pointer is accepted only to
 * match the pp_x_steps hook signature and is never read.
 */
static int
pp_dn_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2594
2595 static int
2596 pp_dn_y_steps(void *private_context)
2597 {
2598     struct pp_dn_context *pp_dn_context = private_context;
2599
2600     return pp_dn_context->dest_h / 8;
2601 }
2602
2603 static int
2604 pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2605 {
2606     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2607
2608     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2609     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
2610
2611     return 0;
2612 }
2613
2614 static VAStatus
2615 pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2616                       const struct i965_surface *src_surface,
2617                       const VARectangle *src_rect,
2618                       struct i965_surface *dst_surface,
2619                       const VARectangle *dst_rect,
2620                       void *filter_param)
2621 {
2622     struct i965_driver_data *i965 = i965_driver_data(ctx);
2623     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
2624     struct object_surface *obj_surface;
2625     struct i965_sampler_dndi *sampler_dndi;
2626     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2627     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2628     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
2629     int index;
2630     int w, h;
2631     int orig_w, orig_h;
2632     int dn_strength = 15;
2633     int dndi_top_first = 1;
2634     int dn_progressive = 0;
2635
2636     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
2637         dndi_top_first = 1;
2638         dn_progressive = 1;
2639     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
2640         dndi_top_first = 1;
2641         dn_progressive = 0;
2642     } else {
2643         dndi_top_first = 0;
2644         dn_progressive = 0;
2645     }
2646
2647     if (dn_filter_param) {
2648         float value = dn_filter_param->value;
2649         
2650         if (value > 1.0)
2651             value = 1.0;
2652         
2653         if (value < 0.0)
2654             value = 0.0;
2655
2656         dn_strength = (int)(value * 31.0F);
2657     }
2658
2659     /* surface */
2660     obj_surface = SURFACE(src_surface->id);
2661     orig_w = obj_surface->orig_width;
2662     orig_h = obj_surface->orig_height;
2663     w = obj_surface->width;
2664     h = obj_surface->height;
2665
2666     if (pp_context->stmm.bo == NULL) {
2667         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2668                                            "STMM surface",
2669                                            w * h,
2670                                            4096);
2671         assert(pp_context->stmm.bo);
2672     }
2673
2674     /* source UV surface index 2 */
2675     i965_pp_set_surface_state(ctx, pp_context,
2676                               obj_surface->bo, w * h,
2677                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2678                               2, 0);
2679
2680     /* source YUV surface index 4 */
2681     i965_pp_set_surface2_state(ctx, pp_context,
2682                                obj_surface->bo, 0,
2683                                orig_w, orig_h, w,
2684                                0, h,
2685                                SURFACE_FORMAT_PLANAR_420_8, 1,
2686                                4);
2687
2688     /* source STMM surface index 20 */
2689     i965_pp_set_surface_state(ctx, pp_context,
2690                               pp_context->stmm.bo, 0,
2691                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2692                               20, 1);
2693
2694     /* destination surface */
2695     obj_surface = SURFACE(dst_surface->id);
2696     orig_w = obj_surface->orig_width;
2697     orig_h = obj_surface->orig_height;
2698     w = obj_surface->width;
2699     h = obj_surface->height;
2700
2701     /* destination Y surface index 7 */
2702     i965_pp_set_surface_state(ctx, pp_context,
2703                               obj_surface->bo, 0,
2704                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2705                               7, 1);
2706
2707     /* destination UV surface index 8 */
2708     i965_pp_set_surface_state(ctx, pp_context,
2709                               obj_surface->bo, w * h,
2710                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2711                               8, 1);
2712     /* sampler dn */
2713     dri_bo_map(pp_context->sampler_state_table.bo, True);
2714     assert(pp_context->sampler_state_table.bo->virtual);
2715     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2716     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2717
2718     /* sample dndi index 1 */
2719     index = 0;
2720     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2721     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
2722     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2723     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2724
2725     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2726     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
2727     sampler_dndi[index].dw1.stmm_c2 = 0;
2728     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2729     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2730
2731     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
2732     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
2733     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
2734     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
2735
2736     sampler_dndi[index].dw3.maximum_stmm = 128;
2737     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2738     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2739     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2740     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2741
2742     sampler_dndi[index].dw4.sdi_delta = 8;
2743     sampler_dndi[index].dw4.sdi_threshold = 128;
2744     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2745     sampler_dndi[index].dw4.stmm_shift_up = 0;
2746     sampler_dndi[index].dw4.stmm_shift_down = 0;
2747     sampler_dndi[index].dw4.minimum_stmm = 0;
2748
2749     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
2750     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
2751     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
2752     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
2753
2754     sampler_dndi[index].dw6.dn_enable = 1;
2755     sampler_dndi[index].dw6.di_enable = 0;
2756     sampler_dndi[index].dw6.di_partial = 0;
2757     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2758     sampler_dndi[index].dw6.dndi_stream_id = 1;
2759     sampler_dndi[index].dw6.dndi_first_frame = 1;
2760     sampler_dndi[index].dw6.progressive_dn = dn_progressive;
2761     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
2762     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2763     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2764
2765     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
2766     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
2767     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2768     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
2769
2770     dri_bo_unmap(pp_context->sampler_state_table.bo);
2771
2772     /* private function & data */
2773     pp_context->pp_x_steps = pp_dn_x_steps;
2774     pp_context->pp_y_steps = pp_dn_y_steps;
2775     pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
2776
2777     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
2778     pp_static_parameter->grf1.r1_6.di.top_field_first = 0;
2779     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 64;
2780     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 192;
2781
2782     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
2783     pp_inline_parameter->grf5.number_blocks = w / 16;
2784     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
2785     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
2786
2787     pp_dn_context->dest_w = w;
2788     pp_dn_context->dest_h = h;
2789
2790     dst_surface->flags = src_surface->flags;
2791     
2792     return VA_STATUS_SUCCESS;
2793 }
2794
2795 static int
2796 gen7_pp_dndi_x_steps(void *private_context)
2797 {
2798     struct pp_dndi_context *pp_dndi_context = private_context;
2799
2800     return pp_dndi_context->dest_w / 16;
2801 }
2802
2803 static int
2804 gen7_pp_dndi_y_steps(void *private_context)
2805 {
2806     struct pp_dndi_context *pp_dndi_context = private_context;
2807
2808     return pp_dndi_context->dest_h / 4;
2809 }
2810
2811 static int
2812 gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2813 {
2814     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2815
2816     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16;
2817     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 4;
2818
2819     return 0;
2820 }
2821
2822 static VAStatus
2823 gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2824                              const struct i965_surface *src_surface,
2825                              const VARectangle *src_rect,
2826                              struct i965_surface *dst_surface,
2827                              const VARectangle *dst_rect,
2828                              void *filter_param)
2829 {
2830     struct i965_driver_data *i965 = i965_driver_data(ctx);
2831     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
2832     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2833     struct object_surface *obj_surface;
2834     struct gen7_sampler_dndi *sampler_dndi;
2835     int index;
2836     int w, h;
2837     int orig_w, orig_h;
2838     int dndi_top_first = 1;
2839
2840     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
2841         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
2842
2843     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
2844         dndi_top_first = 1;
2845     else
2846         dndi_top_first = 0;
2847
2848     /* surface */
2849     obj_surface = SURFACE(src_surface->id);
2850     orig_w = obj_surface->orig_width;
2851     orig_h = obj_surface->orig_height;
2852     w = obj_surface->width;
2853     h = obj_surface->height;
2854
2855     if (pp_context->stmm.bo == NULL) {
2856         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2857                                            "STMM surface",
2858                                            w * h,
2859                                            4096);
2860         assert(pp_context->stmm.bo);
2861     }
2862
2863     /* source UV surface index 1 */
2864     gen7_pp_set_surface_state(ctx, pp_context,
2865                               obj_surface->bo, w * h,
2866                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2867                               1, 0);
2868
2869     /* source YUV surface index 3 */
2870     gen7_pp_set_surface2_state(ctx, pp_context,
2871                                obj_surface->bo, 0,
2872                                orig_w, orig_h, w,
2873                                0, h,
2874                                SURFACE_FORMAT_PLANAR_420_8, 1,
2875                                3);
2876
2877     /* source (temporal reference) YUV surface index 4 */
2878     gen7_pp_set_surface2_state(ctx, pp_context,
2879                                obj_surface->bo, 0,
2880                                orig_w, orig_h, w,
2881                                0, h,
2882                                SURFACE_FORMAT_PLANAR_420_8, 1,
2883                                4);
2884
2885     /* STMM / History Statistics input surface, index 5 */
2886     gen7_pp_set_surface_state(ctx, pp_context,
2887                               pp_context->stmm.bo, 0,
2888                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2889                               5, 1);
2890
2891     /* destination surface */
2892     obj_surface = SURFACE(dst_surface->id);
2893     orig_w = obj_surface->orig_width;
2894     orig_h = obj_surface->orig_height;
2895     w = obj_surface->width;
2896     h = obj_surface->height;
2897
2898     /* destination(Previous frame) Y surface index 27 */
2899     gen7_pp_set_surface_state(ctx, pp_context,
2900                               obj_surface->bo, 0,
2901                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2902                               27, 1);
2903
2904     /* destination(Previous frame) UV surface index 28 */
2905     gen7_pp_set_surface_state(ctx, pp_context,
2906                               obj_surface->bo, w * h,
2907                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2908                               28, 1);
2909
2910     /* destination(Current frame) Y surface index 30 */
2911     gen7_pp_set_surface_state(ctx, pp_context,
2912                               obj_surface->bo, 0,
2913                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2914                               30, 1);
2915
2916     /* destination(Current frame) UV surface index 31 */
2917     gen7_pp_set_surface_state(ctx, pp_context,
2918                               obj_surface->bo, w * h,
2919                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2920                               31, 1);
2921
2922     /* STMM output surface, index 33 */
2923     gen7_pp_set_surface_state(ctx, pp_context,
2924                               pp_context->stmm.bo, 0,
2925                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2926                               33, 1);
2927
2928
2929     /* sampler dndi */
2930     dri_bo_map(pp_context->sampler_state_table.bo, True);
2931     assert(pp_context->sampler_state_table.bo->virtual);
2932     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2933     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2934
2935     /* sample dndi index 0 */
2936     index = 0;
2937     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2938     sampler_dndi[index].dw0.dnmh_delt = 8;
2939     sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
2940     sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
2941     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2942     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2943
2944     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2945     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
2946     sampler_dndi[index].dw1.stmm_c2 = 0;
2947     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2948     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2949
2950     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
2951     sampler_dndi[index].dw2.bne_edge_th = 1;
2952     sampler_dndi[index].dw2.smooth_mv_th = 0;
2953     sampler_dndi[index].dw2.sad_tight_th = 5;
2954     sampler_dndi[index].dw2.cat_slope_minus1 = 9;
2955     sampler_dndi[index].dw2.good_neighbor_th = 4;
2956
2957     sampler_dndi[index].dw3.maximum_stmm = 128;
2958     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2959     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2960     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2961     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2962
2963     sampler_dndi[index].dw4.sdi_delta = 8;
2964     sampler_dndi[index].dw4.sdi_threshold = 128;
2965     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2966     sampler_dndi[index].dw4.stmm_shift_up = 0;
2967     sampler_dndi[index].dw4.stmm_shift_down = 0;
2968     sampler_dndi[index].dw4.minimum_stmm = 0;
2969
2970     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
2971     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
2972     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
2973     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
2974
2975     sampler_dndi[index].dw6.dn_enable = 0;
2976     sampler_dndi[index].dw6.di_enable = 1;
2977     sampler_dndi[index].dw6.di_partial = 0;
2978     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2979     sampler_dndi[index].dw6.dndi_stream_id = 1;
2980     sampler_dndi[index].dw6.dndi_first_frame = 1;
2981     sampler_dndi[index].dw6.progressive_dn = 0;
2982     sampler_dndi[index].dw6.mcdi_enable = 0;
2983     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
2984     sampler_dndi[index].dw6.cat_th1 = 0;
2985     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2986     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2987
2988     sampler_dndi[index].dw7.sad_tha = 5;
2989     sampler_dndi[index].dw7.sad_thb = 10;
2990     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
2991     sampler_dndi[index].dw7.mc_pixel_consistency_th = 25;
2992     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
2993     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2994     sampler_dndi[index].dw7.neighborpixel_th = 10;
2995     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
2996
2997     dri_bo_unmap(pp_context->sampler_state_table.bo);
2998
2999     /* private function & data */
3000     pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
3001     pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
3002     pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
3003
3004     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3005     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3006     pp_static_parameter->grf1.di_top_field_first = 0;
3007     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3008
3009     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3010     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3011     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3012
3013     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3014     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3015
3016     pp_dndi_context->dest_w = w;
3017     pp_dndi_context->dest_h = h;
3018
3019     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3020
3021     return VA_STATUS_SUCCESS;
3022 }
3023
/*
 * Horizontal step count for the gen7 denoise (DN) walker.
 *
 * The private context is not consulted: the count is always 1.
 */
static int
gen7_pp_dn_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
3029
3030 static int
3031 gen7_pp_dn_y_steps(void *private_context)
3032 {
3033     struct pp_dn_context *pp_dn_context = private_context;
3034
3035     return pp_dn_context->dest_h / 4;
3036 }
3037
3038 static int
3039 gen7_pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3040 {
3041     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3042
3043     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3044     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
3045
3046     return 0;
3047 }
3048
3049 static VAStatus
3050 gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3051                            const struct i965_surface *src_surface,
3052                            const VARectangle *src_rect,
3053                            struct i965_surface *dst_surface,
3054                            const VARectangle *dst_rect,
3055                            void *filter_param)
3056 {
3057     struct i965_driver_data *i965 = i965_driver_data(ctx);
3058     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
3059     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3060     struct object_surface *obj_surface;
3061     struct gen7_sampler_dndi *sampler_dn;
3062     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3063     int index;
3064     int w, h;
3065     int orig_w, orig_h;
3066     int dn_strength = 15;
3067     int dndi_top_first = 1;
3068     int dn_progressive = 0;
3069
3070     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3071         dndi_top_first = 1;
3072         dn_progressive = 1;
3073     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3074         dndi_top_first = 1;
3075         dn_progressive = 0;
3076     } else {
3077         dndi_top_first = 0;
3078         dn_progressive = 0;
3079     }
3080
3081     if (dn_filter_param) {
3082         float value = dn_filter_param->value;
3083         
3084         if (value > 1.0)
3085             value = 1.0;
3086         
3087         if (value < 0.0)
3088             value = 0.0;
3089
3090         dn_strength = (int)(value * 31.0F);
3091     }
3092
3093     /* surface */
3094     obj_surface = SURFACE(src_surface->id);
3095     orig_w = obj_surface->orig_width;
3096     orig_h = obj_surface->orig_height;
3097     w = obj_surface->width;
3098     h = obj_surface->height;
3099
3100     if (pp_context->stmm.bo == NULL) {
3101         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
3102                                            "STMM surface",
3103                                            w * h,
3104                                            4096);
3105         assert(pp_context->stmm.bo);
3106     }
3107
3108     /* source UV surface index 1 */
3109     gen7_pp_set_surface_state(ctx, pp_context,
3110                               obj_surface->bo, w * h,
3111                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3112                               1, 0);
3113
3114     /* source YUV surface index 3 */
3115     gen7_pp_set_surface2_state(ctx, pp_context,
3116                                obj_surface->bo, 0,
3117                                orig_w, orig_h, w,
3118                                0, h,
3119                                SURFACE_FORMAT_PLANAR_420_8, 1,
3120                                3);
3121
3122     /* source STMM surface index 5 */
3123     gen7_pp_set_surface_state(ctx, pp_context,
3124                               pp_context->stmm.bo, 0,
3125                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3126                               5, 1);
3127
3128     /* destination surface */
3129     obj_surface = SURFACE(dst_surface->id);
3130     orig_w = obj_surface->orig_width;
3131     orig_h = obj_surface->orig_height;
3132     w = obj_surface->width;
3133     h = obj_surface->height;
3134
3135     /* destination Y surface index 7 */
3136     gen7_pp_set_surface_state(ctx, pp_context,
3137                               obj_surface->bo, 0,
3138                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3139                               7, 1);
3140
3141     /* destination UV surface index 8 */
3142     gen7_pp_set_surface_state(ctx, pp_context,
3143                               obj_surface->bo, w * h,
3144                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3145                               8, 1);
3146     /* sampler dn */
3147     dri_bo_map(pp_context->sampler_state_table.bo, True);
3148     assert(pp_context->sampler_state_table.bo->virtual);
3149     assert(sizeof(*sampler_dn) == sizeof(int) * 8);
3150     sampler_dn = pp_context->sampler_state_table.bo->virtual;
3151
3152     /* sample dn index 1 */
3153     index = 0;
3154     sampler_dn[index].dw0.denoise_asd_threshold = 0;
3155     sampler_dn[index].dw0.dnmh_delt = 8;
3156     sampler_dn[index].dw0.vdi_walker_y_stride = 0;
3157     sampler_dn[index].dw0.vdi_walker_frame_sharing_enable = 0;
3158     sampler_dn[index].dw0.denoise_maximum_history = 128;      // 128-240
3159     sampler_dn[index].dw0.denoise_stad_threshold = 0;
3160
3161     sampler_dn[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3162     sampler_dn[index].dw1.denoise_moving_pixel_threshold = 0;
3163     sampler_dn[index].dw1.stmm_c2 = 0;
3164     sampler_dn[index].dw1.low_temporal_difference_threshold = 8;
3165     sampler_dn[index].dw1.temporal_difference_threshold = 16;
3166
3167     sampler_dn[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
3168     sampler_dn[index].dw2.bne_edge_th = 1;
3169     sampler_dn[index].dw2.smooth_mv_th = 0;
3170     sampler_dn[index].dw2.sad_tight_th = 5;
3171     sampler_dn[index].dw2.cat_slope_minus1 = 9;
3172     sampler_dn[index].dw2.good_neighbor_th = 4;
3173
3174     sampler_dn[index].dw3.maximum_stmm = 128;
3175     sampler_dn[index].dw3.multipler_for_vecm = 2;
3176     sampler_dn[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3177     sampler_dn[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3178     sampler_dn[index].dw3.stmm_blending_constant_select = 0;
3179
3180     sampler_dn[index].dw4.sdi_delta = 8;
3181     sampler_dn[index].dw4.sdi_threshold = 128;
3182     sampler_dn[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3183     sampler_dn[index].dw4.stmm_shift_up = 0;
3184     sampler_dn[index].dw4.stmm_shift_down = 0;
3185     sampler_dn[index].dw4.minimum_stmm = 0;
3186
3187     sampler_dn[index].dw5.fmd_temporal_difference_threshold = 0;
3188     sampler_dn[index].dw5.sdi_fallback_mode_2_constant = 0;
3189     sampler_dn[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3190     sampler_dn[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3191
3192     sampler_dn[index].dw6.dn_enable = 1;
3193     sampler_dn[index].dw6.di_enable = 0;
3194     sampler_dn[index].dw6.di_partial = 0;
3195     sampler_dn[index].dw6.dndi_top_first = dndi_top_first;
3196     sampler_dn[index].dw6.dndi_stream_id = 1;
3197     sampler_dn[index].dw6.dndi_first_frame = 1;
3198     sampler_dn[index].dw6.progressive_dn = dn_progressive;
3199     sampler_dn[index].dw6.mcdi_enable = 0;
3200     sampler_dn[index].dw6.fmd_tear_threshold = 32;
3201     sampler_dn[index].dw6.cat_th1 = 0;
3202     sampler_dn[index].dw6.fmd2_vertical_difference_threshold = 32;
3203     sampler_dn[index].dw6.fmd1_vertical_difference_threshold = 32;
3204
3205     sampler_dn[index].dw7.sad_tha = 5;
3206     sampler_dn[index].dw7.sad_thb = 10;
3207     sampler_dn[index].dw7.fmd_for_1st_field_of_current_frame = 2;
3208     sampler_dn[index].dw7.mc_pixel_consistency_th = 25;
3209     sampler_dn[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
3210     sampler_dn[index].dw7.vdi_walker_enable = 0;
3211     sampler_dn[index].dw7.neighborpixel_th = 10;
3212     sampler_dn[index].dw7.column_width_minus1 = w / 16;
3213
3214     dri_bo_unmap(pp_context->sampler_state_table.bo);
3215
3216     /* private function & data */
3217     pp_context->pp_x_steps = gen7_pp_dn_x_steps;
3218     pp_context->pp_y_steps = gen7_pp_dn_y_steps;
3219     pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;
3220
3221     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3222     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3223     pp_static_parameter->grf1.di_top_field_first = 0;
3224     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3225
3226     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3227     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3228     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3229
3230     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3231     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3232
3233     pp_dn_context->dest_w = w;
3234     pp_dn_context->dest_h = h;
3235
3236     dst_surface->flags = src_surface->flags;
3237
3238     return VA_STATUS_SUCCESS;
3239 }
3240
3241 static VAStatus
3242 ironlake_pp_initialize(
3243     VADriverContextP   ctx,
3244     struct i965_post_processing_context *pp_context,
3245     const struct i965_surface *src_surface,
3246     const VARectangle *src_rect,
3247     struct i965_surface *dst_surface,
3248     const VARectangle *dst_rect,
3249     int                pp_index,
3250     void *filter_param
3251 )
3252 {
3253     VAStatus va_status;
3254     struct i965_driver_data *i965 = i965_driver_data(ctx);
3255     struct pp_module *pp_module;
3256     dri_bo *bo;
3257     int static_param_size, inline_param_size;
3258
3259     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3260     bo = dri_bo_alloc(i965->intel.bufmgr,
3261                       "surface state & binding table",
3262                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
3263                       4096);
3264     assert(bo);
3265     pp_context->surface_state_binding_table.bo = bo;
3266
3267     dri_bo_unreference(pp_context->curbe.bo);
3268     bo = dri_bo_alloc(i965->intel.bufmgr,
3269                       "constant buffer",
3270                       4096, 
3271                       4096);
3272     assert(bo);
3273     pp_context->curbe.bo = bo;
3274
3275     dri_bo_unreference(pp_context->idrt.bo);
3276     bo = dri_bo_alloc(i965->intel.bufmgr, 
3277                       "interface discriptor", 
3278                       sizeof(struct i965_interface_descriptor), 
3279                       4096);
3280     assert(bo);
3281     pp_context->idrt.bo = bo;
3282     pp_context->idrt.num_interface_descriptors = 0;
3283
3284     dri_bo_unreference(pp_context->sampler_state_table.bo);
3285     bo = dri_bo_alloc(i965->intel.bufmgr, 
3286                       "sampler state table", 
3287                       4096,
3288                       4096);
3289     assert(bo);
3290     dri_bo_map(bo, True);
3291     memset(bo->virtual, 0, bo->size);
3292     dri_bo_unmap(bo);
3293     pp_context->sampler_state_table.bo = bo;
3294
3295     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3296     bo = dri_bo_alloc(i965->intel.bufmgr, 
3297                       "sampler 8x8 state ",
3298                       4096,
3299                       4096);
3300     assert(bo);
3301     pp_context->sampler_state_table.bo_8x8 = bo;
3302
3303     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3304     bo = dri_bo_alloc(i965->intel.bufmgr, 
3305                       "sampler 8x8 state ",
3306                       4096,
3307                       4096);
3308     assert(bo);
3309     pp_context->sampler_state_table.bo_8x8_uv = bo;
3310
3311     dri_bo_unreference(pp_context->vfe_state.bo);
3312     bo = dri_bo_alloc(i965->intel.bufmgr, 
3313                       "vfe state", 
3314                       sizeof(struct i965_vfe_state), 
3315                       4096);
3316     assert(bo);
3317     pp_context->vfe_state.bo = bo;
3318
3319     static_param_size = sizeof(struct pp_static_parameter);
3320     inline_param_size = sizeof(struct pp_inline_parameter);
3321
3322     memset(pp_context->pp_static_parameter, 0, static_param_size);
3323     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
3324     
3325     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
3326     pp_context->current_pp = pp_index;
3327     pp_module = &pp_context->pp_modules[pp_index];
3328     
3329     if (pp_module->initialize)
3330         va_status = pp_module->initialize(ctx, pp_context,
3331                                           src_surface,
3332                                           src_rect,
3333                                           dst_surface,
3334                                           dst_rect,
3335                                           filter_param);
3336     else
3337         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
3338
3339     return va_status;
3340 }
3341
3342 static VAStatus
3343 ironlake_post_processing(
3344     VADriverContextP   ctx,
3345     struct i965_post_processing_context *pp_context,
3346     const struct i965_surface *src_surface,
3347     const VARectangle *src_rect,
3348     struct i965_surface *dst_surface,
3349     const VARectangle *dst_rect,
3350     int                pp_index,
3351     void *filter_param
3352 )
3353 {
3354     VAStatus va_status;
3355
3356     va_status = ironlake_pp_initialize(ctx, pp_context,
3357                                        src_surface,
3358                                        src_rect,
3359                                        dst_surface,
3360                                        dst_rect,
3361                                        pp_index,
3362                                        filter_param);
3363
3364     if (va_status == VA_STATUS_SUCCESS) {
3365         ironlake_pp_states_setup(ctx, pp_context);
3366         ironlake_pp_pipeline_setup(ctx, pp_context);
3367     }
3368
3369     return va_status;
3370 }
3371
3372 static VAStatus
3373 gen6_pp_initialize(
3374     VADriverContextP   ctx,
3375     struct i965_post_processing_context *pp_context,
3376     const struct i965_surface *src_surface,
3377     const VARectangle *src_rect,
3378     struct i965_surface *dst_surface,
3379     const VARectangle *dst_rect,
3380     int                pp_index,
3381     void *filter_param
3382 )
3383 {
3384     VAStatus va_status;
3385     struct i965_driver_data *i965 = i965_driver_data(ctx);
3386     struct pp_module *pp_module;
3387     dri_bo *bo;
3388     int static_param_size, inline_param_size;
3389
3390     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3391     bo = dri_bo_alloc(i965->intel.bufmgr,
3392                       "surface state & binding table",
3393                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
3394                       4096);
3395     assert(bo);
3396     pp_context->surface_state_binding_table.bo = bo;
3397
3398     dri_bo_unreference(pp_context->curbe.bo);
3399     bo = dri_bo_alloc(i965->intel.bufmgr,
3400                       "constant buffer",
3401                       4096, 
3402                       4096);
3403     assert(bo);
3404     pp_context->curbe.bo = bo;
3405
3406     dri_bo_unreference(pp_context->idrt.bo);
3407     bo = dri_bo_alloc(i965->intel.bufmgr, 
3408                       "interface discriptor", 
3409                       sizeof(struct gen6_interface_descriptor_data), 
3410                       4096);
3411     assert(bo);
3412     pp_context->idrt.bo = bo;
3413     pp_context->idrt.num_interface_descriptors = 0;
3414
3415     dri_bo_unreference(pp_context->sampler_state_table.bo);
3416     bo = dri_bo_alloc(i965->intel.bufmgr, 
3417                       "sampler state table", 
3418                       4096,
3419                       4096);
3420     assert(bo);
3421     dri_bo_map(bo, True);
3422     memset(bo->virtual, 0, bo->size);
3423     dri_bo_unmap(bo);
3424     pp_context->sampler_state_table.bo = bo;
3425
3426     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3427     bo = dri_bo_alloc(i965->intel.bufmgr, 
3428                       "sampler 8x8 state ",
3429                       4096,
3430                       4096);
3431     assert(bo);
3432     pp_context->sampler_state_table.bo_8x8 = bo;
3433
3434     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3435     bo = dri_bo_alloc(i965->intel.bufmgr, 
3436                       "sampler 8x8 state ",
3437                       4096,
3438                       4096);
3439     assert(bo);
3440     pp_context->sampler_state_table.bo_8x8_uv = bo;
3441
3442     dri_bo_unreference(pp_context->vfe_state.bo);
3443     bo = dri_bo_alloc(i965->intel.bufmgr, 
3444                       "vfe state", 
3445                       sizeof(struct i965_vfe_state), 
3446                       4096);
3447     assert(bo);
3448     pp_context->vfe_state.bo = bo;
3449     
3450     if (IS_GEN7(i965->intel.device_id)) {
3451         static_param_size = sizeof(struct gen7_pp_static_parameter);
3452         inline_param_size = sizeof(struct gen7_pp_inline_parameter);
3453     } else {
3454         static_param_size = sizeof(struct pp_static_parameter);
3455         inline_param_size = sizeof(struct pp_inline_parameter);
3456     }
3457
3458     memset(pp_context->pp_static_parameter, 0, static_param_size);
3459     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
3460
3461     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
3462     pp_context->current_pp = pp_index;
3463     pp_module = &pp_context->pp_modules[pp_index];
3464     
3465     if (pp_module->initialize)
3466         va_status = pp_module->initialize(ctx, pp_context,
3467                                           src_surface,
3468                                           src_rect,
3469                                           dst_surface,
3470                                           dst_rect,
3471                                           filter_param);
3472     else
3473         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
3474
3475     return va_status;
3476 }
3477
3478 static void
3479 gen6_pp_interface_descriptor_table(VADriverContextP   ctx,
3480                                    struct i965_post_processing_context *pp_context)
3481 {
3482     struct i965_driver_data *i965 = i965_driver_data(ctx);
3483     struct gen6_interface_descriptor_data *desc;
3484     dri_bo *bo;
3485     int pp_index = pp_context->current_pp;
3486
3487     bo = pp_context->idrt.bo;
3488     dri_bo_map(bo, True);
3489     assert(bo->virtual);
3490     desc = bo->virtual;
3491     memset(desc, 0, sizeof(*desc));
3492     desc->desc0.kernel_start_pointer = 
3493         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
3494     desc->desc1.single_program_flow = 1;
3495     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
3496     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
3497     desc->desc2.sampler_state_pointer = 
3498         pp_context->sampler_state_table.bo->offset >> 5;
3499     desc->desc3.binding_table_entry_count = 0;
3500     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
3501     desc->desc4.constant_urb_entry_read_offset = 0;
3502
3503     if (IS_GEN7(i965->intel.device_id))
3504         desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */
3505     else
3506         desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
3507
3508     dri_bo_emit_reloc(bo,
3509                       I915_GEM_DOMAIN_INSTRUCTION, 0,
3510                       0,
3511                       offsetof(struct gen6_interface_descriptor_data, desc0),
3512                       pp_context->pp_modules[pp_index].kernel.bo);
3513
3514     dri_bo_emit_reloc(bo,
3515                       I915_GEM_DOMAIN_INSTRUCTION, 0,
3516                       desc->desc2.sampler_count << 2,
3517                       offsetof(struct gen6_interface_descriptor_data, desc2),
3518                       pp_context->sampler_state_table.bo);
3519
3520     dri_bo_unmap(bo);
3521     pp_context->idrt.num_interface_descriptors++;
3522 }
3523
3524 static void
3525 gen6_pp_upload_constants(VADriverContextP ctx,
3526                          struct i965_post_processing_context *pp_context)
3527 {
3528     struct i965_driver_data *i965 = i965_driver_data(ctx);
3529     unsigned char *constant_buffer;
3530     int param_size;
3531
3532     assert(sizeof(struct pp_static_parameter) == 128);
3533     assert(sizeof(struct gen7_pp_static_parameter) == 192);
3534
3535     if (IS_GEN7(i965->intel.device_id))
3536         param_size = sizeof(struct gen7_pp_static_parameter);
3537     else
3538         param_size = sizeof(struct pp_static_parameter);
3539
3540     dri_bo_map(pp_context->curbe.bo, 1);
3541     assert(pp_context->curbe.bo->virtual);
3542     constant_buffer = pp_context->curbe.bo->virtual;
3543     memcpy(constant_buffer, pp_context->pp_static_parameter, param_size);
3544     dri_bo_unmap(pp_context->curbe.bo);
3545 }
3546
3547 static void
3548 gen6_pp_states_setup(VADriverContextP ctx,
3549                      struct i965_post_processing_context *pp_context)
3550 {
3551     gen6_pp_interface_descriptor_table(ctx, pp_context);
3552     gen6_pp_upload_constants(ctx, pp_context);
3553 }
3554
3555 static void
3556 gen6_pp_pipeline_select(VADriverContextP ctx,
3557                         struct i965_post_processing_context *pp_context)
3558 {
3559     struct intel_batchbuffer *batch = pp_context->batch;
3560
3561     BEGIN_BATCH(batch, 1);
3562     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
3563     ADVANCE_BATCH(batch);
3564 }
3565
3566 static void
3567 gen6_pp_state_base_address(VADriverContextP ctx,
3568                            struct i965_post_processing_context *pp_context)
3569 {
3570     struct intel_batchbuffer *batch = pp_context->batch;
3571
3572     BEGIN_BATCH(batch, 10);
3573     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
3574     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3575     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
3576     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3577     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3578     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3579     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3580     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3581     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3582     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3583     ADVANCE_BATCH(batch);
3584 }
3585
3586 static void
3587 gen6_pp_vfe_state(VADriverContextP ctx,
3588                   struct i965_post_processing_context *pp_context)
3589 {
3590     struct intel_batchbuffer *batch = pp_context->batch;
3591
3592     BEGIN_BATCH(batch, 8);
3593     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
3594     OUT_BATCH(batch, 0);
3595     OUT_BATCH(batch,
3596               (pp_context->urb.num_vfe_entries - 1) << 16 |
3597               pp_context->urb.num_vfe_entries << 8);
3598     OUT_BATCH(batch, 0);
3599     OUT_BATCH(batch,
3600               (pp_context->urb.size_vfe_entry * 2) << 16 |  /* URB Entry Allocation Size, in 256 bits unit */
3601               (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2)); /* CURBE Allocation Size, in 256 bits unit */
3602     OUT_BATCH(batch, 0);
3603     OUT_BATCH(batch, 0);
3604     OUT_BATCH(batch, 0);
3605     ADVANCE_BATCH(batch);
3606 }
3607
3608 static void
3609 gen6_pp_curbe_load(VADriverContextP ctx,
3610                    struct i965_post_processing_context *pp_context)
3611 {
3612     struct intel_batchbuffer *batch = pp_context->batch;
3613
3614     assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32 <= pp_context->curbe.bo->size);
3615
3616     BEGIN_BATCH(batch, 4);
3617     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
3618     OUT_BATCH(batch, 0);
3619     OUT_BATCH(batch,
3620               pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32);
3621     OUT_RELOC(batch, 
3622               pp_context->curbe.bo,
3623               I915_GEM_DOMAIN_INSTRUCTION, 0,
3624               0);
3625     ADVANCE_BATCH(batch);
3626 }
3627
3628 static void
3629 gen6_interface_descriptor_load(VADriverContextP ctx,
3630                                struct i965_post_processing_context *pp_context)
3631 {
3632     struct intel_batchbuffer *batch = pp_context->batch;
3633
3634     BEGIN_BATCH(batch, 4);
3635     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
3636     OUT_BATCH(batch, 0);
3637     OUT_BATCH(batch,
3638               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
3639     OUT_RELOC(batch, 
3640               pp_context->idrt.bo,
3641               I915_GEM_DOMAIN_INSTRUCTION, 0,
3642               0);
3643     ADVANCE_BATCH(batch);
3644 }
3645
3646 static void
3647 gen6_pp_object_walker(VADriverContextP ctx,
3648                       struct i965_post_processing_context *pp_context)
3649 {
3650     struct i965_driver_data *i965 = i965_driver_data(ctx);
3651     struct intel_batchbuffer *batch = pp_context->batch;
3652     int x, x_steps, y, y_steps;
3653     int param_size, command_length_in_dws;
3654     dri_bo *command_buffer;
3655     unsigned int *command_ptr;
3656
3657     if (IS_GEN7(i965->intel.device_id))
3658         param_size = sizeof(struct gen7_pp_inline_parameter);
3659     else
3660         param_size = sizeof(struct pp_inline_parameter);
3661
3662     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
3663     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
3664     command_length_in_dws = 6 + (param_size >> 2);
3665     command_buffer = dri_bo_alloc(i965->intel.bufmgr,
3666                                   "command objects buffer",
3667                                   command_length_in_dws * 4 * x_steps * y_steps + 8,
3668                                   4096);
3669
3670     dri_bo_map(command_buffer, 1);
3671     command_ptr = command_buffer->virtual;
3672
3673     for (y = 0; y < y_steps; y++) {
3674         for (x = 0; x < x_steps; x++) {
3675             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
3676                 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
3677                 *command_ptr++ = 0;
3678                 *command_ptr++ = 0;
3679                 *command_ptr++ = 0;
3680                 *command_ptr++ = 0;
3681                 *command_ptr++ = 0;
3682                 memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
3683                 command_ptr += (param_size >> 2);
3684             }
3685         }
3686     }
3687
3688     if (command_length_in_dws * x_steps * y_steps % 2 == 0)
3689         *command_ptr++ = 0;
3690
3691     *command_ptr = MI_BATCH_BUFFER_END;
3692
3693     dri_bo_unmap(command_buffer);
3694
3695     BEGIN_BATCH(batch, 2);
3696     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
3697     OUT_RELOC(batch, command_buffer, 
3698               I915_GEM_DOMAIN_COMMAND, 0, 
3699               0);
3700     ADVANCE_BATCH(batch);
3701     
3702     dri_bo_unreference(command_buffer);
3703
3704     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
3705      * will cause control to pass back to ring buffer 
3706      */
3707     intel_batchbuffer_end_atomic(batch);
3708     intel_batchbuffer_flush(batch);
3709     intel_batchbuffer_start_atomic(batch, 0x1000);
3710 }
3711
3712 static void
3713 gen6_pp_pipeline_setup(VADriverContextP ctx,
3714                        struct i965_post_processing_context *pp_context)
3715 {
3716     struct intel_batchbuffer *batch = pp_context->batch;
3717
3718     intel_batchbuffer_start_atomic(batch, 0x1000);
3719     intel_batchbuffer_emit_mi_flush(batch);
3720     gen6_pp_pipeline_select(ctx, pp_context);
3721     gen6_pp_state_base_address(ctx, pp_context);
3722     gen6_pp_vfe_state(ctx, pp_context);
3723     gen6_pp_curbe_load(ctx, pp_context);
3724     gen6_interface_descriptor_load(ctx, pp_context);
3725     gen6_pp_object_walker(ctx, pp_context);
3726     intel_batchbuffer_end_atomic(batch);
3727 }
3728
3729 static VAStatus
3730 gen6_post_processing(
3731     VADriverContextP   ctx,
3732     struct i965_post_processing_context *pp_context,
3733     const struct i965_surface *src_surface,
3734     const VARectangle *src_rect,
3735     struct i965_surface *dst_surface,
3736     const VARectangle *dst_rect,
3737     int                pp_index,
3738     void * filter_param
3739 )
3740 {
3741     VAStatus va_status;
3742     
3743     va_status = gen6_pp_initialize(ctx, pp_context,
3744                                    src_surface,
3745                                    src_rect,
3746                                    dst_surface,
3747                                    dst_rect,
3748                                    pp_index,
3749                                    filter_param);
3750
3751     if (va_status == VA_STATUS_SUCCESS) {
3752         gen6_pp_states_setup(ctx, pp_context);
3753         gen6_pp_pipeline_setup(ctx, pp_context);
3754     }
3755
3756     return va_status;
3757 }
3758
3759 static VAStatus
3760 i965_post_processing_internal(
3761     VADriverContextP   ctx,
3762     struct i965_post_processing_context *pp_context,
3763     const struct i965_surface *src_surface,
3764     const VARectangle *src_rect,
3765     struct i965_surface *dst_surface,
3766     const VARectangle *dst_rect,
3767     int                pp_index,
3768     void *filter_param
3769 )
3770 {
3771     struct i965_driver_data *i965 = i965_driver_data(ctx);
3772     VAStatus va_status;
3773
3774     if (IS_GEN6(i965->intel.device_id) ||
3775         IS_GEN7(i965->intel.device_id))
3776         va_status = gen6_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
3777     else
3778         va_status = ironlake_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
3779     
3780     return va_status;
3781 }
3782
3783 VAStatus 
3784 i965_DestroySurfaces(VADriverContextP ctx,
3785                      VASurfaceID *surface_list,
3786                      int num_surfaces);
3787 VAStatus 
3788 i965_CreateSurfaces(VADriverContextP ctx,
3789                     int width,
3790                     int height,
3791                     int format,
3792                     int num_surfaces,
3793                     VASurfaceID *surfaces);
3794
/*
 * Split a packed 32-bit ARGB value (alpha in bits 31..24, red in 23..16,
 * green in 15..8, blue in 7..0) and convert the colour part to Y/U/V using
 * integer arithmetic with coefficients scaled by 1000.
 *
 * y, u, v receive the converted components, a receives the alpha byte
 * unchanged.  All four output pointers must be valid.
 */
static void
rgb_to_yuv(unsigned int argb,
           unsigned char *y,
           unsigned char *u,
           unsigned char *v,
           unsigned char *a)
{
    const int red   = (int)((argb >> 16) & 0xff);
    const int green = (int)((argb >> 8) & 0xff);
    const int blue  = (int)(argb & 0xff);

    /* Integer division truncates toward zero, also for negative sums */
    const int luma     = (257 * red + 504 * green + 98 * blue) / 1000 + 16;
    const int chroma_v = (439 * red - 368 * green - 71 * blue) / 1000 + 128;
    const int chroma_u = (-148 * red - 291 * green + 439 * blue) / 1000 + 128;

    *y = (unsigned char)luma;
    *v = (unsigned char)chroma_v;
    *u = (unsigned char)chroma_u;
    *a = (unsigned char)((argb >> 24) & 0xff);
}
3811
3812 static void 
3813 i965_vpp_clear_surface(VADriverContextP ctx,
3814                        struct i965_post_processing_context *pp_context,
3815                        VASurfaceID surface,
3816                        unsigned int color)
3817 {
3818     struct i965_driver_data *i965 = i965_driver_data(ctx);
3819     struct intel_batchbuffer *batch = pp_context->batch;
3820     struct object_surface *obj_surface = SURFACE(surface);
3821     unsigned int blt_cmd, br13;
3822     unsigned int tiling = 0, swizzle = 0;
3823     int pitch;
3824     unsigned char y, u, v, a = 0;
3825
3826     /* Currently only support NV12 surface */
3827     if (!obj_surface || obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
3828         return;
3829
3830     rgb_to_yuv(color, &y, &u, &v, &a);
3831
3832     if (a == 0)
3833         return;
3834
3835     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
3836     blt_cmd = XY_COLOR_BLT_CMD;
3837     pitch = obj_surface->width;
3838
3839     if (tiling != I915_TILING_NONE) {
3840         blt_cmd |= XY_COLOR_BLT_DST_TILED;
3841         pitch >>= 2;
3842     }
3843
3844     br13 = 0xf0 << 16;
3845     br13 |= BR13_8;
3846     br13 |= pitch;
3847
3848     if (IS_GEN6(i965->intel.device_id) ||
3849         IS_GEN7(i965->intel.device_id)) {
3850         intel_batchbuffer_start_atomic_blt(batch, 48);
3851         BEGIN_BLT_BATCH(batch, 12);
3852     } else {
3853         intel_batchbuffer_start_atomic(batch, 48);
3854         BEGIN_BATCH(batch, 12);
3855     }
3856
3857     OUT_BATCH(batch, blt_cmd);
3858     OUT_BATCH(batch, br13);
3859     OUT_BATCH(batch,
3860               0 << 16 |
3861               0);
3862     OUT_BATCH(batch,
3863               obj_surface->height << 16 |
3864               obj_surface->width);
3865     OUT_RELOC(batch, obj_surface->bo, 
3866               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
3867               0);
3868     OUT_BATCH(batch, y);
3869
3870     br13 = 0xf0 << 16;
3871     br13 |= BR13_565;
3872     br13 |= pitch;
3873
3874     OUT_BATCH(batch, blt_cmd);
3875     OUT_BATCH(batch, br13);
3876     OUT_BATCH(batch,
3877               0 << 16 |
3878               0);
3879     OUT_BATCH(batch,
3880               obj_surface->height / 2 << 16 |
3881               obj_surface->width / 2);
3882     OUT_RELOC(batch, obj_surface->bo, 
3883               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
3884               obj_surface->width * obj_surface->y_cb_offset);
3885     OUT_BATCH(batch, v << 8 | u);
3886
3887     ADVANCE_BATCH(batch);
3888     intel_batchbuffer_end_atomic(batch);
3889 }
3890
3891 VASurfaceID
3892 i965_post_processing(
3893     VADriverContextP   ctx,
3894     VASurfaceID        surface,
3895     const VARectangle *src_rect,
3896     const VARectangle *dst_rect,
3897     unsigned int       flags,
3898     int               *has_done_scaling  
3899 )
3900 {
3901     struct i965_driver_data *i965 = i965_driver_data(ctx);
3902     VASurfaceID in_surface_id = surface;
3903     VASurfaceID out_surface_id = VA_INVALID_ID;
3904     
3905     *has_done_scaling = 0;
3906
3907     if (HAS_PP(i965)) {
3908         struct object_surface *obj_surface;
3909         VAStatus status;
3910         struct i965_surface src_surface;
3911         struct i965_surface dst_surface;
3912
3913         obj_surface = SURFACE(in_surface_id);
3914
3915         /* Currently only support post processing for NV12 surface */
3916         if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
3917             return out_surface_id;
3918
3919         _i965LockMutex(&i965->pp_mutex);
3920
3921         if (flags & I965_PP_FLAG_MCDI) {
3922             status = i965_CreateSurfaces(ctx,
3923                                          obj_surface->orig_width,
3924                                          obj_surface->orig_height,
3925                                          VA_RT_FORMAT_YUV420,
3926                                          1,
3927                                          &out_surface_id);
3928             assert(status == VA_STATUS_SUCCESS);
3929             obj_surface = SURFACE(out_surface_id);
3930             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
3931             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
3932             src_surface.id = in_surface_id;
3933             src_surface.type = I965_SURFACE_TYPE_SURFACE;
3934             src_surface.flags = (flags & I965_PP_FLAG_TOP_FIELD) ? 
3935                 I965_SURFACE_FLAG_TOP_FIELD_FIRST : I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST;
3936             dst_surface.id = out_surface_id;
3937             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
3938             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
3939
3940             i965_post_processing_internal(ctx, i965->pp_context,
3941                                           &src_surface,
3942                                           src_rect,
3943                                           &dst_surface,
3944                                           dst_rect,
3945                                           PP_NV12_DNDI,
3946                                           NULL);
3947         }
3948
3949         if (flags & I965_PP_FLAG_AVS) {
3950             struct i965_render_state *render_state = &i965->render_state;
3951             struct intel_region *dest_region = render_state->draw_region;
3952
3953             if (out_surface_id != VA_INVALID_ID)
3954                 in_surface_id = out_surface_id;
3955
3956             status = i965_CreateSurfaces(ctx,
3957                                          dest_region->width,
3958                                          dest_region->height,
3959                                          VA_RT_FORMAT_YUV420,
3960                                          1,
3961                                          &out_surface_id);
3962             assert(status == VA_STATUS_SUCCESS);
3963             obj_surface = SURFACE(out_surface_id);
3964             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
3965             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
3966             src_surface.id = in_surface_id;
3967             src_surface.type = I965_SURFACE_TYPE_SURFACE;
3968             src_surface.flags = I965_SURFACE_FLAG_FRAME;
3969             dst_surface.id = out_surface_id;
3970             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
3971             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
3972
3973             i965_post_processing_internal(ctx, i965->pp_context,
3974                                           &src_surface,
3975                                           src_rect,
3976                                           &dst_surface,
3977                                           dst_rect,
3978                                           PP_NV12_AVS,
3979                                           NULL);
3980
3981             if (in_surface_id != surface)
3982                 i965_DestroySurfaces(ctx, &in_surface_id, 1);
3983                 
3984             *has_done_scaling = 1;
3985         }
3986
3987         _i965UnlockMutex(&i965->pp_mutex);
3988     }
3989
3990     return out_surface_id;
3991 }       
3992
3993 static VAStatus
3994 i965_image_pl3_processing(VADriverContextP ctx,
3995                           const struct i965_surface *src_surface,
3996                           const VARectangle *src_rect,
3997                           struct i965_surface *dst_surface,
3998                           const VARectangle *dst_rect)
3999 {
4000     struct i965_driver_data *i965 = i965_driver_data(ctx);
4001     struct i965_post_processing_context *pp_context = i965->pp_context;
4002     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4003     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
4004
4005     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4006         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4007                                                  src_surface,
4008                                                  src_rect,
4009                                                  dst_surface,
4010                                                  dst_rect,
4011                                                  PP_PL3_LOAD_SAVE_N12,
4012                                                  NULL);
4013     } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || 
4014                fourcc == VA_FOURCC('I', 'M', 'C', '3') || 
4015                fourcc == VA_FOURCC('Y', 'V', '1', '2') || 
4016                fourcc == VA_FOURCC('I', '4', '2', '0')) {
4017         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4018                                                  src_surface,
4019                                                  src_rect,
4020                                                  dst_surface,
4021                                                  dst_rect,
4022                                                  PP_PL3_LOAD_SAVE_PL3,
4023                                                  NULL);
4024     } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) {
4025         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4026                                                  src_surface,
4027                                                  src_rect,
4028                                                  dst_surface,
4029                                                  dst_rect,
4030                                                  PP_PL3_LOAD_SAVE_PA,
4031                                                  NULL);
4032     }
4033     else {
4034         assert(0);
4035     }
4036
4037     intel_batchbuffer_flush(pp_context->batch);
4038
4039     return vaStatus;
4040 }
4041
4042 static VAStatus
4043 i965_image_pl2_processing(VADriverContextP ctx,
4044                           const struct i965_surface *src_surface,
4045                           const VARectangle *src_rect,
4046                           struct i965_surface *dst_surface,
4047                           const VARectangle *dst_rect)
4048 {
4049     struct i965_driver_data *i965 = i965_driver_data(ctx);
4050     struct i965_post_processing_context *pp_context = i965->pp_context;
4051     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4052     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
4053
4054     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4055         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4056                                                  src_surface,
4057                                                  src_rect,
4058                                                  dst_surface,
4059                                                  dst_rect,
4060                                                  PP_NV12_LOAD_SAVE_N12,
4061                                                  NULL);
4062     } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || 
4063                fourcc == VA_FOURCC('I', 'M', 'C', '3') || 
4064                fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
4065                fourcc == VA_FOURCC('I', '4', '2', '0') ) {
4066         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4067                                                  src_surface,
4068                                                  src_rect,
4069                                                  dst_surface,
4070                                                  dst_rect,
4071                                                  PP_NV12_LOAD_SAVE_PL3,
4072                                                  NULL);
4073     } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) {
4074         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4075                                                  src_surface,
4076                                                  src_rect,
4077                                                  dst_surface,
4078                                                  dst_rect,
4079                                                  PP_NV12_LOAD_SAVE_PA,
4080                                                      NULL);
4081     }
4082
4083     intel_batchbuffer_flush(pp_context->batch);
4084
4085     return vaStatus;
4086 }
4087
4088 static VAStatus
4089 i965_image_pl1_processing(VADriverContextP ctx,
4090                           const struct i965_surface *src_surface,
4091                           const VARectangle *src_rect,
4092                           struct i965_surface *dst_surface,
4093                           const VARectangle *dst_rect)
4094 {
4095     struct i965_driver_data *i965 = i965_driver_data(ctx);
4096     struct i965_post_processing_context *pp_context = i965->pp_context;
4097     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4098
4099     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4100         i965_post_processing_internal(ctx, i965->pp_context,
4101                                       src_surface,
4102                                       src_rect,
4103                                       dst_surface,
4104                                       dst_rect,
4105                                       PP_PA_LOAD_SAVE_NV12,
4106                                       NULL);
4107     }
4108     else if (fourcc == VA_FOURCC_YV12) {
4109         i965_post_processing_internal(ctx, i965->pp_context,
4110                                       src_surface,
4111                                       src_rect,
4112                                       dst_surface,
4113                                       dst_rect,
4114                                       PP_PA_LOAD_SAVE_PL3,
4115                                       NULL);
4116
4117     }
4118     else {
4119         return VA_STATUS_ERROR_UNKNOWN;
4120     }
4121
4122     intel_batchbuffer_flush(pp_context->batch);
4123
4124     return VA_STATUS_SUCCESS;
4125 }
4126
4127 VAStatus
4128 i965_image_processing(VADriverContextP ctx,
4129                       const struct i965_surface *src_surface,
4130                       const VARectangle *src_rect,
4131                       struct i965_surface *dst_surface,
4132                       const VARectangle *dst_rect)
4133 {
4134     struct i965_driver_data *i965 = i965_driver_data(ctx);
4135     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
4136
4137     if (HAS_PP(i965)) {
4138         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
4139
4140         _i965LockMutex(&i965->pp_mutex);
4141
4142         switch (fourcc) {
4143         case VA_FOURCC('Y', 'V', '1', '2'):
4144         case VA_FOURCC('I', '4', '2', '0'):
4145         case VA_FOURCC('I', 'M', 'C', '1'):
4146         case VA_FOURCC('I', 'M', 'C', '3'):
4147             status = i965_image_pl3_processing(ctx,
4148                                                src_surface,
4149                                                src_rect,
4150                                                dst_surface,
4151                                                dst_rect);
4152             break;
4153
4154         case  VA_FOURCC('N', 'V', '1', '2'):
4155             status = i965_image_pl2_processing(ctx,
4156                                                src_surface,
4157                                                src_rect,
4158                                                dst_surface,
4159                                                dst_rect);
4160             break;
4161         case  VA_FOURCC('Y', 'U', 'Y', '2'):
4162             status = i965_image_pl1_processing(ctx,
4163                                                src_surface,
4164                                                src_rect,
4165                                                dst_surface,
4166                                                dst_rect);
4167             break;
4168
4169         default:
4170             status = VA_STATUS_ERROR_UNIMPLEMENTED;
4171             break;
4172         }
4173         
4174         _i965UnlockMutex(&i965->pp_mutex);
4175     }
4176
4177     return status;
4178 }       
4179
4180 static void
4181 i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
4182 {
4183     int i;
4184
4185     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4186     pp_context->surface_state_binding_table.bo = NULL;
4187
4188     dri_bo_unreference(pp_context->curbe.bo);
4189     pp_context->curbe.bo = NULL;
4190
4191     dri_bo_unreference(pp_context->sampler_state_table.bo);
4192     pp_context->sampler_state_table.bo = NULL;
4193
4194     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4195     pp_context->sampler_state_table.bo_8x8 = NULL;
4196
4197     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4198     pp_context->sampler_state_table.bo_8x8_uv = NULL;
4199
4200     dri_bo_unreference(pp_context->idrt.bo);
4201     pp_context->idrt.bo = NULL;
4202     pp_context->idrt.num_interface_descriptors = 0;
4203
4204     dri_bo_unreference(pp_context->vfe_state.bo);
4205     pp_context->vfe_state.bo = NULL;
4206
4207     dri_bo_unreference(pp_context->stmm.bo);
4208     pp_context->stmm.bo = NULL;
4209
4210     for (i = 0; i < NUM_PP_MODULES; i++) {
4211         struct pp_module *pp_module = &pp_context->pp_modules[i];
4212
4213         dri_bo_unreference(pp_module->kernel.bo);
4214         pp_module->kernel.bo = NULL;
4215     }
4216
4217     free(pp_context->pp_static_parameter);
4218     free(pp_context->pp_inline_parameter);
4219     pp_context->pp_static_parameter = NULL;
4220     pp_context->pp_inline_parameter = NULL;
4221 }
4222
4223 Bool
4224 i965_post_processing_terminate(VADriverContextP ctx)
4225 {
4226     struct i965_driver_data *i965 = i965_driver_data(ctx);
4227     struct i965_post_processing_context *pp_context = i965->pp_context;
4228
4229     if (pp_context) {
4230         i965_post_processing_context_finalize(pp_context);
4231         free(pp_context);
4232     }
4233
4234     i965->pp_context = NULL;
4235
4236     return True;
4237 }
4238
4239 static void
4240 i965_post_processing_context_init(VADriverContextP ctx,
4241                                   struct i965_post_processing_context *pp_context,
4242                                   struct intel_batchbuffer *batch)
4243 {
4244     struct i965_driver_data *i965 = i965_driver_data(ctx);
4245     int i;
4246
4247     pp_context->urb.size = URB_SIZE((&i965->intel));
4248     pp_context->urb.num_vfe_entries = 32;
4249     pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
4250     pp_context->urb.num_cs_entries = 1;
4251     
4252     if (IS_GEN7(i965->intel.device_id))
4253         pp_context->urb.size_cs_entry = 4;      /* in 512 bits unit */
4254     else
4255         pp_context->urb.size_cs_entry = 2;
4256
4257     pp_context->urb.vfe_start = 0;
4258     pp_context->urb.cs_start = pp_context->urb.vfe_start + 
4259         pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
4260     assert(pp_context->urb.cs_start + 
4261            pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
4262
4263     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
4264     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
4265     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
4266
4267     if (IS_GEN7(i965->intel.device_id))
4268         memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
4269     else if (IS_GEN6(i965->intel.device_id))
4270         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
4271     else if (IS_IRONLAKE(i965->intel.device_id))
4272         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
4273
4274     for (i = 0; i < NUM_PP_MODULES; i++) {
4275         struct pp_module *pp_module = &pp_context->pp_modules[i];
4276         dri_bo_unreference(pp_module->kernel.bo);
4277         if (pp_module->kernel.bin && pp_module->kernel.size) {
4278             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
4279                                                 pp_module->kernel.name,
4280                                                 pp_module->kernel.size,
4281                                                 4096);
4282             assert(pp_module->kernel.bo);
4283             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
4284         } else {
4285             pp_module->kernel.bo = NULL;
4286         }
4287     }
4288
4289     /* static & inline parameters */
4290     if (IS_GEN7(i965->intel.device_id)) {
4291         pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
4292         pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
4293     } else {
4294         pp_context->pp_static_parameter = calloc(sizeof(struct pp_static_parameter), 1);
4295         pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
4296     }
4297
4298     pp_context->batch = batch;
4299 }
4300
4301 Bool
4302 i965_post_processing_init(VADriverContextP ctx)
4303 {
4304     struct i965_driver_data *i965 = i965_driver_data(ctx);
4305     struct i965_post_processing_context *pp_context = i965->pp_context;
4306
4307     if (HAS_PP(i965)) {
4308         if (pp_context == NULL) {
4309             pp_context = calloc(1, sizeof(*pp_context));
4310             i965_post_processing_context_init(ctx, pp_context, i965->batch);
4311             i965->pp_context = pp_context;
4312         }
4313     }
4314
4315     return True;
4316 }
4317
4318 static const int procfilter_to_pp_flag[VAProcFilterCount] = {
4319     PP_NULL,    /* VAProcFilterNone */
4320     PP_NV12_DN, /* VAProcFilterNoiseReduction */
4321     PP_NULL,    /* VAProcFilterDeblocking */
4322     PP_NV12_DNDI, /* VAProcFilterDeinterlacing */
4323     PP_NULL,    /* VAProcFilterSharpening */
4324     PP_NULL,    /* VAProcFilterColorBalance */
4325     PP_NULL,    /* VAProcFilterColorStandard */
4326     PP_NULL,    /* VAProcFilterFrameRateConversion */
4327 };
4328
4329 static const int proc_frame_to_pp_frame[3] = {
4330     I965_SURFACE_FLAG_FRAME,
4331     I965_SURFACE_FLAG_TOP_FIELD_FIRST,
4332     I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST
4333 };
4334
4335 static void 
4336 i965_proc_picture(VADriverContextP ctx, 
4337                   VAProfile profile, 
4338                   union codec_state *codec_state,
4339                   struct hw_context *hw_context)
4340 {
4341     struct i965_driver_data *i965 = i965_driver_data(ctx);
4342     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
4343     struct proc_state *proc_state = &codec_state->proc;
4344     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
4345     struct object_surface *obj_surface;
4346     struct i965_surface src_surface, dst_surface;
4347     VARectangle src_rect, dst_rect;
4348     VAStatus status;
4349     int i;
4350     VASurfaceID tmp_surfaces[VAProcFilterCount + 4];
4351     int num_tmp_surfaces = 0;
4352     unsigned int tiling = 0, swizzle = 0;
4353     int in_width, in_height;
4354
4355     assert(pipeline_param->surface != VA_INVALID_ID);
4356     assert(proc_state->current_render_target != VA_INVALID_ID);
4357
4358     obj_surface = SURFACE(pipeline_param->surface);
4359     in_width = obj_surface->orig_width;
4360     in_height = obj_surface->orig_height;
4361     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
4362
4363     src_surface.id = pipeline_param->surface;
4364     src_surface.type = I965_SURFACE_TYPE_SURFACE;
4365     src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
4366
4367     if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2')) {
4368         VASurfaceID out_surface_id = VA_INVALID_ID;
4369
4370         src_surface.id = pipeline_param->surface;
4371         src_surface.type = I965_SURFACE_TYPE_SURFACE;
4372         src_surface.flags = I965_SURFACE_FLAG_FRAME;
4373         src_rect.x = 0;
4374         src_rect.y = 0;
4375         src_rect.width = in_width;
4376         src_rect.height = in_height;
4377
4378         status = i965_CreateSurfaces(ctx,
4379                                      in_width,
4380                                      in_height,
4381                                      VA_RT_FORMAT_YUV420,
4382                                      1,
4383                                      &out_surface_id);
4384         assert(status == VA_STATUS_SUCCESS);
4385         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
4386         obj_surface = SURFACE(out_surface_id);
4387         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
4388
4389         dst_surface.id = out_surface_id;
4390         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4391         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4392         dst_rect.x = 0;
4393         dst_rect.y = 0;
4394         dst_rect.width = in_width;
4395         dst_rect.height = in_height;
4396
4397         status = i965_image_processing(ctx,
4398                                        &src_surface,
4399                                        &src_rect,
4400                                        &dst_surface,
4401                                        &dst_rect);
4402         assert(status == VA_STATUS_SUCCESS);
4403
4404         src_surface.id = out_surface_id;
4405         src_surface.type = I965_SURFACE_TYPE_SURFACE;
4406         src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
4407     }
4408
4409     if (pipeline_param->surface_region) {
4410         src_rect.x = pipeline_param->surface_region->x;
4411         src_rect.y = pipeline_param->surface_region->y;
4412         src_rect.width = pipeline_param->surface_region->width;
4413         src_rect.height = pipeline_param->surface_region->height;
4414     } else {
4415         src_rect.x = 0;
4416         src_rect.y = 0;
4417         src_rect.width = in_width;
4418         src_rect.height = in_height;
4419     }
4420
4421     if (pipeline_param->output_region) {
4422         dst_rect.x = pipeline_param->output_region->x;
4423         dst_rect.y = pipeline_param->output_region->y;
4424         dst_rect.width = pipeline_param->output_region->width;
4425         dst_rect.height = pipeline_param->output_region->height;
4426     } else {
4427         dst_rect.x = 0;
4428         dst_rect.y = 0;
4429         dst_rect.width = in_width;
4430         dst_rect.height = in_height;
4431     }
4432
4433     obj_surface = SURFACE(proc_state->current_render_target);
4434     i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4435     i965_vpp_clear_surface(ctx, &proc_context->pp_context, proc_state->current_render_target, pipeline_param->output_background_color); 
4436     
4437     for (i = 0; i < pipeline_param->num_filters; i++) {
4438         struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]);
4439         VAProcFilterParameterBufferBase *filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer;
4440         VAProcFilterType filter_type = filter_param->type;
4441         VASurfaceID out_surface_id = VA_INVALID_ID;
4442         int kernel_index = procfilter_to_pp_flag[filter_type];
4443
4444         if (kernel_index != PP_NULL &&
4445             proc_context->pp_context.pp_modules[kernel_index].kernel.bo != NULL) {
4446             status = i965_CreateSurfaces(ctx,
4447                                          in_width,
4448                                          in_height,
4449                                          VA_RT_FORMAT_YUV420,
4450                                          1,
4451                                          &out_surface_id);
4452             assert(status == VA_STATUS_SUCCESS);
4453             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
4454             obj_surface = SURFACE(out_surface_id);
4455             i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4456             dst_surface.id = out_surface_id;
4457             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4458             status = i965_post_processing_internal(ctx, &proc_context->pp_context,
4459                                                    &src_surface,
4460                                                    &src_rect,
4461                                                    &dst_surface,
4462                                                    &src_rect,
4463                                                    kernel_index,
4464                                                    filter_param);
4465
4466             if (status == VA_STATUS_SUCCESS) {
4467                 src_surface.id = dst_surface.id;
4468                 src_surface.type = dst_surface.type;
4469                 src_surface.flags = dst_surface.flags;
4470             }
4471         }
4472     }
4473
4474     dst_surface.id = proc_state->current_render_target;
4475     dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4476
4477     if (src_rect.width == dst_rect.width &&
4478         src_rect.height == dst_rect.height) {
4479         i965_post_processing_internal(ctx, &proc_context->pp_context,
4480                                       &src_surface,
4481                                       &src_rect,
4482                                       &dst_surface,
4483                                       &dst_rect,
4484                                       PP_NV12_LOAD_SAVE_N12,
4485                                       NULL);
4486     } else {
4487
4488         i965_post_processing_internal(ctx, &proc_context->pp_context,
4489                                       &src_surface,
4490                                       &src_rect,
4491                                       &dst_surface,
4492                                       &dst_rect,
4493                                       (pipeline_param->filter_flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC ?
4494                                       PP_NV12_AVS : PP_NV12_SCALING,
4495                                       NULL);
4496     }
4497
4498     if (num_tmp_surfaces)
4499         i965_DestroySurfaces(ctx,
4500                              tmp_surfaces,
4501                              num_tmp_surfaces);
4502
4503     intel_batchbuffer_flush(hw_context->batch);
4504 }
4505
4506 static void
4507 i965_proc_context_destroy(void *hw_context)
4508 {
4509     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
4510
4511     i965_post_processing_context_finalize(&proc_context->pp_context);
4512     intel_batchbuffer_free(proc_context->base.batch);
4513     free(proc_context);
4514 }
4515
4516 struct hw_context *
4517 i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config)
4518 {
4519     struct intel_driver_data *intel = intel_driver_data(ctx);
4520     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
4521
4522     proc_context->base.destroy = i965_proc_context_destroy;
4523     proc_context->base.run = i965_proc_picture;
4524     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
4525     i965_post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
4526
4527     return (struct hw_context *)proc_context;
4528 }