ba4fdc314e7bbc14712fe095c7b140305847d296
[platform/upstream/libva-intel-driver.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_post_processing.h"
40 #include "i965_render.h"
41 #include "intel_media.h"
42
43 extern VAStatus
44 vpp_surface_convert(VADriverContextP ctx,
45                     struct object_surface *src_obj_surf,
46                     struct object_surface *dst_obj_surf);
47
/* Reads the has_vpp field of the codec_info attached to the driver context. */
#define HAS_VPP(ctx) ((ctx)->codec_info->has_vpp)

/* Largest of the GEN6, GEN7 and GEN8 padded surface-state sizes. */
#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_GEN8,\
                        MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7))

/*
 * Offset of surface-state slot `index`, i.e. `index` multiples of
 * SURFACE_STATE_PADDED_SIZE. The argument is parenthesized so that a compound
 * argument such as SURFACE_STATE_OFFSET(base + 1) is scaled as a whole
 * instead of only its first operand.
 */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
/* Offset that follows MAX_PP_SURFACES surface-state slots. */
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)

/* NOTE(review): presumably the block geometry assumed by the shader kernels - confirm. */
#define GPU_ASM_BLOCK_WIDTH         16
#define GPU_ASM_BLOCK_HEIGHT        8
#define GPU_ASM_X_OFFSET_ALIGNMENT  4

/* NOTE(review): private status value; its users are outside this part of the file. */
#define VA_STATUS_SUCCESS_1                     0xFFFFFFFE
61
62 extern VAStatus
63 i965_CreateSurfaces(VADriverContextP ctx,
64                     int width,
65                     int height,
66                     int format,
67                     int num_surfaces,
68                     VASurfaceID *surfaces);
69
70 static const uint32_t pp_null_gen5[][4] = {
71 #include "shaders/post_processing/gen5_6/null.g4b.gen5"
72 };
73
74 static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
75 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
76 };
77
78 static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
79 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
80 };
81
82 static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
83 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
84 };
85
86 static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
87 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
88 };
89
90 static const uint32_t pp_nv12_scaling_gen5[][4] = {
91 #include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
92 };
93
94 static const uint32_t pp_nv12_avs_gen5[][4] = {
95 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
96 };
97
98 static const uint32_t pp_nv12_dndi_gen5[][4] = {
99 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
100 };
101
102 static const uint32_t pp_nv12_dn_gen5[][4] = {
103 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
104 };
105
106 static const uint32_t pp_nv12_load_save_pa_gen5[][4] = {
107 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5"
108 };
109
110 static const uint32_t pp_pl3_load_save_pa_gen5[][4] = {
111 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5"
112 };
113
114 static const uint32_t pp_pa_load_save_nv12_gen5[][4] = {
115 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5"
116 };
117
118 static const uint32_t pp_pa_load_save_pl3_gen5[][4] = {
119 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5"
120 };
121
122 static const uint32_t pp_pa_load_save_pa_gen5[][4] = {
123 #include "shaders/post_processing/gen5_6/pa_load_save_pa.g4b.gen5"
124 };
125
126 static const uint32_t pp_rgbx_load_save_nv12_gen5[][4] = {
127 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g4b.gen5"
128 };
129
130 static const uint32_t pp_nv12_load_save_rgbx_gen5[][4] = {
131 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g4b.gen5"
132 };
133
134 static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
135                                    const struct i965_surface *src_surface,
136                                    const VARectangle *src_rect,
137                                    struct i965_surface *dst_surface,
138                                    const VARectangle *dst_rect,
139                                    void *filter_param);
140 static VAStatus pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
141                                             const struct i965_surface *src_surface,
142                                             const VARectangle *src_rect,
143                                             struct i965_surface *dst_surface,
144                                             const VARectangle *dst_rect,
145                                             void *filter_param);
146 static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
147                                            const struct i965_surface *src_surface,
148                                            const VARectangle *src_rect,
149                                            struct i965_surface *dst_surface,
150                                            const VARectangle *dst_rect,
151                                            void *filter_param);
152 static VAStatus gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
153                                              const struct i965_surface *src_surface,
154                                              const VARectangle *src_rect,
155                                              struct i965_surface *dst_surface,
156                                              const VARectangle *dst_rect,
157                                              void *filter_param);
158 static VAStatus pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
159                                                 const struct i965_surface *src_surface,
160                                                 const VARectangle *src_rect,
161                                                 struct i965_surface *dst_surface,
162                                                 const VARectangle *dst_rect,
163                                                 void *filter_param);
164 static VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
165                                         const struct i965_surface *src_surface,
166                                         const VARectangle *src_rect,
167                                         struct i965_surface *dst_surface,
168                                         const VARectangle *dst_rect,
169                                         void *filter_param);
170 static VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
171                                       const struct i965_surface *src_surface,
172                                       const VARectangle *src_rect,
173                                       struct i965_surface *dst_surface,
174                                       const VARectangle *dst_rect,
175                                       void *filter_param);
176
177 static struct pp_module pp_modules_gen5[] = {
178     {
179         {
180             "NULL module (for testing)",
181             PP_NULL,
182             pp_null_gen5,
183             sizeof(pp_null_gen5),
184             NULL,
185         },
186
187         pp_null_initialize,
188     },
189
190     {
191         {
192             "NV12_NV12",
193             PP_NV12_LOAD_SAVE_N12,
194             pp_nv12_load_save_nv12_gen5,
195             sizeof(pp_nv12_load_save_nv12_gen5),
196             NULL,
197         },
198
199         pp_plx_load_save_plx_initialize,
200     },
201
202     {
203         {
204             "NV12_PL3",
205             PP_NV12_LOAD_SAVE_PL3,
206             pp_nv12_load_save_pl3_gen5,
207             sizeof(pp_nv12_load_save_pl3_gen5),
208             NULL,
209         },
210
211         pp_plx_load_save_plx_initialize,
212     },
213
214     {
215         {
216             "PL3_NV12",
217             PP_PL3_LOAD_SAVE_N12,
218             pp_pl3_load_save_nv12_gen5,
219             sizeof(pp_pl3_load_save_nv12_gen5),
220             NULL,
221         },
222
223         pp_plx_load_save_plx_initialize,
224     },
225
226     {
227         {
228             "PL3_PL3",
229             PP_PL3_LOAD_SAVE_PL3,
230             pp_pl3_load_save_pl3_gen5,
231             sizeof(pp_pl3_load_save_pl3_gen5),
232             NULL,
233         },
234
235         pp_plx_load_save_plx_initialize
236     },
237
238     {
239         {
240             "NV12 Scaling module",
241             PP_NV12_SCALING,
242             pp_nv12_scaling_gen5,
243             sizeof(pp_nv12_scaling_gen5),
244             NULL,
245         },
246
247         pp_nv12_scaling_initialize,
248     },
249
250     {
251         {
252             "NV12 AVS module",
253             PP_NV12_AVS,
254             pp_nv12_avs_gen5,
255             sizeof(pp_nv12_avs_gen5),
256             NULL,
257         },
258
259         pp_nv12_avs_initialize_nlas,
260     },
261
262     {
263         {
264             "NV12 DNDI module",
265             PP_NV12_DNDI,
266             pp_nv12_dndi_gen5,
267             sizeof(pp_nv12_dndi_gen5),
268             NULL,
269         },
270
271         pp_nv12_dndi_initialize,
272     },
273
274     {
275         {
276             "NV12 DN module",
277             PP_NV12_DN,
278             pp_nv12_dn_gen5,
279             sizeof(pp_nv12_dn_gen5),
280             NULL,
281         },
282
283         pp_nv12_dn_initialize,
284     },
285
286     {
287         {
288             "NV12_PA module",
289             PP_NV12_LOAD_SAVE_PA,
290             pp_nv12_load_save_pa_gen5,
291             sizeof(pp_nv12_load_save_pa_gen5),
292             NULL,
293         },
294     
295         pp_plx_load_save_plx_initialize,
296     },
297
298     {
299         {
300             "PL3_PA module",
301             PP_PL3_LOAD_SAVE_PA,
302             pp_pl3_load_save_pa_gen5,
303             sizeof(pp_pl3_load_save_pa_gen5),
304             NULL,
305         },
306     
307         pp_plx_load_save_plx_initialize,
308     },
309
310     {
311         {
312             "PA_NV12 module",
313             PP_PA_LOAD_SAVE_NV12,
314             pp_pa_load_save_nv12_gen5,
315             sizeof(pp_pa_load_save_nv12_gen5),
316             NULL,
317         },
318     
319         pp_plx_load_save_plx_initialize,
320     },
321
322     {
323         {
324             "PA_PL3 module",
325             PP_PA_LOAD_SAVE_PL3,
326             pp_pa_load_save_pl3_gen5,
327             sizeof(pp_pa_load_save_pl3_gen5),
328             NULL,
329         },
330     
331         pp_plx_load_save_plx_initialize,
332     },
333
334     {
335         {
336             "PA_PA module",
337             PP_PA_LOAD_SAVE_PA,
338             pp_pa_load_save_pa_gen5,
339             sizeof(pp_pa_load_save_pa_gen5),
340             NULL,
341         },
342
343         pp_plx_load_save_plx_initialize,
344     },
345
346     {
347         {
348             "RGBX_NV12 module",
349             PP_RGBX_LOAD_SAVE_NV12,
350             pp_rgbx_load_save_nv12_gen5,
351             sizeof(pp_rgbx_load_save_nv12_gen5),
352             NULL,
353         },
354     
355         pp_plx_load_save_plx_initialize,
356     },
357             
358     {
359         {
360             "NV12_RGBX module",
361             PP_NV12_LOAD_SAVE_RGBX,
362             pp_nv12_load_save_rgbx_gen5,
363             sizeof(pp_nv12_load_save_rgbx_gen5),
364             NULL,
365         },
366     
367         pp_plx_load_save_plx_initialize,
368     },
369 };
370
371 static const uint32_t pp_null_gen6[][4] = {
372 #include "shaders/post_processing/gen5_6/null.g6b"
373 };
374
375 static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
376 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
377 };
378
379 static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
380 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
381 };
382
383 static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
384 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
385 };
386
387 static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
388 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
389 };
390
391 static const uint32_t pp_nv12_scaling_gen6[][4] = {
392 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
393 };
394
395 static const uint32_t pp_nv12_avs_gen6[][4] = {
396 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
397 };
398
399 static const uint32_t pp_nv12_dndi_gen6[][4] = {
400 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
401 };
402
403 static const uint32_t pp_nv12_dn_gen6[][4] = {
404 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
405 };
406
407 static const uint32_t pp_nv12_load_save_pa_gen6[][4] = {
408 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g6b"
409 };
410
411 static const uint32_t pp_pl3_load_save_pa_gen6[][4] = {
412 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g6b"
413 };
414
415 static const uint32_t pp_pa_load_save_nv12_gen6[][4] = {
416 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g6b"
417 };
418
419 static const uint32_t pp_pa_load_save_pl3_gen6[][4] = {
420 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g6b"
421 };
422
423 static const uint32_t pp_pa_load_save_pa_gen6[][4] = {
424 #include "shaders/post_processing/gen5_6/pa_load_save_pa.g6b"
425 };
426
427 static const uint32_t pp_rgbx_load_save_nv12_gen6[][4] = {
428 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g6b"
429 };
430
431 static const uint32_t pp_nv12_load_save_rgbx_gen6[][4] = {
432 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g6b"
433 };
434
435 static struct pp_module pp_modules_gen6[] = {
436     {
437         {
438             "NULL module (for testing)",
439             PP_NULL,
440             pp_null_gen6,
441             sizeof(pp_null_gen6),
442             NULL,
443         },
444
445         pp_null_initialize,
446     },
447
448     {
449         {
450             "NV12_NV12",
451             PP_NV12_LOAD_SAVE_N12,
452             pp_nv12_load_save_nv12_gen6,
453             sizeof(pp_nv12_load_save_nv12_gen6),
454             NULL,
455         },
456
457         pp_plx_load_save_plx_initialize,
458     },
459
460     {
461         {
462             "NV12_PL3",
463             PP_NV12_LOAD_SAVE_PL3,
464             pp_nv12_load_save_pl3_gen6,
465             sizeof(pp_nv12_load_save_pl3_gen6),
466             NULL,
467         },
468         
469         pp_plx_load_save_plx_initialize,
470     },
471
472     {
473         {
474             "PL3_NV12",
475             PP_PL3_LOAD_SAVE_N12,
476             pp_pl3_load_save_nv12_gen6,
477             sizeof(pp_pl3_load_save_nv12_gen6),
478             NULL,
479         },
480
481         pp_plx_load_save_plx_initialize,
482     },
483
484     {
485         {
486             "PL3_PL3",
487             PP_PL3_LOAD_SAVE_PL3,
488             pp_pl3_load_save_pl3_gen6,
489             sizeof(pp_pl3_load_save_pl3_gen6),
490             NULL,
491         },
492
493         pp_plx_load_save_plx_initialize,
494     },
495
496     {
497         {
498             "NV12 Scaling module",
499             PP_NV12_SCALING,
500             pp_nv12_scaling_gen6,
501             sizeof(pp_nv12_scaling_gen6),
502             NULL,
503         },
504
505         gen6_nv12_scaling_initialize,
506     },
507
508     {
509         {
510             "NV12 AVS module",
511             PP_NV12_AVS,
512             pp_nv12_avs_gen6,
513             sizeof(pp_nv12_avs_gen6),
514             NULL,
515         },
516
517         pp_nv12_avs_initialize_nlas,
518     },
519
520     {
521         {
522             "NV12 DNDI module",
523             PP_NV12_DNDI,
524             pp_nv12_dndi_gen6,
525             sizeof(pp_nv12_dndi_gen6),
526             NULL,
527         },
528
529         pp_nv12_dndi_initialize,
530     },
531
532     {
533         {
534             "NV12 DN module",
535             PP_NV12_DN,
536             pp_nv12_dn_gen6,
537             sizeof(pp_nv12_dn_gen6),
538             NULL,
539         },
540
541         pp_nv12_dn_initialize,
542     },
543     {
544         {
545             "NV12_PA module",
546             PP_NV12_LOAD_SAVE_PA,
547             pp_nv12_load_save_pa_gen6,
548             sizeof(pp_nv12_load_save_pa_gen6),
549             NULL,
550         },
551     
552         pp_plx_load_save_plx_initialize,
553     },
554
555     {
556         {
557             "PL3_PA module",
558             PP_PL3_LOAD_SAVE_PA,
559             pp_pl3_load_save_pa_gen6,
560             sizeof(pp_pl3_load_save_pa_gen6),
561             NULL,
562         },
563     
564         pp_plx_load_save_plx_initialize,
565     },
566
567     {
568         {
569             "PA_NV12 module",
570             PP_PA_LOAD_SAVE_NV12,
571             pp_pa_load_save_nv12_gen6,
572             sizeof(pp_pa_load_save_nv12_gen6),
573             NULL,
574         },
575     
576         pp_plx_load_save_plx_initialize,
577     },
578
579     {
580         {
581             "PA_PL3 module",
582             PP_PA_LOAD_SAVE_PL3,
583             pp_pa_load_save_pl3_gen6,
584             sizeof(pp_pa_load_save_pl3_gen6),
585             NULL,
586         },
587     
588         pp_plx_load_save_plx_initialize,
589     },
590
591     {
592         {
593             "PA_PA module",
594             PP_PA_LOAD_SAVE_PA,
595             pp_pa_load_save_pa_gen6,
596             sizeof(pp_pa_load_save_pa_gen6),
597             NULL,
598         },
599
600         pp_plx_load_save_plx_initialize,
601     },
602
603     {
604         {
605             "RGBX_NV12 module",
606             PP_RGBX_LOAD_SAVE_NV12,
607             pp_rgbx_load_save_nv12_gen6,
608             sizeof(pp_rgbx_load_save_nv12_gen6),
609             NULL,
610         },
611     
612         pp_plx_load_save_plx_initialize,
613     },
614
615     {
616         {
617             "NV12_RGBX module",
618             PP_NV12_LOAD_SAVE_RGBX,
619             pp_nv12_load_save_rgbx_gen6,
620             sizeof(pp_nv12_load_save_rgbx_gen6),
621             NULL,
622         },
623     
624         pp_plx_load_save_plx_initialize,
625     },
626 };
627
628 static const uint32_t pp_null_gen7[][4] = {
629 };
630
631 static const uint32_t pp_nv12_load_save_nv12_gen7[][4] = {
632 #include "shaders/post_processing/gen7/pl2_to_pl2.g7b"
633 };
634
635 static const uint32_t pp_nv12_load_save_pl3_gen7[][4] = {
636 #include "shaders/post_processing/gen7/pl2_to_pl3.g7b"
637 };
638
639 static const uint32_t pp_pl3_load_save_nv12_gen7[][4] = {
640 #include "shaders/post_processing/gen7/pl3_to_pl2.g7b"
641 };
642
643 static const uint32_t pp_pl3_load_save_pl3_gen7[][4] = {
644 #include "shaders/post_processing/gen7/pl3_to_pl3.g7b"
645 };
646
647 static const uint32_t pp_nv12_scaling_gen7[][4] = {
648 #include "shaders/post_processing/gen7/avs.g7b"
649 };
650
651 static const uint32_t pp_nv12_avs_gen7[][4] = {
652 #include "shaders/post_processing/gen7/avs.g7b"
653 };
654
655 static const uint32_t pp_nv12_dndi_gen7[][4] = {
656 #include "shaders/post_processing/gen7/dndi.g7b"
657 };
658
659 static const uint32_t pp_nv12_dn_gen7[][4] = {
660 #include "shaders/post_processing/gen7/nv12_dn_nv12.g7b"
661 };
662 static const uint32_t pp_nv12_load_save_pa_gen7[][4] = {
663 #include "shaders/post_processing/gen7/pl2_to_pa.g7b"
664 };
665 static const uint32_t pp_pl3_load_save_pa_gen7[][4] = {
666 #include "shaders/post_processing/gen7/pl3_to_pa.g7b"
667 };
668 static const uint32_t pp_pa_load_save_nv12_gen7[][4] = {
669 #include "shaders/post_processing/gen7/pa_to_pl2.g7b"
670 };
671 static const uint32_t pp_pa_load_save_pl3_gen7[][4] = {
672 #include "shaders/post_processing/gen7/pa_to_pl3.g7b"
673 };
674 static const uint32_t pp_pa_load_save_pa_gen7[][4] = {
675 #include "shaders/post_processing/gen7/pa_to_pa.g7b"
676 };
677 static const uint32_t pp_rgbx_load_save_nv12_gen7[][4] = {
678 #include "shaders/post_processing/gen7/rgbx_to_nv12.g7b"
679 };
680 static const uint32_t pp_nv12_load_save_rgbx_gen7[][4] = {
681 #include "shaders/post_processing/gen7/pl2_to_rgbx.g7b"
682 };
683
684 static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
685                                            const struct i965_surface *src_surface,
686                                            const VARectangle *src_rect,
687                                            struct i965_surface *dst_surface,
688                                            const VARectangle *dst_rect,
689                                            void *filter_param);
690 static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
691                                              const struct i965_surface *src_surface,
692                                              const VARectangle *src_rect,
693                                              struct i965_surface *dst_surface,
694                                              const VARectangle *dst_rect,
695                                              void *filter_param);
696 static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
697                                            const struct i965_surface *src_surface,
698                                            const VARectangle *src_rect,
699                                            struct i965_surface *dst_surface,
700                                            const VARectangle *dst_rect,
701                                            void *filter_param);
702
703 static struct pp_module pp_modules_gen7[] = {
704     {
705         {
706             "NULL module (for testing)",
707             PP_NULL,
708             pp_null_gen7,
709             sizeof(pp_null_gen7),
710             NULL,
711         },
712
713         pp_null_initialize,
714     },
715
716     {
717         {
718             "NV12_NV12",
719             PP_NV12_LOAD_SAVE_N12,
720             pp_nv12_load_save_nv12_gen7,
721             sizeof(pp_nv12_load_save_nv12_gen7),
722             NULL,
723         },
724
725         gen7_pp_plx_avs_initialize,
726     },
727
728     {
729         {
730             "NV12_PL3",
731             PP_NV12_LOAD_SAVE_PL3,
732             pp_nv12_load_save_pl3_gen7,
733             sizeof(pp_nv12_load_save_pl3_gen7),
734             NULL,
735         },
736         
737         gen7_pp_plx_avs_initialize,
738     },
739
740     {
741         {
742             "PL3_NV12",
743             PP_PL3_LOAD_SAVE_N12,
744             pp_pl3_load_save_nv12_gen7,
745             sizeof(pp_pl3_load_save_nv12_gen7),
746             NULL,
747         },
748
749         gen7_pp_plx_avs_initialize,
750     },
751
752     {
753         {
754             "PL3_PL3",
755             PP_PL3_LOAD_SAVE_PL3,
756             pp_pl3_load_save_pl3_gen7,
757             sizeof(pp_pl3_load_save_pl3_gen7),
758             NULL,
759         },
760
761         gen7_pp_plx_avs_initialize,
762     },
763
764     {
765         {
766             "NV12 Scaling module",
767             PP_NV12_SCALING,
768             pp_nv12_scaling_gen7,
769             sizeof(pp_nv12_scaling_gen7),
770             NULL,
771         },
772
773         gen7_pp_plx_avs_initialize,
774     },
775
776     {
777         {
778             "NV12 AVS module",
779             PP_NV12_AVS,
780             pp_nv12_avs_gen7,
781             sizeof(pp_nv12_avs_gen7),
782             NULL,
783         },
784
785         gen7_pp_plx_avs_initialize,
786     },
787
788     {
789         {
790             "NV12 DNDI module",
791             PP_NV12_DNDI,
792             pp_nv12_dndi_gen7,
793             sizeof(pp_nv12_dndi_gen7),
794             NULL,
795         },
796
797         gen7_pp_nv12_dndi_initialize,
798     },
799
800     {
801         {
802             "NV12 DN module",
803             PP_NV12_DN,
804             pp_nv12_dn_gen7,
805             sizeof(pp_nv12_dn_gen7),
806             NULL,
807         },
808
809         gen7_pp_nv12_dn_initialize,
810     },
811     {
812         {
813             "NV12_PA module",
814             PP_NV12_LOAD_SAVE_PA,
815             pp_nv12_load_save_pa_gen7,
816             sizeof(pp_nv12_load_save_pa_gen7),
817             NULL,
818         },
819     
820         gen7_pp_plx_avs_initialize,
821     },
822
823     {
824         {
825             "PL3_PA module",
826             PP_PL3_LOAD_SAVE_PA,
827             pp_pl3_load_save_pa_gen7,
828             sizeof(pp_pl3_load_save_pa_gen7),
829             NULL,
830         },
831     
832         gen7_pp_plx_avs_initialize,
833     },
834
835     {
836         {
837             "PA_NV12 module",
838             PP_PA_LOAD_SAVE_NV12,
839             pp_pa_load_save_nv12_gen7,
840             sizeof(pp_pa_load_save_nv12_gen7),
841             NULL,
842         },
843     
844         gen7_pp_plx_avs_initialize,
845     },
846
847     {
848         {
849             "PA_PL3 module",
850             PP_PA_LOAD_SAVE_PL3,
851             pp_pa_load_save_pl3_gen7,
852             sizeof(pp_pa_load_save_pl3_gen7),
853             NULL,
854         },
855     
856         gen7_pp_plx_avs_initialize,
857     },
858
859     {
860         {
861             "PA_PA module",
862             PP_PA_LOAD_SAVE_PA,
863             pp_pa_load_save_pa_gen7,
864             sizeof(pp_pa_load_save_pa_gen7),
865             NULL,
866         },
867
868         gen7_pp_plx_avs_initialize,
869     },
870
871     {
872         {
873             "RGBX_NV12 module",
874             PP_RGBX_LOAD_SAVE_NV12,
875             pp_rgbx_load_save_nv12_gen7,
876             sizeof(pp_rgbx_load_save_nv12_gen7),
877             NULL,
878         },
879     
880         gen7_pp_plx_avs_initialize,
881     },
882
883     {
884         {
885             "NV12_RGBX module",
886             PP_NV12_LOAD_SAVE_RGBX,
887             pp_nv12_load_save_rgbx_gen7,
888             sizeof(pp_nv12_load_save_rgbx_gen7),
889             NULL,
890         },
891     
892         gen7_pp_plx_avs_initialize,
893     },
894             
895 };
896
897 static const uint32_t pp_null_gen75[][4] = {
898 };
899
900 static const uint32_t pp_nv12_load_save_nv12_gen75[][4] = {
901 #include "shaders/post_processing/gen7/pl2_to_pl2.g75b"
902 };
903
904 static const uint32_t pp_nv12_load_save_pl3_gen75[][4] = {
905 #include "shaders/post_processing/gen7/pl2_to_pl3.g75b"
906 };
907
908 static const uint32_t pp_pl3_load_save_nv12_gen75[][4] = {
909 #include "shaders/post_processing/gen7/pl3_to_pl2.g75b"
910 };
911
912 static const uint32_t pp_pl3_load_save_pl3_gen75[][4] = {
913 #include "shaders/post_processing/gen7/pl3_to_pl3.g75b"
914 };
915
916 static const uint32_t pp_nv12_scaling_gen75[][4] = {
917 #include "shaders/post_processing/gen7/avs.g75b"
918 };
919
920 static const uint32_t pp_nv12_avs_gen75[][4] = {
921 #include "shaders/post_processing/gen7/avs.g75b"
922 };
923
924 static const uint32_t pp_nv12_dndi_gen75[][4] = {
925 // #include "shaders/post_processing/gen7/dndi.g75b"
926 };
927
928 static const uint32_t pp_nv12_dn_gen75[][4] = {
929 // #include "shaders/post_processing/gen7/nv12_dn_nv12.g75b"
930 };
931 static const uint32_t pp_nv12_load_save_pa_gen75[][4] = {
932 #include "shaders/post_processing/gen7/pl2_to_pa.g75b"
933 };
934 static const uint32_t pp_pl3_load_save_pa_gen75[][4] = {
935 #include "shaders/post_processing/gen7/pl3_to_pa.g75b"
936 };
937 static const uint32_t pp_pa_load_save_nv12_gen75[][4] = {
938 #include "shaders/post_processing/gen7/pa_to_pl2.g75b"
939 };
940 static const uint32_t pp_pa_load_save_pl3_gen75[][4] = {
941 #include "shaders/post_processing/gen7/pa_to_pl3.g75b"
942 };
943 static const uint32_t pp_pa_load_save_pa_gen75[][4] = {
944 #include "shaders/post_processing/gen7/pa_to_pa.g75b"
945 };
946 static const uint32_t pp_rgbx_load_save_nv12_gen75[][4] = {
947 #include "shaders/post_processing/gen7/rgbx_to_nv12.g75b"
948 };
949 static const uint32_t pp_nv12_load_save_rgbx_gen75[][4] = {
950 #include "shaders/post_processing/gen7/pl2_to_rgbx.g75b"
951 };
952
953 static struct pp_module pp_modules_gen75[] = {
954     {
955         {
956             "NULL module (for testing)",
957             PP_NULL,
958             pp_null_gen75,
959             sizeof(pp_null_gen75),
960             NULL,
961         },
962
963         pp_null_initialize,
964     },
965
966     {
967         {
968             "NV12_NV12",
969             PP_NV12_LOAD_SAVE_N12,
970             pp_nv12_load_save_nv12_gen75,
971             sizeof(pp_nv12_load_save_nv12_gen75),
972             NULL,
973         },
974
975         gen7_pp_plx_avs_initialize,
976     },
977
978     {
979         {
980             "NV12_PL3",
981             PP_NV12_LOAD_SAVE_PL3,
982             pp_nv12_load_save_pl3_gen75,
983             sizeof(pp_nv12_load_save_pl3_gen75),
984             NULL,
985         },
986         
987         gen7_pp_plx_avs_initialize,
988     },
989
990     {
991         {
992             "PL3_NV12",
993             PP_PL3_LOAD_SAVE_N12,
994             pp_pl3_load_save_nv12_gen75,
995             sizeof(pp_pl3_load_save_nv12_gen75),
996             NULL,
997         },
998
999         gen7_pp_plx_avs_initialize,
1000     },
1001
1002     {
1003         {
1004             "PL3_PL3",
1005             PP_PL3_LOAD_SAVE_PL3,
1006             pp_pl3_load_save_pl3_gen75,
1007             sizeof(pp_pl3_load_save_pl3_gen75),
1008             NULL,
1009         },
1010
1011         gen7_pp_plx_avs_initialize,
1012     },
1013
1014     {
1015         {
1016             "NV12 Scaling module",
1017             PP_NV12_SCALING,
1018             pp_nv12_scaling_gen75,
1019             sizeof(pp_nv12_scaling_gen75),
1020             NULL,
1021         },
1022
1023         gen7_pp_plx_avs_initialize,
1024     },
1025
1026     {
1027         {
1028             "NV12 AVS module",
1029             PP_NV12_AVS,
1030             pp_nv12_avs_gen75,
1031             sizeof(pp_nv12_avs_gen75),
1032             NULL,
1033         },
1034
1035         gen7_pp_plx_avs_initialize,
1036     },
1037
1038     {
1039         {
1040             "NV12 DNDI module",
1041             PP_NV12_DNDI,
1042             pp_nv12_dndi_gen75,
1043             sizeof(pp_nv12_dndi_gen75),
1044             NULL,
1045         },
1046
1047         gen7_pp_nv12_dn_initialize,
1048     },
1049
1050     {
1051         {
1052             "NV12 DN module",
1053             PP_NV12_DN,
1054             pp_nv12_dn_gen75,
1055             sizeof(pp_nv12_dn_gen75),
1056             NULL,
1057         },
1058
1059         gen7_pp_nv12_dn_initialize,
1060     },
1061
1062     {
1063         {
1064             "NV12_PA module",
1065             PP_NV12_LOAD_SAVE_PA,
1066             pp_nv12_load_save_pa_gen75,
1067             sizeof(pp_nv12_load_save_pa_gen75),
1068             NULL,
1069         },
1070     
1071         gen7_pp_plx_avs_initialize,
1072     },
1073
1074     {
1075         {
1076             "PL3_PA module",
1077             PP_PL3_LOAD_SAVE_PA,
1078             pp_pl3_load_save_pa_gen75,
1079             sizeof(pp_pl3_load_save_pa_gen75),
1080             NULL,
1081         },
1082     
1083         gen7_pp_plx_avs_initialize,
1084     },
1085
1086     {
1087         {
1088             "PA_NV12 module",
1089             PP_PA_LOAD_SAVE_NV12,
1090             pp_pa_load_save_nv12_gen75,
1091             sizeof(pp_pa_load_save_nv12_gen75),
1092             NULL,
1093         },
1094     
1095         gen7_pp_plx_avs_initialize,
1096     },
1097
1098     {
1099         {
1100             "PA_PL3 module",
1101             PP_PA_LOAD_SAVE_PL3,
1102             pp_pa_load_save_pl3_gen75,
1103             sizeof(pp_pa_load_save_pl3_gen75),
1104             NULL,
1105         },
1106     
1107         gen7_pp_plx_avs_initialize,
1108     },
1109
1110     {
1111         {
1112             "PA_PA module",
1113             PP_PA_LOAD_SAVE_PA,
1114             pp_pa_load_save_pa_gen75,
1115             sizeof(pp_pa_load_save_pa_gen75),
1116             NULL,
1117         },
1118
1119         gen7_pp_plx_avs_initialize,
1120     },
1121
1122     {
1123         {
1124             "RGBX_NV12 module",
1125             PP_RGBX_LOAD_SAVE_NV12,
1126             pp_rgbx_load_save_nv12_gen75,
1127             sizeof(pp_rgbx_load_save_nv12_gen75),
1128             NULL,
1129         },
1130     
1131         gen7_pp_plx_avs_initialize,
1132     },
1133
1134     {
1135         {
1136             "NV12_RGBX module",
1137             PP_NV12_LOAD_SAVE_RGBX,
1138             pp_nv12_load_save_rgbx_gen75,
1139             sizeof(pp_nv12_load_save_rgbx_gen75),
1140             NULL,
1141         },
1142     
1143         gen7_pp_plx_avs_initialize,
1144     },
1145             
1146 };
1147
1148 static int
1149 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
1150 {
1151     int fourcc;
1152
1153     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
1154         struct object_image *obj_image = (struct object_image *)surface->base;
1155         fourcc = obj_image->image.format.fourcc;
1156     } else {
1157         struct object_surface *obj_surface = (struct object_surface *)surface->base;
1158         fourcc = obj_surface->fourcc;
1159     }
1160
1161     return fourcc;
1162 }
1163
1164 static void
1165 pp_get_surface_size(VADriverContextP ctx, const struct i965_surface *surface, int *width, int *height)
1166 {
1167     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
1168         struct object_image *obj_image = (struct object_image *)surface->base;
1169
1170         *width = obj_image->image.width;
1171         *height = obj_image->image.height;
1172     } else {
1173         struct object_surface *obj_surface = (struct object_surface *)surface->base;
1174
1175         *width = obj_surface->orig_width;
1176         *height = obj_surface->orig_height;
1177     }
1178 }
1179
1180 static void
1181 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
1182 {
1183     switch (tiling) {
1184     case I915_TILING_NONE:
1185         ss->ss3.tiled_surface = 0;
1186         ss->ss3.tile_walk = 0;
1187         break;
1188     case I915_TILING_X:
1189         ss->ss3.tiled_surface = 1;
1190         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
1191         break;
1192     case I915_TILING_Y:
1193         ss->ss3.tiled_surface = 1;
1194         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
1195         break;
1196     }
1197 }
1198
1199 static void
1200 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
1201 {
1202     switch (tiling) {
1203     case I915_TILING_NONE:
1204         ss->ss2.tiled_surface = 0;
1205         ss->ss2.tile_walk = 0;
1206         break;
1207     case I915_TILING_X:
1208         ss->ss2.tiled_surface = 1;
1209         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1210         break;
1211     case I915_TILING_Y:
1212         ss->ss2.tiled_surface = 1;
1213         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1214         break;
1215     }
1216 }
1217
1218 static void
1219 gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
1220 {
1221     switch (tiling) {
1222     case I915_TILING_NONE:
1223         ss->ss0.tiled_surface = 0;
1224         ss->ss0.tile_walk = 0;
1225         break;
1226     case I915_TILING_X:
1227         ss->ss0.tiled_surface = 1;
1228         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
1229         break;
1230     case I915_TILING_Y:
1231         ss->ss0.tiled_surface = 1;
1232         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
1233         break;
1234     }
1235 }
1236
1237 static void
1238 gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
1239 {
1240     switch (tiling) {
1241     case I915_TILING_NONE:
1242         ss->ss2.tiled_surface = 0;
1243         ss->ss2.tile_walk = 0;
1244         break;
1245     case I915_TILING_X:
1246         ss->ss2.tiled_surface = 1;
1247         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1248         break;
1249     case I915_TILING_Y:
1250         ss->ss2.tiled_surface = 1;
1251         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1252         break;
1253     }
1254 }
1255
1256 static void
1257 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
1258 {
1259     struct i965_interface_descriptor *desc;
1260     dri_bo *bo;
1261     int pp_index = pp_context->current_pp;
1262
1263     bo = pp_context->idrt.bo;
1264     dri_bo_map(bo, 1);
1265     assert(bo->virtual);
1266     desc = bo->virtual;
1267     memset(desc, 0, sizeof(*desc));
1268     desc->desc0.grf_reg_blocks = 10;
1269     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
1270     desc->desc1.const_urb_entry_read_offset = 0;
1271     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
1272     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
1273     desc->desc2.sampler_count = 0;
1274     desc->desc3.binding_table_entry_count = 0;
1275     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
1276
1277     dri_bo_emit_reloc(bo,
1278                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1279                       desc->desc0.grf_reg_blocks,
1280                       offsetof(struct i965_interface_descriptor, desc0),
1281                       pp_context->pp_modules[pp_index].kernel.bo);
1282
1283     dri_bo_emit_reloc(bo,
1284                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1285                       desc->desc2.sampler_count << 2,
1286                       offsetof(struct i965_interface_descriptor, desc2),
1287                       pp_context->sampler_state_table.bo);
1288
1289     dri_bo_unmap(bo);
1290     pp_context->idrt.num_interface_descriptors++;
1291 }
1292
1293 static void
1294 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
1295 {
1296     struct i965_vfe_state *vfe_state;
1297     dri_bo *bo;
1298
1299     bo = pp_context->vfe_state.bo;
1300     dri_bo_map(bo, 1);
1301     assert(bo->virtual);
1302     vfe_state = bo->virtual;
1303     memset(vfe_state, 0, sizeof(*vfe_state));
1304     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
1305     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
1306     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
1307     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
1308     vfe_state->vfe1.children_present = 0;
1309     vfe_state->vfe2.interface_descriptor_base = 
1310         pp_context->idrt.bo->offset >> 4; /* reloc */
1311     dri_bo_emit_reloc(bo,
1312                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1313                       0,
1314                       offsetof(struct i965_vfe_state, vfe2),
1315                       pp_context->idrt.bo);
1316     dri_bo_unmap(bo);
1317 }
1318
1319 static void
1320 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
1321 {
1322     unsigned char *constant_buffer;
1323     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1324
1325     assert(sizeof(*pp_static_parameter) == 128);
1326     dri_bo_map(pp_context->curbe.bo, 1);
1327     assert(pp_context->curbe.bo->virtual);
1328     constant_buffer = pp_context->curbe.bo->virtual;
1329     memcpy(constant_buffer, pp_static_parameter, sizeof(*pp_static_parameter));
1330     dri_bo_unmap(pp_context->curbe.bo);
1331 }
1332
1333 static void
1334 ironlake_pp_states_setup(VADriverContextP ctx,
1335                          struct i965_post_processing_context *pp_context)
1336 {
1337     ironlake_pp_interface_descriptor_table(pp_context);
1338     ironlake_pp_vfe_state(pp_context);
1339     ironlake_pp_upload_constants(pp_context);
1340 }
1341
1342 static void
1343 ironlake_pp_pipeline_select(VADriverContextP ctx,
1344                             struct i965_post_processing_context *pp_context)
1345 {
1346     struct intel_batchbuffer *batch = pp_context->batch;
1347
1348     BEGIN_BATCH(batch, 1);
1349     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
1350     ADVANCE_BATCH(batch);
1351 }
1352
1353 static void
1354 ironlake_pp_urb_layout(VADriverContextP ctx,
1355                        struct i965_post_processing_context *pp_context)
1356 {
1357     struct intel_batchbuffer *batch = pp_context->batch;
1358     unsigned int vfe_fence, cs_fence;
1359
1360     vfe_fence = pp_context->urb.cs_start;
1361     cs_fence = pp_context->urb.size;
1362
1363     BEGIN_BATCH(batch, 3);
1364     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
1365     OUT_BATCH(batch, 0);
1366     OUT_BATCH(batch, 
1367               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
1368               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
1369     ADVANCE_BATCH(batch);
1370 }
1371
1372 static void
1373 ironlake_pp_state_base_address(VADriverContextP ctx,
1374                                struct i965_post_processing_context *pp_context)
1375 {
1376     struct intel_batchbuffer *batch = pp_context->batch;
1377
1378     BEGIN_BATCH(batch, 8);
1379     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
1380     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1381     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
1382     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1383     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1384     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1385     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1386     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1387     ADVANCE_BATCH(batch);
1388 }
1389
1390 static void
1391 ironlake_pp_state_pointers(VADriverContextP ctx,
1392                            struct i965_post_processing_context *pp_context)
1393 {
1394     struct intel_batchbuffer *batch = pp_context->batch;
1395
1396     BEGIN_BATCH(batch, 3);
1397     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
1398     OUT_BATCH(batch, 0);
1399     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1400     ADVANCE_BATCH(batch);
1401 }
1402
1403 static void 
1404 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
1405                           struct i965_post_processing_context *pp_context)
1406 {
1407     struct intel_batchbuffer *batch = pp_context->batch;
1408
1409     BEGIN_BATCH(batch, 2);
1410     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
1411     OUT_BATCH(batch,
1412               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
1413               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
1414     ADVANCE_BATCH(batch);
1415 }
1416
1417 static void
1418 ironlake_pp_constant_buffer(VADriverContextP ctx,
1419                             struct i965_post_processing_context *pp_context)
1420 {
1421     struct intel_batchbuffer *batch = pp_context->batch;
1422
1423     BEGIN_BATCH(batch, 2);
1424     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1425     OUT_RELOC(batch, pp_context->curbe.bo,
1426               I915_GEM_DOMAIN_INSTRUCTION, 0,
1427               pp_context->urb.size_cs_entry - 1);
1428     ADVANCE_BATCH(batch);    
1429 }
1430
1431 static void
1432 ironlake_pp_object_walker(VADriverContextP ctx,
1433                           struct i965_post_processing_context *pp_context)
1434 {
1435     struct intel_batchbuffer *batch = pp_context->batch;
1436     int x, x_steps, y, y_steps;
1437     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1438
1439     x_steps = pp_context->pp_x_steps(pp_context->private_context);
1440     y_steps = pp_context->pp_y_steps(pp_context->private_context);
1441
1442     for (y = 0; y < y_steps; y++) {
1443         for (x = 0; x < x_steps; x++) {
1444             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
1445                 BEGIN_BATCH(batch, 20);
1446                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
1447                 OUT_BATCH(batch, 0);
1448                 OUT_BATCH(batch, 0); /* no indirect data */
1449                 OUT_BATCH(batch, 0);
1450
1451                 /* inline data grf 5-6 */
1452                 assert(sizeof(*pp_inline_parameter) == 64);
1453                 intel_batchbuffer_data(batch, pp_inline_parameter, sizeof(*pp_inline_parameter));
1454
1455                 ADVANCE_BATCH(batch);
1456             }
1457         }
1458     }
1459 }
1460
1461 static void
1462 ironlake_pp_pipeline_setup(VADriverContextP ctx,
1463                            struct i965_post_processing_context *pp_context)
1464 {
1465     struct intel_batchbuffer *batch = pp_context->batch;
1466
1467     intel_batchbuffer_start_atomic(batch, 0x1000);
1468     intel_batchbuffer_emit_mi_flush(batch);
1469     ironlake_pp_pipeline_select(ctx, pp_context);
1470     ironlake_pp_state_base_address(ctx, pp_context);
1471     ironlake_pp_state_pointers(ctx, pp_context);
1472     ironlake_pp_urb_layout(ctx, pp_context);
1473     ironlake_pp_cs_urb_layout(ctx, pp_context);
1474     ironlake_pp_constant_buffer(ctx, pp_context);
1475     ironlake_pp_object_walker(ctx, pp_context);
1476     intel_batchbuffer_end_atomic(batch);
1477 }
1478
1479 // update u/v offset when the surface format are packed yuv
1480 static void i965_update_src_surface_static_parameter(
1481     VADriverContextP    ctx, 
1482     struct i965_post_processing_context *pp_context,
1483     const struct i965_surface *surface)
1484 {
1485     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1486     int fourcc = pp_get_surface_fourcc(ctx, surface);
1487
1488     switch (fourcc) {
1489     case VA_FOURCC_YUY2:
1490         pp_static_parameter->grf1.source_packed_u_offset = 1;
1491         pp_static_parameter->grf1.source_packed_v_offset = 3;
1492         break;
1493     case VA_FOURCC_UYVY:
1494         pp_static_parameter->grf1.source_packed_y_offset = 1;
1495         pp_static_parameter->grf1.source_packed_v_offset = 2;
1496         break;
1497     case VA_FOURCC_BGRX:
1498     case VA_FOURCC_BGRA:
1499         pp_static_parameter->grf1.source_rgb_layout = 0;
1500         break;
1501     case VA_FOURCC_RGBX:
1502     case VA_FOURCC_RGBA:
1503         pp_static_parameter->grf1.source_rgb_layout = 1;
1504         break;
1505     default:
1506         break;
1507     }
1508     
1509 }
1510
1511 static void i965_update_dst_surface_static_parameter(
1512     VADriverContextP    ctx, 
1513     struct i965_post_processing_context *pp_context,
1514     const struct i965_surface *surface)
1515 {
1516     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1517     int fourcc = pp_get_surface_fourcc(ctx, surface);
1518
1519     switch (fourcc) {
1520     case VA_FOURCC_YUY2:
1521         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_u_offset = 1;
1522         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 3;
1523         break;
1524     case VA_FOURCC_UYVY:
1525         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_y_offset = 1;
1526         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 2;
1527         break;
1528     case VA_FOURCC_BGRX:
1529     case VA_FOURCC_BGRA:
1530         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 0;
1531         break;
1532     case VA_FOURCC_RGBX:
1533     case VA_FOURCC_RGBA:
1534         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 1;
1535         break;
1536     default:
1537         break;
1538     }
1539     
1540 }
1541
1542 static void
1543 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1544                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1545                           int width, int height, int pitch, int format, 
1546                           int index, int is_target)
1547 {
1548     struct i965_surface_state *ss;
1549     dri_bo *ss_bo;
1550     unsigned int tiling;
1551     unsigned int swizzle;
1552
1553     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1554     ss_bo = pp_context->surface_state_binding_table.bo;
1555     assert(ss_bo);
1556
1557     dri_bo_map(ss_bo, True);
1558     assert(ss_bo->virtual);
1559     ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1560     memset(ss, 0, sizeof(*ss));
1561     ss->ss0.surface_type = I965_SURFACE_2D;
1562     ss->ss0.surface_format = format;
1563     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1564     ss->ss2.width = width - 1;
1565     ss->ss2.height = height - 1;
1566     ss->ss3.pitch = pitch - 1;
1567     pp_set_surface_tiling(ss, tiling);
1568     dri_bo_emit_reloc(ss_bo,
1569                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1570                       surf_bo_offset,
1571                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
1572                       surf_bo);
1573     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1574     dri_bo_unmap(ss_bo);
1575 }
1576
1577 static void
1578 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1579                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1580                            int width, int height, int wpitch,
1581                            int xoffset, int yoffset,
1582                            int format, int interleave_chroma,
1583                            int index)
1584 {
1585     struct i965_surface_state2 *ss2;
1586     dri_bo *ss2_bo;
1587     unsigned int tiling;
1588     unsigned int swizzle;
1589
1590     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1591     ss2_bo = pp_context->surface_state_binding_table.bo;
1592     assert(ss2_bo);
1593
1594     dri_bo_map(ss2_bo, True);
1595     assert(ss2_bo->virtual);
1596     ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1597     memset(ss2, 0, sizeof(*ss2));
1598     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1599     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1600     ss2->ss1.width = width - 1;
1601     ss2->ss1.height = height - 1;
1602     ss2->ss2.pitch = wpitch - 1;
1603     ss2->ss2.interleave_chroma = interleave_chroma;
1604     ss2->ss2.surface_format = format;
1605     ss2->ss3.x_offset_for_cb = xoffset;
1606     ss2->ss3.y_offset_for_cb = yoffset;
1607     pp_set_surface2_tiling(ss2, tiling);
1608     dri_bo_emit_reloc(ss2_bo,
1609                       I915_GEM_DOMAIN_RENDER, 0,
1610                       surf_bo_offset,
1611                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
1612                       surf_bo);
1613     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1614     dri_bo_unmap(ss2_bo);
1615 }
1616
1617 static void
1618 gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1619                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1620                           int width, int height, int pitch, int format, 
1621                           int index, int is_target)
1622 {
1623     struct i965_driver_data * const i965 = i965_driver_data(ctx);  
1624     struct gen7_surface_state *ss;
1625     dri_bo *ss_bo;
1626     unsigned int tiling;
1627     unsigned int swizzle;
1628
1629     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1630     ss_bo = pp_context->surface_state_binding_table.bo;
1631     assert(ss_bo);
1632
1633     dri_bo_map(ss_bo, True);
1634     assert(ss_bo->virtual);
1635     ss = (struct gen7_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1636     memset(ss, 0, sizeof(*ss));
1637     ss->ss0.surface_type = I965_SURFACE_2D;
1638     ss->ss0.surface_format = format;
1639     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1640     ss->ss2.width = width - 1;
1641     ss->ss2.height = height - 1;
1642     ss->ss3.pitch = pitch - 1;
1643     gen7_pp_set_surface_tiling(ss, tiling);
1644     if (IS_HASWELL(i965->intel.device_info))
1645         gen7_render_set_surface_scs(ss);
1646     dri_bo_emit_reloc(ss_bo,
1647                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1648                       surf_bo_offset,
1649                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
1650                       surf_bo);
1651     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1652     dri_bo_unmap(ss_bo);
1653 }
1654
1655 static void
1656 gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1657                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1658                            int width, int height, int wpitch,
1659                            int xoffset, int yoffset,
1660                            int format, int interleave_chroma,
1661                            int index)
1662 {
1663     struct gen7_surface_state2 *ss2;
1664     dri_bo *ss2_bo;
1665     unsigned int tiling;
1666     unsigned int swizzle;
1667
1668     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1669     ss2_bo = pp_context->surface_state_binding_table.bo;
1670     assert(ss2_bo);
1671
1672     dri_bo_map(ss2_bo, True);
1673     assert(ss2_bo->virtual);
1674     ss2 = (struct gen7_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1675     memset(ss2, 0, sizeof(*ss2));
1676     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1677     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1678     ss2->ss1.width = width - 1;
1679     ss2->ss1.height = height - 1;
1680     ss2->ss2.pitch = wpitch - 1;
1681     ss2->ss2.interleave_chroma = interleave_chroma;
1682     ss2->ss2.surface_format = format;
1683     ss2->ss3.x_offset_for_cb = xoffset;
1684     ss2->ss3.y_offset_for_cb = yoffset;
1685     gen7_pp_set_surface2_tiling(ss2, tiling);
1686     dri_bo_emit_reloc(ss2_bo,
1687                       I915_GEM_DOMAIN_RENDER, 0,
1688                       surf_bo_offset,
1689                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
1690                       surf_bo);
1691     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1692     dri_bo_unmap(ss2_bo);
1693 }
1694
1695 static void 
1696 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1697                                 const struct i965_surface *surface, 
1698                                 int base_index, int is_target,
1699                                 int *width, int *height, int *pitch, int *offset)
1700 {
1701     struct object_surface *obj_surface;
1702     struct object_image *obj_image;
1703     dri_bo *bo;
1704     int fourcc = pp_get_surface_fourcc(ctx, surface);
1705     const int Y = 0;
1706     const int U = ((fourcc == VA_FOURCC_YV12) ||
1707                    (fourcc == VA_FOURCC_YV16))
1708                    ? 2 : 1;
1709     const int V = ((fourcc == VA_FOURCC_YV12) ||
1710                    (fourcc == VA_FOURCC_YV16))
1711                    ? 1 : 2;
1712     const int UV = 1;
1713     int interleaved_uv = fourcc == VA_FOURCC_NV12;
1714     int packed_yuv = (fourcc == VA_FOURCC_YUY2 || fourcc == VA_FOURCC_UYVY);
1715     int full_packed_format = (fourcc == VA_FOURCC_RGBA ||
1716                               fourcc == VA_FOURCC_RGBX ||
1717                               fourcc == VA_FOURCC_BGRA ||
1718                               fourcc == VA_FOURCC_BGRX);
1719     int scale_factor_of_1st_plane_width_in_byte = 1;
1720                               
1721     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1722         obj_surface = (struct object_surface *)surface->base;
1723         bo = obj_surface->bo;
1724         width[0] = obj_surface->orig_width;
1725         height[0] = obj_surface->orig_height;
1726         pitch[0] = obj_surface->width;
1727         offset[0] = 0;
1728
1729         if (full_packed_format) {
1730             scale_factor_of_1st_plane_width_in_byte = 4; 
1731         }
1732         else if (packed_yuv ) {
1733             scale_factor_of_1st_plane_width_in_byte =  2; 
1734         }
1735         else if (interleaved_uv) {
1736             width[1] = obj_surface->orig_width;
1737             height[1] = obj_surface->orig_height / 2;
1738             pitch[1] = obj_surface->width;
1739             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1740         } else {
1741             width[1] = obj_surface->orig_width / 2;
1742             height[1] = obj_surface->orig_height / 2;
1743             pitch[1] = obj_surface->width / 2;
1744             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1745             width[2] = obj_surface->orig_width / 2;
1746             height[2] = obj_surface->orig_height / 2;
1747             pitch[2] = obj_surface->width / 2;
1748             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
1749         }
1750     } else {
1751         obj_image = (struct object_image *)surface->base;
1752         bo = obj_image->bo;
1753         width[0] = obj_image->image.width;
1754         height[0] = obj_image->image.height;
1755         pitch[0] = obj_image->image.pitches[0];
1756         offset[0] = obj_image->image.offsets[0];
1757
1758         if (full_packed_format) {
1759             scale_factor_of_1st_plane_width_in_byte = 4;
1760         }
1761         else if (packed_yuv ) {
1762             scale_factor_of_1st_plane_width_in_byte = 2;
1763         }
1764         else if (interleaved_uv) {
1765             width[1] = obj_image->image.width;
1766             height[1] = obj_image->image.height / 2;
1767             pitch[1] = obj_image->image.pitches[1];
1768             offset[1] = obj_image->image.offsets[1];
1769         } else {
1770             width[1] = obj_image->image.width / 2;
1771             height[1] = obj_image->image.height / 2;
1772             pitch[1] = obj_image->image.pitches[1];
1773             offset[1] = obj_image->image.offsets[1];
1774             width[2] = obj_image->image.width / 2;
1775             height[2] = obj_image->image.height / 2;
1776             pitch[2] = obj_image->image.pitches[2];
1777             offset[2] = obj_image->image.offsets[2];
1778             if (fourcc == VA_FOURCC_YV16) {
1779                 width[1] = obj_image->image.width / 2;
1780                 height[1] = obj_image->image.height;
1781                 width[2] = obj_image->image.width / 2;
1782                 height[2] = obj_image->image.height;
1783             }
1784         }
1785     }
1786
1787     /* Y surface */
1788     i965_pp_set_surface_state(ctx, pp_context,
1789                               bo, offset[Y],
1790                               width[Y] *scale_factor_of_1st_plane_width_in_byte / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
1791                               base_index, is_target);
1792
1793     if (!packed_yuv && !full_packed_format) {
1794         if (interleaved_uv) {
1795             i965_pp_set_surface_state(ctx, pp_context,
1796                                       bo, offset[UV],
1797                                       width[UV] / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
1798                                       base_index + 1, is_target);
1799         } else {
1800             /* U surface */
1801             i965_pp_set_surface_state(ctx, pp_context,
1802                                       bo, offset[U],
1803                                       width[U] / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
1804                                       base_index + 1, is_target);
1805
1806             /* V surface */
1807             i965_pp_set_surface_state(ctx, pp_context,
1808                                       bo, offset[V],
1809                                       width[V] / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
1810                                       base_index + 2, is_target);
1811         }
1812     }
1813
1814 }
1815
1816 static void 
1817 gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1818                                      const struct i965_surface *surface, 
1819                                      int base_index, int is_target,
1820                                      int *width, int *height, int *pitch, int *offset)
1821 {
1822     struct object_surface *obj_surface;
1823     struct object_image *obj_image;
1824     dri_bo *bo;
1825     int fourcc = pp_get_surface_fourcc(ctx, surface);
1826     const i965_fourcc_info *fourcc_info = get_fourcc_info(fourcc);
1827
1828     if (fourcc_info == NULL)
1829         return;
1830
1831     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1832         obj_surface = (struct object_surface *)surface->base;
1833         bo = obj_surface->bo;
1834         width[0] = obj_surface->orig_width;
1835         height[0] = obj_surface->orig_height;
1836         pitch[0] = obj_surface->width;
1837         offset[0] = 0;
1838
1839         if (fourcc_info->num_planes == 1 && is_target)
1840             width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */
1841
1842         width[1] = obj_surface->cb_cr_width;
1843         height[1] = obj_surface->cb_cr_height;
1844         pitch[1] = obj_surface->cb_cr_pitch;
1845         offset[1] = obj_surface->y_cb_offset * obj_surface->width;
1846
1847         width[2] = obj_surface->cb_cr_width;
1848         height[2] = obj_surface->cb_cr_height;
1849         pitch[2] = obj_surface->cb_cr_pitch;
1850         offset[2] = obj_surface->y_cr_offset * obj_surface->width;
1851     } else {
1852         int U = 0, V = 0;
1853
1854         /* FIXME: add support for ARGB/ABGR image */
1855         obj_image = (struct object_image *)surface->base;
1856         bo = obj_image->bo;
1857         width[0] = obj_image->image.width;
1858         height[0] = obj_image->image.height;
1859         pitch[0] = obj_image->image.pitches[0];
1860         offset[0] = obj_image->image.offsets[0];
1861
1862         if (fourcc_info->num_planes == 1) {
1863             if (is_target)
1864                 width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */
1865         } else if (fourcc_info->num_planes == 2) {
1866             U = 1, V = 1;
1867         } else {
1868             assert(fourcc_info->num_components == 3);
1869
1870             U = fourcc_info->components[1].plane;
1871             V = fourcc_info->components[2].plane;
1872             assert((U == 1 && V == 2) ||
1873                    (U == 2 && V == 1));
1874         }
1875
1876         /* Always set width/height although they aren't used for fourcc_info->num_planes == 1 */
1877         width[1] = obj_image->image.width / fourcc_info->hfactor;
1878         height[1] = obj_image->image.height / fourcc_info->vfactor;
1879         pitch[1] = obj_image->image.pitches[U];
1880         offset[1] = obj_image->image.offsets[U];
1881
1882         width[2] = obj_image->image.width / fourcc_info->hfactor;
1883         height[2] = obj_image->image.height / fourcc_info->vfactor;
1884         pitch[2] = obj_image->image.pitches[V];
1885         offset[2] = obj_image->image.offsets[V];
1886     }
1887
1888     if (is_target) {
1889         gen7_pp_set_surface_state(ctx, pp_context,
1890                                   bo, 0,
1891                                   width[0] / 4, height[0], pitch[0],
1892                                   I965_SURFACEFORMAT_R8_UINT,
1893                                   base_index, 1);
1894
1895         if (fourcc_info->num_planes == 2) {
1896             gen7_pp_set_surface_state(ctx, pp_context,
1897                                       bo, offset[1],
1898                                       width[1] / 2, height[1], pitch[1],
1899                                       I965_SURFACEFORMAT_R8G8_SINT,
1900                                       base_index + 1, 1);
1901         } else if (fourcc_info->num_planes == 3) {
1902             gen7_pp_set_surface_state(ctx, pp_context,
1903                                       bo, offset[1],
1904                                       width[1] / 4, height[1], pitch[1],
1905                                       I965_SURFACEFORMAT_R8_SINT,
1906                                       base_index + 1, 1);
1907             gen7_pp_set_surface_state(ctx, pp_context,
1908                                       bo, offset[2],
1909                                       width[2] / 4, height[2], pitch[2],
1910                                       I965_SURFACEFORMAT_R8_SINT,
1911                                       base_index + 2, 1);
1912         }
1913
1914         if (fourcc_info->format == I965_COLOR_RGB) {
1915             struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1916             /* the format is MSB: X-B-G-R */
1917             pp_static_parameter->grf2.save_avs_rgb_swap = 0;
1918             if ((fourcc == VA_FOURCC_BGRA) ||
1919                 (fourcc == VA_FOURCC_BGRX)) {
1920                 /* It is stored as MSB: X-R-G-B */
1921                 pp_static_parameter->grf2.save_avs_rgb_swap = 1;
1922             }
1923         }
1924     } else {
1925         int format0 = SURFACE_FORMAT_Y8_UNORM;
1926
1927         switch (fourcc) {
1928         case VA_FOURCC_YUY2:
1929             format0 = SURFACE_FORMAT_YCRCB_NORMAL;
1930             break;
1931
1932         case VA_FOURCC_UYVY:
1933             format0 = SURFACE_FORMAT_YCRCB_SWAPY;
1934             break;
1935
1936         default:
1937             break;
1938         }
1939
1940         if (fourcc_info->format == I965_COLOR_RGB) {
1941             struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1942             /* Only R8G8B8A8_UNORM is supported for BGRX or RGBX */
1943             format0 = SURFACE_FORMAT_R8G8B8A8_UNORM;
1944             pp_static_parameter->grf2.src_avs_rgb_swap = 0;
1945             if ((fourcc == VA_FOURCC_BGRA) ||
1946                 (fourcc == VA_FOURCC_BGRX)) {
1947                 pp_static_parameter->grf2.src_avs_rgb_swap = 1;
1948             }
1949         }
1950
1951         gen7_pp_set_surface2_state(ctx, pp_context,
1952                                    bo, offset[0],
1953                                    width[0], height[0], pitch[0],
1954                                    0, 0,
1955                                    format0, 0,
1956                                    base_index);
1957
1958         if (fourcc_info->num_planes == 2) {
1959             gen7_pp_set_surface2_state(ctx, pp_context,
1960                                        bo, offset[1],
1961                                        width[1], height[1], pitch[1],
1962                                        0, 0,
1963                                        SURFACE_FORMAT_R8B8_UNORM, 0,
1964                                        base_index + 1);
1965         } else if (fourcc_info->num_planes == 3) {
1966             gen7_pp_set_surface2_state(ctx, pp_context,
1967                                        bo, offset[1],
1968                                        width[1], height[1], pitch[1],
1969                                        0, 0,
1970                                        SURFACE_FORMAT_R8_UNORM, 0,
1971                                        base_index + 1);
1972             gen7_pp_set_surface2_state(ctx, pp_context,
1973                                        bo, offset[2],
1974                                        width[2], height[2], pitch[2],
1975                                        0, 0,
1976                                        SURFACE_FORMAT_R8_UNORM, 0,
1977                                        base_index + 2);
1978         }
1979     }
1980 }
1981
/*
 * pp_x_steps hook installed by pp_null_initialize().
 * The argument is never looked at; the answer is always 1.
 */
static int
pp_null_x_steps(void *private_context)
{
    enum { NULL_X_STEP_COUNT = 1 };

    (void)private_context;

    return NULL_X_STEP_COUNT;
}
1987
/*
 * pp_y_steps hook installed by pp_null_initialize().
 * The argument is never looked at; the answer is always 1.
 */
static int
pp_null_y_steps(void *private_context)
{
    enum { NULL_Y_STEP_COUNT = 1 };

    (void)private_context;

    return NULL_Y_STEP_COUNT;
}
1993
/*
 * pp_set_block_parameter hook installed by pp_null_initialize().
 * Nothing is written for block (x, y); the function only reports 0.
 */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;
    (void)x;
    (void)y;

    return 0;
}
1999
2000 static VAStatus
2001 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2002                    const struct i965_surface *src_surface,
2003                    const VARectangle *src_rect,
2004                    struct i965_surface *dst_surface,
2005                    const VARectangle *dst_rect,
2006                    void *filter_param)
2007 {
2008     /* private function & data */
2009     pp_context->pp_x_steps = pp_null_x_steps;
2010     pp_context->pp_y_steps = pp_null_y_steps;
2011     pp_context->private_context = NULL;
2012     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
2013
2014     dst_surface->flags = src_surface->flags;
2015
2016     return VA_STATUS_SUCCESS;
2017 }
2018
/*
 * pp_x_steps hook installed by pp_plx_load_save_plx_initialize().
 * The argument is never looked at; the answer is always 1.
 */
static int
pp_load_save_x_steps(void *private_context)
{
    enum { LOAD_SAVE_X_STEP_COUNT = 1 };

    (void)private_context;

    return LOAD_SAVE_X_STEP_COUNT;
}
2024
2025 static int
2026 pp_load_save_y_steps(void *private_context)
2027 {
2028     struct pp_load_save_context *pp_load_save_context = private_context;
2029
2030     return pp_load_save_context->dest_h / 8;
2031 }
2032
2033 static int
2034 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2035 {
2036     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2037     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)pp_context->private_context;
2038
2039     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_load_save_context->dest_x;
2040     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_load_save_context->dest_y;
2041
2042     return 0;
2043 }
2044
2045 static void calculate_boundary_block_mask(struct i965_post_processing_context *pp_context, const VARectangle *dst_rect)
2046 {
2047     int i;
2048     /* x offset of dest surface must be dword aligned.
2049      * so we have to extend dst surface on left edge, and mask out pixels not interested
2050      */
2051     if (dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT) {
2052         pp_context->block_horizontal_mask_left = 0;
2053         for (i=dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; i<GPU_ASM_BLOCK_WIDTH; i++)
2054         {
2055             pp_context->block_horizontal_mask_left |= 1<<i;
2056         }
2057     }
2058     else {
2059         pp_context->block_horizontal_mask_left = 0xffff;
2060     }
2061     
2062     int dst_width_adjust = dst_rect->width + dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; 
2063     if (dst_width_adjust%GPU_ASM_BLOCK_WIDTH){
2064         pp_context->block_horizontal_mask_right = (1 << (dst_width_adjust%GPU_ASM_BLOCK_WIDTH)) - 1;
2065     }
2066     else {
2067         pp_context->block_horizontal_mask_right = 0xffff;
2068     }
2069     
2070     if (dst_rect->height%GPU_ASM_BLOCK_HEIGHT){
2071         pp_context->block_vertical_mask_bottom = (1 << (dst_rect->height%GPU_ASM_BLOCK_HEIGHT)) - 1;
2072     }
2073     else {
2074         pp_context->block_vertical_mask_bottom = 0xff;
2075     }
2076
2077 }
2078 static VAStatus
2079 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2080                                 const struct i965_surface *src_surface,
2081                                 const VARectangle *src_rect,
2082                                 struct i965_surface *dst_surface,
2083                                 const VARectangle *dst_rect,
2084                                 void *filter_param)
2085 {
2086     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->pp_load_save_context;
2087     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2088     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2089     int width[3], height[3], pitch[3], offset[3];
2090
2091     /* source surface */
2092     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
2093                                     width, height, pitch, offset);
2094
2095     /* destination surface */
2096     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
2097                                     width, height, pitch, offset);
2098
2099     /* private function & data */
2100     pp_context->pp_x_steps = pp_load_save_x_steps;
2101     pp_context->pp_y_steps = pp_load_save_y_steps;
2102     pp_context->private_context = &pp_context->pp_load_save_context;
2103     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
2104
2105     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;;
2106     pp_load_save_context->dest_x = dst_rect->x - dst_left_edge_extend;
2107     pp_load_save_context->dest_y = dst_rect->y;
2108     pp_load_save_context->dest_h = ALIGN(dst_rect->height, 8);
2109     pp_load_save_context->dest_w = ALIGN(dst_rect->width+dst_left_edge_extend, 16);
2110
2111     pp_inline_parameter->grf5.block_count_x = pp_load_save_context->dest_w / 16;   /* 1 x N */
2112     pp_inline_parameter->grf5.number_blocks = pp_load_save_context->dest_w / 16;
2113
2114     pp_static_parameter->grf3.horizontal_origin_offset = src_rect->x;
2115     pp_static_parameter->grf3.vertical_origin_offset = src_rect->y;
2116
2117     // update u/v offset for packed yuv
2118     i965_update_src_surface_static_parameter (ctx, pp_context, src_surface);
2119     i965_update_dst_surface_static_parameter (ctx, pp_context, dst_surface);
2120
2121     dst_surface->flags = src_surface->flags;
2122
2123     return VA_STATUS_SUCCESS;
2124 }
2125
/*
 * pp_x_steps hook installed by pp_nv12_scaling_initialize().
 * The argument is never looked at; the answer is always 1.
 */
static int
pp_scaling_x_steps(void *private_context)
{
    enum { SCALING_X_STEP_COUNT = 1 };

    (void)private_context;

    return SCALING_X_STEP_COUNT;
}
2131
2132 static int
2133 pp_scaling_y_steps(void *private_context)
2134 {
2135     struct pp_scaling_context *pp_scaling_context = private_context;
2136
2137     return pp_scaling_context->dest_h / 8;
2138 }
2139
2140 static int
2141 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2142 {
2143     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)pp_context->private_context;
2144     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2145     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2146     float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2147     float src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2148
2149     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
2150     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
2151     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
2152     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
2153     
2154     return 0;
2155 }
2156
2157 static VAStatus
2158 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2159                            const struct i965_surface *src_surface,
2160                            const VARectangle *src_rect,
2161                            struct i965_surface *dst_surface,
2162                            const VARectangle *dst_rect,
2163                            void *filter_param)
2164 {
2165     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->pp_scaling_context;
2166     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2167     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2168     struct object_surface *obj_surface;
2169     struct i965_sampler_state *sampler_state;
2170     int in_w, in_h, in_wpitch, in_hpitch;
2171     int out_w, out_h, out_wpitch, out_hpitch;
2172
2173     /* source surface */
2174     obj_surface = (struct object_surface *)src_surface->base;
2175     in_w = obj_surface->orig_width;
2176     in_h = obj_surface->orig_height;
2177     in_wpitch = obj_surface->width;
2178     in_hpitch = obj_surface->height;
2179
2180     /* source Y surface index 1 */
2181     i965_pp_set_surface_state(ctx, pp_context,
2182                               obj_surface->bo, 0,
2183                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2184                               1, 0);
2185
2186     /* source UV surface index 2 */
2187     i965_pp_set_surface_state(ctx, pp_context,
2188                               obj_surface->bo, in_wpitch * in_hpitch,
2189                               in_w / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2190                               2, 0);
2191
2192     /* destination surface */
2193     obj_surface = (struct object_surface *)dst_surface->base;
2194     out_w = obj_surface->orig_width;
2195     out_h = obj_surface->orig_height;
2196     out_wpitch = obj_surface->width;
2197     out_hpitch = obj_surface->height;
2198
2199     /* destination Y surface index 7 */
2200     i965_pp_set_surface_state(ctx, pp_context,
2201                               obj_surface->bo, 0,
2202                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2203                               7, 1);
2204
2205     /* destination UV surface index 8 */
2206     i965_pp_set_surface_state(ctx, pp_context,
2207                               obj_surface->bo, out_wpitch * out_hpitch,
2208                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2209                               8, 1);
2210
2211     /* sampler state */
2212     dri_bo_map(pp_context->sampler_state_table.bo, True);
2213     assert(pp_context->sampler_state_table.bo->virtual);
2214     sampler_state = pp_context->sampler_state_table.bo->virtual;
2215
2216     /* SIMD16 Y index 1 */
2217     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
2218     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2219     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2220     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2221     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2222
2223     /* SIMD16 UV index 2 */
2224     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
2225     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2226     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2227     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2228     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2229
2230     dri_bo_unmap(pp_context->sampler_state_table.bo);
2231
2232     /* private function & data */
2233     pp_context->pp_x_steps = pp_scaling_x_steps;
2234     pp_context->pp_y_steps = pp_scaling_y_steps;
2235     pp_context->private_context = &pp_context->pp_scaling_context;
2236     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
2237
2238     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
2239     float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
2240     pp_scaling_context->dest_x = dst_rect->x - dst_left_edge_extend;
2241     pp_scaling_context->dest_y = dst_rect->y;
2242     pp_scaling_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2243     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 8);
2244     pp_scaling_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
2245     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
2246
2247     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2248
2249     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
2250     pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
2251     pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
2252
2253     dst_surface->flags = src_surface->flags;
2254
2255     return VA_STATUS_SUCCESS;
2256 }
2257
2258 static int
2259 pp_avs_x_steps(void *private_context)
2260 {
2261     struct pp_avs_context *pp_avs_context = private_context;
2262
2263     return pp_avs_context->dest_w / 16;
2264 }
2265
/*
 * y-steps callback of the AVS module.
 * The argument is never looked at; the answer is always 1.
 */
static int
pp_avs_y_steps(void *private_context)
{
    enum { AVS_Y_STEP_COUNT = 1 };

    (void)private_context;

    return AVS_Y_STEP_COUNT;
}
2271
2272 static int
2273 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2274 {
2275     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context;
2276     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2277     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2278     float src_x_steping, src_y_steping, video_step_delta;
2279     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
2280
2281     if (pp_static_parameter->grf4.r4_2.avs.nlas == 0) {
2282         src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2283         pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_avs_context->src_normalized_x;
2284     } else if (tmp_w >= pp_avs_context->dest_w) {
2285         pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
2286         pp_inline_parameter->grf6.video_step_delta = 0;
2287         
2288         if (x == 0) {
2289             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
2290                 pp_avs_context->src_normalized_x;
2291         } else {
2292             src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2293             video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2294             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2295                 16 * 15 * video_step_delta / 2;
2296         }
2297     } else {
2298         int n0, n1, n2, nls_left, nls_right;
2299         int factor_a = 5, factor_b = 4;
2300         float f;
2301
2302         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
2303         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
2304         n2 = tmp_w / (16 * factor_a);
2305         nls_left = n0 + n2;
2306         nls_right = n1 + n2;
2307         f = (float) n2 * 16 / tmp_w;
2308         
2309         if (n0 < 5) {
2310             pp_inline_parameter->grf6.video_step_delta = 0.0;
2311
2312             if (x == 0) {
2313                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
2314                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
2315             } else {
2316                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2317                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2318                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2319                     16 * 15 * video_step_delta / 2;
2320             }
2321         } else {
2322             if (x < nls_left) {
2323                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
2324                 float a = f / (nls_left * 16 * factor_b);
2325                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
2326                 
2327                 pp_inline_parameter->grf6.video_step_delta = b;
2328
2329                 if (x == 0) {
2330                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
2331                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a;
2332                 } else {
2333                     src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2334                     video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2335                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2336                         16 * 15 * video_step_delta / 2;
2337                     pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
2338                 }
2339             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
2340                 /* scale the center linearly */
2341                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2342                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2343                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2344                     16 * 15 * video_step_delta / 2;
2345                 pp_inline_parameter->grf6.video_step_delta = 0.0;
2346                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
2347             } else {
2348                 float a = f / (nls_right * 16 * factor_b);
2349                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
2350
2351                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2352                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2353                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2354                     16 * 15 * video_step_delta / 2;
2355                 pp_inline_parameter->grf6.video_step_delta = -b;
2356
2357                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
2358                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
2359                 else
2360                     pp_inline_parameter->grf5.normalized_video_x_scaling_step -= b * 16;
2361             }
2362         }
2363     }
2364
2365     src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2366     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
2367     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2368     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
2369
2370     return 0;
2371 }
2372
2373 static VAStatus
2374 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2375                        const struct i965_surface *src_surface,
2376                        const VARectangle *src_rect,
2377                        struct i965_surface *dst_surface,
2378                        const VARectangle *dst_rect,
2379                        void *filter_param,
2380                        int nlas)
2381 {
2382     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context;
2383     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2384     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2385     struct object_surface *obj_surface;
2386     struct i965_sampler_8x8 *sampler_8x8;
2387     struct i965_sampler_8x8_state *sampler_8x8_state;
2388     int index;
2389     int in_w, in_h, in_wpitch, in_hpitch;
2390     int out_w, out_h, out_wpitch, out_hpitch;
2391     int i;
2392
2393     /* surface */
2394     obj_surface = (struct object_surface *)src_surface->base;
2395     in_w = obj_surface->orig_width;
2396     in_h = obj_surface->orig_height;
2397     in_wpitch = obj_surface->width;
2398     in_hpitch = obj_surface->height;
2399
2400     /* source Y surface index 1 */
2401     i965_pp_set_surface2_state(ctx, pp_context,
2402                                obj_surface->bo, 0,
2403                                in_w, in_h, in_wpitch,
2404                                0, 0,
2405                                SURFACE_FORMAT_Y8_UNORM, 0,
2406                                1);
2407
2408     /* source UV surface index 2 */
2409     i965_pp_set_surface2_state(ctx, pp_context,
2410                                obj_surface->bo, in_wpitch * in_hpitch,
2411                                in_w / 2, in_h / 2, in_wpitch,
2412                                0, 0,
2413                                SURFACE_FORMAT_R8B8_UNORM, 0,
2414                                2);
2415
2416     /* destination surface */
2417     obj_surface = (struct object_surface *)dst_surface->base;
2418     out_w = obj_surface->orig_width;
2419     out_h = obj_surface->orig_height;
2420     out_wpitch = obj_surface->width;
2421     out_hpitch = obj_surface->height;
2422     assert(out_w <= out_wpitch && out_h <= out_hpitch);
2423
2424     /* destination Y surface index 7 */
2425     i965_pp_set_surface_state(ctx, pp_context,
2426                               obj_surface->bo, 0,
2427                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2428                               7, 1);
2429
2430     /* destination UV surface index 8 */
2431     i965_pp_set_surface_state(ctx, pp_context,
2432                               obj_surface->bo, out_wpitch * out_hpitch,
2433                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2434                               8, 1);
2435
2436     /* sampler 8x8 state */
2437     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2438     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2439     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2440     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2441     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2442
2443     for (i = 0; i < 17; i++) {
2444         /* for Y channel, currently ignore */
2445         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x00;
2446         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x00;
2447         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x08;
2448         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x18;
2449         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x18;
2450         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x08;
2451         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x00;
2452         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x00;
2453         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x00;
2454         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x00;
2455         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x10;
2456         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x10;
2457         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x10;
2458         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x10;
2459         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x00;
2460         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x00;
2461         /* for U/V channel, 0.25 */
2462         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2463         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2464         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
2465         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
2466         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
2467         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
2468         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2469         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2470         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2471         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2472         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
2473         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
2474         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
2475         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
2476         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2477         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2478     }
2479
2480     sampler_8x8_state->dw136.default_sharpness_level = 0;
2481     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
2482     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2483     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2484     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2485
2486     /* sampler 8x8 */
2487     dri_bo_map(pp_context->sampler_state_table.bo, True);
2488     assert(pp_context->sampler_state_table.bo->virtual);
2489     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
2490     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2491
2492     /* sample_8x8 Y index 1 */
2493     index = 1;
2494     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2495     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2496     sampler_8x8[index].dw0.ief_bypass = 1;
2497     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2498     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2499     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2500     sampler_8x8[index].dw2.global_noise_estimation = 22;
2501     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2502     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2503     sampler_8x8[index].dw3.strong_edge_weight = 7;
2504     sampler_8x8[index].dw3.regular_weight = 2;
2505     sampler_8x8[index].dw3.non_edge_weight = 0;
2506     sampler_8x8[index].dw3.gain_factor = 40;
2507     sampler_8x8[index].dw4.steepness_boost = 0;
2508     sampler_8x8[index].dw4.steepness_threshold = 0;
2509     sampler_8x8[index].dw4.mr_boost = 0;
2510     sampler_8x8[index].dw4.mr_threshold = 5;
2511     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2512     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2513     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2514     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2515     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2516     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2517     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2518     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2519     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2520     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2521     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2522     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2523     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2524     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2525     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2526     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2527     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2528     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2529     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2530     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2531     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2532     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2533     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2534     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2535     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2536     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2537     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2538     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2539     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2540     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2541     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2542     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2543     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2544     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2545     sampler_8x8[index].dw13.limiter_boost = 0;
2546     sampler_8x8[index].dw13.minimum_limiter = 10;
2547     sampler_8x8[index].dw13.maximum_limiter = 11;
2548     sampler_8x8[index].dw14.clip_limiter = 130;
2549     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2550                       I915_GEM_DOMAIN_RENDER, 
2551                       0,
2552                       0,
2553                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2554                       pp_context->sampler_state_table.bo_8x8);
2555
2556     /* sample_8x8 UV index 2 */
2557     index = 2;
2558     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2559     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2560     sampler_8x8[index].dw0.ief_bypass = 1;
2561     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2562     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2563     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2564     sampler_8x8[index].dw2.global_noise_estimation = 22;
2565     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2566     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2567     sampler_8x8[index].dw3.strong_edge_weight = 7;
2568     sampler_8x8[index].dw3.regular_weight = 2;
2569     sampler_8x8[index].dw3.non_edge_weight = 0;
2570     sampler_8x8[index].dw3.gain_factor = 40;
2571     sampler_8x8[index].dw4.steepness_boost = 0;
2572     sampler_8x8[index].dw4.steepness_threshold = 0;
2573     sampler_8x8[index].dw4.mr_boost = 0;
2574     sampler_8x8[index].dw4.mr_threshold = 5;
2575     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2576     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2577     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2578     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2579     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2580     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2581     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2582     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2583     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2584     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2585     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2586     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2587     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2588     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2589     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2590     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2591     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2592     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2593     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2594     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2595     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2596     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2597     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2598     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2599     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2600     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2601     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2602     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2603     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2604     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2605     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2606     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2607     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2608     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2609     sampler_8x8[index].dw13.limiter_boost = 0;
2610     sampler_8x8[index].dw13.minimum_limiter = 10;
2611     sampler_8x8[index].dw13.maximum_limiter = 11;
2612     sampler_8x8[index].dw14.clip_limiter = 130;
2613     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2614                       I915_GEM_DOMAIN_RENDER, 
2615                       0,
2616                       0,
2617                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2618                       pp_context->sampler_state_table.bo_8x8);
2619
2620     dri_bo_unmap(pp_context->sampler_state_table.bo);
2621
2622     /* private function & data */
2623     pp_context->pp_x_steps = pp_avs_x_steps;
2624     pp_context->pp_y_steps = pp_avs_y_steps;
2625     pp_context->private_context = &pp_context->pp_avs_context;
2626     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
2627
2628     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
2629     float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
2630     pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
2631     pp_avs_context->dest_y = dst_rect->y;
2632     pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2633     pp_avs_context->dest_h = ALIGN(dst_rect->height, 8);
2634     pp_avs_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
2635     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
2636     pp_avs_context->src_w = src_rect->width + src_left_edge_extend;
2637     pp_avs_context->src_h = src_rect->height;
2638
2639     pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
2640     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2641
2642     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
2643     pp_inline_parameter->grf5.block_count_x = 1;        /* M x 1 */
2644     pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
2645     pp_inline_parameter->grf6.video_step_delta = 0.0;
2646
2647     dst_surface->flags = src_surface->flags;
2648
2649     return VA_STATUS_SUCCESS;
2650 }
2651
2652 static VAStatus
2653 pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2654                             const struct i965_surface *src_surface,
2655                             const VARectangle *src_rect,
2656                             struct i965_surface *dst_surface,
2657                             const VARectangle *dst_rect,
2658                             void *filter_param)
2659 {
2660     return pp_nv12_avs_initialize(ctx, pp_context,
2661                                   src_surface,
2662                                   src_rect,
2663                                   dst_surface,
2664                                   dst_rect,
2665                                   filter_param,
2666                                   1);
2667 }
2668
2669 static VAStatus
2670 gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2671                              const struct i965_surface *src_surface,
2672                              const VARectangle *src_rect,
2673                              struct i965_surface *dst_surface,
2674                              const VARectangle *dst_rect,
2675                              void *filter_param)
2676 {
2677     return pp_nv12_avs_initialize(ctx, pp_context,
2678                                   src_surface,
2679                                   src_rect,
2680                                   dst_surface,
2681                                   dst_rect,
2682                                   filter_param,
2683                                   0);    
2684 }
2685
2686 static int
2687 gen7_pp_avs_x_steps(void *private_context)
2688 {
2689     struct pp_avs_context *pp_avs_context = private_context;
2690
2691     return pp_avs_context->dest_w / 16;
2692 }
2693
2694 static int
2695 gen7_pp_avs_y_steps(void *private_context)
2696 {
2697     struct pp_avs_context *pp_avs_context = private_context;
2698
2699     return pp_avs_context->dest_h / 16;
2700 }
2701
2702 static int
2703 gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2704 {
2705     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context;
2706     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2707
2708     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2709     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
2710     pp_inline_parameter->grf7.constant_0 = 0xffffffff;
2711     pp_inline_parameter->grf7.sampler_load_main_video_x_scaling_step = pp_avs_context->horiz_range / pp_avs_context->src_w;
2712
2713     return 0;
2714 }
2715
2716 static void gen7_update_src_surface_uv_offset(VADriverContextP    ctx, 
2717                                               struct i965_post_processing_context *pp_context,
2718                                               const struct i965_surface *surface)
2719 {
2720     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2721     int fourcc = pp_get_surface_fourcc(ctx, surface);
2722     
2723     if (fourcc == VA_FOURCC_YUY2) {
2724         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
2725         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
2726         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
2727     } else if (fourcc == VA_FOURCC_UYVY) {
2728         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 1;
2729         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 0;
2730         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 2;
2731     }
2732 }
2733
2734 static VAStatus
2735 gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2736                            const struct i965_surface *src_surface,
2737                            const VARectangle *src_rect,
2738                            struct i965_surface *dst_surface,
2739                            const VARectangle *dst_rect,
2740                            void *filter_param)
2741 {
2742     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context;
2743     struct i965_driver_data *i965 = i965_driver_data(ctx);
2744     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2745     struct gen7_sampler_8x8 *sampler_8x8;
2746     struct i965_sampler_8x8_state *sampler_8x8_state;
2747     int index, i;
2748     int width[3], height[3], pitch[3], offset[3];
2749     int src_width, src_height;
2750
2751     /* source surface */
2752     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
2753                                          width, height, pitch, offset);
2754     src_width = width[0];
2755     src_height = height[0];
2756
2757     /* destination surface */
2758     gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
2759                                          width, height, pitch, offset);
2760
2761     /* sampler 8x8 state */
2762     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2763     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2764     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2765     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2766     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2767
2768     for (i = 0; i < 17; i++) {
2769         float coff;
2770         coff = i;
2771         coff = coff / 16;
2772         /* for Y channel, currently ignore */
2773         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0;
2774         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0;
2775         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0;
2776         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = intel_format_convert(1 - coff, 1, 6,0);
2777         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = intel_format_convert(coff, 1, 6, 0);
2778         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0;
2779         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0;
2780         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0;
2781         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0;
2782         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0;
2783         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0;
2784         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
2785         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = intel_format_convert(coff, 1, 6, 0);
2786         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0;
2787         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0;
2788         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0;
2789         /* for U/V channel, 0.25 */
2790         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2791         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2792         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x0;
2793         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
2794         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = intel_format_convert(coff, 1, 6, 0);
2795         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0;
2796         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2797         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2798         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2799         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2800         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x0;
2801         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
2802         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = intel_format_convert(coff, 1, 6, 0);
2803         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x0;
2804         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2805         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2806     }
2807
2808     sampler_8x8_state->dw136.default_sharpness_level = 0;
2809     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
2810     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2811     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2812     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2813
2814     /* sampler 8x8 */
2815     dri_bo_map(pp_context->sampler_state_table.bo, True);
2816     assert(pp_context->sampler_state_table.bo->virtual);
2817     assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
2818     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2819
2820     /* sample_8x8 Y index 4 */
2821     index = 4;
2822     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2823     sampler_8x8[index].dw0.global_noise_estimation = 255;
2824     sampler_8x8[index].dw0.ief_bypass = 1;
2825
2826     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2827
2828     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2829     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2830     sampler_8x8[index].dw2.r5x_coefficient = 9;
2831     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2832     sampler_8x8[index].dw2.r5c_coefficient = 3;
2833
2834     sampler_8x8[index].dw3.r3x_coefficient = 27;
2835     sampler_8x8[index].dw3.r3c_coefficient = 5;
2836     sampler_8x8[index].dw3.gain_factor = 40;
2837     sampler_8x8[index].dw3.non_edge_weight = 1;
2838     sampler_8x8[index].dw3.regular_weight = 2;
2839     sampler_8x8[index].dw3.strong_edge_weight = 7;
2840     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2841
2842     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2843                       I915_GEM_DOMAIN_RENDER, 
2844                       0,
2845                       0,
2846                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2847                       pp_context->sampler_state_table.bo_8x8);
2848
2849     /* sample_8x8 UV index 8 */
2850     index = 8;
2851     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2852     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2853     sampler_8x8[index].dw0.global_noise_estimation = 255;
2854     sampler_8x8[index].dw0.ief_bypass = 1;
2855     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2856     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2857     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2858     sampler_8x8[index].dw2.r5x_coefficient = 9;
2859     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2860     sampler_8x8[index].dw2.r5c_coefficient = 3;
2861     sampler_8x8[index].dw3.r3x_coefficient = 27;
2862     sampler_8x8[index].dw3.r3c_coefficient = 5;
2863     sampler_8x8[index].dw3.gain_factor = 40;
2864     sampler_8x8[index].dw3.non_edge_weight = 1;
2865     sampler_8x8[index].dw3.regular_weight = 2;
2866     sampler_8x8[index].dw3.strong_edge_weight = 7;
2867     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2868
2869     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2870                       I915_GEM_DOMAIN_RENDER, 
2871                       0,
2872                       0,
2873                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2874                       pp_context->sampler_state_table.bo_8x8);
2875
2876     /* sampler_8x8 V, index 12 */
2877     index = 12;
2878     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2879     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2880     sampler_8x8[index].dw0.global_noise_estimation = 255;
2881     sampler_8x8[index].dw0.ief_bypass = 1;
2882     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2883     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2884     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2885     sampler_8x8[index].dw2.r5x_coefficient = 9;
2886     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2887     sampler_8x8[index].dw2.r5c_coefficient = 3;
2888     sampler_8x8[index].dw3.r3x_coefficient = 27;
2889     sampler_8x8[index].dw3.r3c_coefficient = 5;
2890     sampler_8x8[index].dw3.gain_factor = 40;
2891     sampler_8x8[index].dw3.non_edge_weight = 1;
2892     sampler_8x8[index].dw3.regular_weight = 2;
2893     sampler_8x8[index].dw3.strong_edge_weight = 7;
2894     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2895
2896     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2897                       I915_GEM_DOMAIN_RENDER, 
2898                       0,
2899                       0,
2900                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2901                       pp_context->sampler_state_table.bo_8x8);
2902
2903     dri_bo_unmap(pp_context->sampler_state_table.bo);
2904
2905     /* private function & data */
2906     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
2907     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
2908     pp_context->private_context = &pp_context->pp_avs_context;
2909     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
2910
2911     pp_avs_context->dest_x = dst_rect->x;
2912     pp_avs_context->dest_y = dst_rect->y;
2913     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
2914     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
2915     pp_avs_context->src_w = src_rect->width;
2916     pp_avs_context->src_h = src_rect->height;
2917     pp_avs_context->horiz_range = (float)src_rect->width / src_width;
2918
2919     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
2920     dw = MAX(dw, dst_rect->width);
2921
2922     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
2923     pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */
2924     if (IS_HASWELL(i965->intel.device_info))
2925         pp_static_parameter->grf2.avs_wa_enable = 0; /* HSW don't use the WA */
2926
2927     if (pp_static_parameter->grf2.avs_wa_enable) {
2928         int src_fourcc = pp_get_surface_fourcc(ctx, src_surface);
2929         if ((src_fourcc == VA_FOURCC_RGBA) ||
2930             (src_fourcc == VA_FOURCC_RGBX) ||
2931             (src_fourcc == VA_FOURCC_BGRA) ||
2932             (src_fourcc == VA_FOURCC_BGRX)) {
2933             pp_static_parameter->grf2.avs_wa_enable = 0;
2934         }
2935     }
2936         
2937     pp_static_parameter->grf2.avs_wa_width = dw;
2938     pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * dw);
2939     pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * dw);
2940     pp_static_parameter->grf2.alpha = 255;
2941
2942     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
2943     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height;
2944     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height -
2945         (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step;
2946     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width -
2947         (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw;
2948
2949     gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
2950
2951     dst_surface->flags = src_surface->flags;
2952
2953     return VA_STATUS_SUCCESS;
2954 }
2955
/*
 * Horizontal step count for the DNDI path: always 1.
 * private_context is not consulted.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context; /* unused */

    return 1;
}
2961
2962 static int
2963 pp_dndi_y_steps(void *private_context)
2964 {
2965     struct pp_dndi_context *pp_dndi_context = private_context;
2966
2967     return pp_dndi_context->dest_h / 4;
2968 }
2969
2970 static int
2971 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2972 {
2973     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2974
2975     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2976     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
2977
2978     return 0;
2979 }
2980
2981 static VAStatus
2982 pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2983                         const struct i965_surface *src_surface,
2984                         const VARectangle *src_rect,
2985                         struct i965_surface *dst_surface,
2986                         const VARectangle *dst_rect,
2987                         void *filter_param)
2988 {
2989     struct i965_driver_data *i965 = i965_driver_data(ctx);
2990     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->pp_dndi_context;
2991     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2992     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2993     struct object_surface *previous_in_obj_surface, *current_in_obj_surface, *previous_out_obj_surface, *current_out_obj_surface;
2994     struct i965_sampler_dndi *sampler_dndi;
2995     int index;
2996     int w, h;
2997     int orig_w, orig_h;
2998     int dndi_top_first = 1;
2999     VAProcFilterParameterBufferDeinterlacing *di_filter_param = (VAProcFilterParameterBufferDeinterlacing *)filter_param;
3000     int is_first_frame = (pp_dndi_context->frame_order == -1);
3001
3002     if (di_filter_param->flags & VA_DEINTERLACING_BOTTOM_FIELD)
3003         dndi_top_first = 0;
3004     else
3005         dndi_top_first = 1;
3006
3007     /* surface */
3008     current_in_obj_surface = (struct object_surface *)src_surface->base;
3009
3010     if (di_filter_param->algorithm == VAProcDeinterlacingBob) {
3011         previous_in_obj_surface = current_in_obj_surface;
3012         is_first_frame = 1;
3013     } else if (di_filter_param->algorithm == VAProcDeinterlacingMotionAdaptive) {
3014         if (pp_dndi_context->frame_order == 0) {
3015             VAProcPipelineParameterBuffer *pipeline_param = pp_context->pipeline_param;
3016             if (!pipeline_param ||
3017                 !pipeline_param->num_forward_references ||
3018                 pipeline_param->forward_references[0] == VA_INVALID_ID) {
3019                 WARN_ONCE("A forward temporal reference is needed for Motion adaptive deinterlacing !!!\n");
3020
3021                 return VA_STATUS_ERROR_INVALID_PARAMETER;
3022             } else {
3023                 previous_in_obj_surface = SURFACE(pipeline_param->forward_references[0]);
3024                 assert(previous_in_obj_surface && previous_in_obj_surface->bo);
3025
3026                 is_first_frame = 0;
3027             }
3028         } else if (pp_dndi_context->frame_order == 1) {
3029             vpp_surface_convert(ctx,
3030                                 pp_dndi_context->current_out_obj_surface,
3031                                 (struct object_surface *)dst_surface->base);
3032             pp_dndi_context->frame_order = (pp_dndi_context->frame_order + 1) % 2;
3033             is_first_frame = 0;
3034
3035             return VA_STATUS_SUCCESS_1;
3036         } else {
3037             previous_in_obj_surface = current_in_obj_surface;
3038             is_first_frame = 1;
3039         }
3040     } else {
3041         return VA_STATUS_ERROR_UNIMPLEMENTED;
3042     }
3043
3044     /* source (temporal reference) YUV surface index 5 */
3045     orig_w = previous_in_obj_surface->orig_width;
3046     orig_h = previous_in_obj_surface->orig_height;
3047     w = previous_in_obj_surface->width;
3048     h = previous_in_obj_surface->height;
3049     i965_pp_set_surface2_state(ctx, pp_context,
3050                                previous_in_obj_surface->bo, 0,
3051                                orig_w, orig_h, w,
3052                                0, h,
3053                                SURFACE_FORMAT_PLANAR_420_8, 1,
3054                                5);
3055
3056     /* source surface */
3057     orig_w = current_in_obj_surface->orig_width;
3058     orig_h = current_in_obj_surface->orig_height;
3059     w = current_in_obj_surface->width;
3060     h = current_in_obj_surface->height;
3061
3062     /* source UV surface index 2 */
3063     i965_pp_set_surface_state(ctx, pp_context,
3064                               current_in_obj_surface->bo, w * h,
3065                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3066                               2, 0);
3067
3068     /* source YUV surface index 4 */
3069     i965_pp_set_surface2_state(ctx, pp_context,
3070                                current_in_obj_surface->bo, 0,
3071                                orig_w, orig_h, w,
3072                                0, h,
3073                                SURFACE_FORMAT_PLANAR_420_8, 1,
3074                                4);
3075
3076     /* source STMM surface index 6 */
3077     if (pp_dndi_context->stmm_bo == NULL) {
3078         pp_dndi_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr,
3079                                                 "STMM surface",
3080                                                 w * h,
3081                                                 4096);
3082         assert(pp_dndi_context->stmm_bo);
3083     }
3084
3085     i965_pp_set_surface_state(ctx, pp_context,
3086                               pp_dndi_context->stmm_bo, 0,
3087                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3088                               6, 0);
3089
3090     /* destination (Previous frame) */
3091     previous_out_obj_surface = (struct object_surface *)dst_surface->base;
3092     orig_w = previous_out_obj_surface->orig_width;
3093     orig_h = previous_out_obj_surface->orig_height;
3094     w = previous_out_obj_surface->width;
3095     h = previous_out_obj_surface->height;
3096
3097     if (is_first_frame) {
3098         current_out_obj_surface = previous_out_obj_surface;
3099     } else {
3100         VAStatus va_status;
3101
3102         if (pp_dndi_context->current_out_surface == VA_INVALID_SURFACE) {
3103             unsigned int tiling = 0, swizzle = 0;
3104             dri_bo_get_tiling(previous_out_obj_surface->bo, &tiling, &swizzle);
3105
3106             va_status = i965_CreateSurfaces(ctx,
3107                                             orig_w,
3108                                             orig_h,
3109                                             VA_RT_FORMAT_YUV420,
3110                                             1,
3111                                             &pp_dndi_context->current_out_surface);
3112             assert(va_status == VA_STATUS_SUCCESS);
3113             pp_dndi_context->current_out_obj_surface = SURFACE(pp_dndi_context->current_out_surface);
3114             assert(pp_dndi_context->current_out_obj_surface);
3115             i965_check_alloc_surface_bo(ctx,
3116                                         pp_dndi_context->current_out_obj_surface,
3117                                         tiling != I915_TILING_NONE,
3118                                         VA_FOURCC_NV12,
3119                                         SUBSAMPLE_YUV420);
3120         }
3121
3122         current_out_obj_surface = pp_dndi_context->current_out_obj_surface;
3123     }
3124
3125     /* destination (Previous frame) Y surface index 7 */
3126     i965_pp_set_surface_state(ctx, pp_context,
3127                               previous_out_obj_surface->bo, 0,
3128                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3129                               7, 1);
3130
3131     /* destination (Previous frame) UV surface index 8 */
3132     i965_pp_set_surface_state(ctx, pp_context,
3133                               previous_out_obj_surface->bo, w * h,
3134                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3135                               8, 1);
3136
3137     /* destination(Current frame) */
3138     orig_w = current_out_obj_surface->orig_width;
3139     orig_h = current_out_obj_surface->orig_height;
3140     w = current_out_obj_surface->width;
3141     h = current_out_obj_surface->height;
3142
3143     /* destination (Current frame) Y surface index xxx */
3144     i965_pp_set_surface_state(ctx, pp_context,
3145                               current_out_obj_surface->bo, 0,
3146                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3147                               10, 1);
3148
3149     /* destination (Current frame) UV surface index xxx */
3150     i965_pp_set_surface_state(ctx, pp_context,
3151                               current_out_obj_surface->bo, w * h,
3152                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3153                               11, 1);
3154
3155     /* STMM output surface, index 20 */
3156     i965_pp_set_surface_state(ctx, pp_context,
3157                               pp_dndi_context->stmm_bo, 0,
3158                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3159                               20, 1);
3160
3161     /* sampler dndi */
3162     dri_bo_map(pp_context->sampler_state_table.bo, True);
3163     assert(pp_context->sampler_state_table.bo->virtual);
3164     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3165     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3166
3167     /* sample dndi index 1 */
3168     index = 0;
3169     sampler_dndi[index].dw0.denoise_asd_threshold = 38;
3170     sampler_dndi[index].dw0.denoise_history_delta = 7;          // 0-15, default is 8
3171     sampler_dndi[index].dw0.denoise_maximum_history = 192;      // 128-240
3172     sampler_dndi[index].dw0.denoise_stad_threshold = 140;
3173
3174     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 38;
3175     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 1;
3176     sampler_dndi[index].dw1.stmm_c2 = 1;
3177     sampler_dndi[index].dw1.low_temporal_difference_threshold = 0;
3178     sampler_dndi[index].dw1.temporal_difference_threshold = 0;
3179
3180     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 20;   // 0-31
3181     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 1;    // 0-15
3182     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
3183     sampler_dndi[index].dw2.good_neighbor_threshold = 12;                // 0-63
3184
3185     sampler_dndi[index].dw3.maximum_stmm = 150;
3186     sampler_dndi[index].dw3.multipler_for_vecm = 30;
3187     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 125;
3188     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3189     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3190
3191     sampler_dndi[index].dw4.sdi_delta = 5;
3192     sampler_dndi[index].dw4.sdi_threshold = 100;
3193     sampler_dndi[index].dw4.stmm_output_shift = 5;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3194     sampler_dndi[index].dw4.stmm_shift_up = 1;
3195     sampler_dndi[index].dw4.stmm_shift_down = 0;
3196     sampler_dndi[index].dw4.minimum_stmm = 118;
3197
3198     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 175;
3199     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 37;
3200     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 100;
3201     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 50;
3202
3203     sampler_dndi[index].dw6.dn_enable = 1;
3204     sampler_dndi[index].dw6.di_enable = 1;
3205     sampler_dndi[index].dw6.di_partial = 0;
3206     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3207     sampler_dndi[index].dw6.dndi_stream_id = 0;
3208     sampler_dndi[index].dw6.dndi_first_frame = is_first_frame;
3209     sampler_dndi[index].dw6.progressive_dn = 0;
3210     sampler_dndi[index].dw6.fmd_tear_threshold = 2;
3211     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100;
3212     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 16;
3213
3214     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3215     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3216     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3217     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3218
3219     dri_bo_unmap(pp_context->sampler_state_table.bo);
3220
3221     /* private function & data */
3222     pp_context->pp_x_steps = pp_dndi_x_steps;
3223     pp_context->pp_y_steps = pp_dndi_y_steps;
3224     pp_context->private_context = &pp_context->pp_dndi_context;
3225     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
3226
3227     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
3228     pp_static_parameter->grf1.r1_6.di.top_field_first = dndi_top_first;
3229     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 0;
3230     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 0;
3231
3232     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
3233     pp_inline_parameter->grf5.number_blocks = w / 16;
3234     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3235     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3236
3237     pp_dndi_context->dest_w = w;
3238     pp_dndi_context->dest_h = h;
3239
3240     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3241
3242     pp_dndi_context->frame_order = (pp_dndi_context->frame_order + 1) % 2;
3243
3244     return VA_STATUS_SUCCESS;
3245 }
3246
/*
 * Horizontal step count for the denoise pass.
 *
 * Always 1, regardless of the context passed in.
 */
static int
pp_dn_x_steps(void *private_context)
{
    (void)private_context;      /* not consulted */

    return 1;
}
3252
3253 static int
3254 pp_dn_y_steps(void *private_context)
3255 {
3256     struct pp_dn_context *pp_dn_context = private_context;
3257
3258     return pp_dn_context->dest_h / 8;
3259 }
3260
3261 static int
3262 pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3263 {
3264     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3265
3266     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3267     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
3268
3269     return 0;
3270 }
3271
3272 static VAStatus
3273 pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3274                       const struct i965_surface *src_surface,
3275                       const VARectangle *src_rect,
3276                       struct i965_surface *dst_surface,
3277                       const VARectangle *dst_rect,
3278                       void *filter_param)
3279 {
3280     struct i965_driver_data *i965 = i965_driver_data(ctx);
3281     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->pp_dn_context;
3282     struct object_surface *obj_surface;
3283     struct i965_sampler_dndi *sampler_dndi;
3284     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3285     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3286     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3287     int index;
3288     int w, h;
3289     int orig_w, orig_h;
3290     int dn_strength = 15;
3291     int dndi_top_first = 1;
3292     int dn_progressive = 0;
3293
3294     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3295         dndi_top_first = 1;
3296         dn_progressive = 1;
3297     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3298         dndi_top_first = 1;
3299         dn_progressive = 0;
3300     } else {
3301         dndi_top_first = 0;
3302         dn_progressive = 0;
3303     }
3304
3305     if (dn_filter_param) {
3306         float value = dn_filter_param->value;
3307         
3308         if (value > 1.0)
3309             value = 1.0;
3310         
3311         if (value < 0.0)
3312             value = 0.0;
3313
3314         dn_strength = (int)(value * 31.0F);
3315     }
3316
3317     /* surface */
3318     obj_surface = (struct object_surface *)src_surface->base;
3319     orig_w = obj_surface->orig_width;
3320     orig_h = obj_surface->orig_height;
3321     w = obj_surface->width;
3322     h = obj_surface->height;
3323
3324     if (pp_dn_context->stmm_bo == NULL) {
3325         pp_dn_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr,
3326                                               "STMM surface",
3327                                               w * h,
3328                                               4096);
3329         assert(pp_dn_context->stmm_bo);
3330     }
3331
3332     /* source UV surface index 2 */
3333     i965_pp_set_surface_state(ctx, pp_context,
3334                               obj_surface->bo, w * h,
3335                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3336                               2, 0);
3337
3338     /* source YUV surface index 4 */
3339     i965_pp_set_surface2_state(ctx, pp_context,
3340                                obj_surface->bo, 0,
3341                                orig_w, orig_h, w,
3342                                0, h,
3343                                SURFACE_FORMAT_PLANAR_420_8, 1,
3344                                4);
3345
3346     /* source STMM surface index 20 */
3347     i965_pp_set_surface_state(ctx, pp_context,
3348                               pp_dn_context->stmm_bo, 0,
3349                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3350                               20, 1);
3351
3352     /* destination surface */
3353     obj_surface = (struct object_surface *)dst_surface->base;
3354     orig_w = obj_surface->orig_width;
3355     orig_h = obj_surface->orig_height;
3356     w = obj_surface->width;
3357     h = obj_surface->height;
3358
3359     /* destination Y surface index 7 */
3360     i965_pp_set_surface_state(ctx, pp_context,
3361                               obj_surface->bo, 0,
3362                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3363                               7, 1);
3364
3365     /* destination UV surface index 8 */
3366     i965_pp_set_surface_state(ctx, pp_context,
3367                               obj_surface->bo, w * h,
3368                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3369                               8, 1);
3370     /* sampler dn */
3371     dri_bo_map(pp_context->sampler_state_table.bo, True);
3372     assert(pp_context->sampler_state_table.bo->virtual);
3373     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3374     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3375
3376     /* sample dndi index 1 */
3377     index = 0;
3378     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
3379     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
3380     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
3381     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
3382
3383     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3384     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
3385     sampler_dndi[index].dw1.stmm_c2 = 0;
3386     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
3387     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
3388
3389     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
3390     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
3391     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
3392     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
3393
3394     sampler_dndi[index].dw3.maximum_stmm = 128;
3395     sampler_dndi[index].dw3.multipler_for_vecm = 2;
3396     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3397     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3398     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3399
3400     sampler_dndi[index].dw4.sdi_delta = 8;
3401     sampler_dndi[index].dw4.sdi_threshold = 128;
3402     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3403     sampler_dndi[index].dw4.stmm_shift_up = 0;
3404     sampler_dndi[index].dw4.stmm_shift_down = 0;
3405     sampler_dndi[index].dw4.minimum_stmm = 0;
3406
3407     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
3408     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
3409     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3410     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3411
3412     sampler_dndi[index].dw6.dn_enable = 1;
3413     sampler_dndi[index].dw6.di_enable = 0;
3414     sampler_dndi[index].dw6.di_partial = 0;
3415     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3416     sampler_dndi[index].dw6.dndi_stream_id = 1;
3417     sampler_dndi[index].dw6.dndi_first_frame = 1;
3418     sampler_dndi[index].dw6.progressive_dn = dn_progressive;
3419     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
3420     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
3421     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
3422
3423     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
3424     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
3425     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3426     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3427
3428     dri_bo_unmap(pp_context->sampler_state_table.bo);
3429
3430     /* private function & data */
3431     pp_context->pp_x_steps = pp_dn_x_steps;
3432     pp_context->pp_y_steps = pp_dn_y_steps;
3433     pp_context->private_context = &pp_context->pp_dn_context;
3434     pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
3435
3436     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
3437     pp_static_parameter->grf1.r1_6.di.top_field_first = 0;
3438     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 64;
3439     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 192;
3440
3441     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
3442     pp_inline_parameter->grf5.number_blocks = w / 16;
3443     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3444     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3445
3446     pp_dn_context->dest_w = w;
3447     pp_dn_context->dest_h = h;
3448
3449     dst_surface->flags = src_surface->flags;
3450     
3451     return VA_STATUS_SUCCESS;
3452 }
3453
3454 static int
3455 gen7_pp_dndi_x_steps(void *private_context)
3456 {
3457     struct pp_dndi_context *pp_dndi_context = private_context;
3458
3459     return pp_dndi_context->dest_w / 16;
3460 }
3461
3462 static int
3463 gen7_pp_dndi_y_steps(void *private_context)
3464 {
3465     struct pp_dndi_context *pp_dndi_context = private_context;
3466
3467     return pp_dndi_context->dest_h / 4;
3468 }
3469
3470 static int
3471 gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3472 {
3473     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3474
3475     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16;
3476     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 4;
3477
3478     return 0;
3479 }
3480
3481 static VAStatus
3482 gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3483                              const struct i965_surface *src_surface,
3484                              const VARectangle *src_rect,
3485                              struct i965_surface *dst_surface,
3486                              const VARectangle *dst_rect,
3487                              void *filter_param)
3488 {
3489     struct i965_driver_data *i965 = i965_driver_data(ctx);
3490     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->pp_dndi_context;
3491     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3492     struct object_surface *previous_in_obj_surface, *current_in_obj_surface, *previous_out_obj_surface, *current_out_obj_surface;
3493     struct gen7_sampler_dndi *sampler_dndi;
3494     int index;
3495     int w, h;
3496     int orig_w, orig_h;
3497     int dndi_top_first = 1;
3498     VAProcFilterParameterBufferDeinterlacing *di_filter_param = (VAProcFilterParameterBufferDeinterlacing *)filter_param;
3499     int is_first_frame = (pp_dndi_context->frame_order == -1);
3500
3501     if (di_filter_param->flags & VA_DEINTERLACING_BOTTOM_FIELD)
3502         dndi_top_first = 0;
3503     else
3504         dndi_top_first = 1;
3505
3506     /* surface */
3507     current_in_obj_surface = (struct object_surface *)src_surface->base;
3508
3509     if (di_filter_param->algorithm == VAProcDeinterlacingBob) {
3510         previous_in_obj_surface = current_in_obj_surface;
3511         is_first_frame = 1;
3512     } else if (di_filter_param->algorithm == VAProcDeinterlacingMotionAdaptive) {
3513         if (pp_dndi_context->frame_order == 0) {
3514             VAProcPipelineParameterBuffer *pipeline_param = pp_context->pipeline_param;
3515             if (!pipeline_param ||
3516                 !pipeline_param->num_forward_references ||
3517                 pipeline_param->forward_references[0] == VA_INVALID_ID) {
3518                 WARN_ONCE("A forward temporal reference is needed for Motion adaptive deinterlacing !!!\n");
3519
3520                 return VA_STATUS_ERROR_INVALID_PARAMETER;
3521             } else {
3522                 previous_in_obj_surface = SURFACE(pipeline_param->forward_references[0]);
3523                 assert(previous_in_obj_surface && previous_in_obj_surface->bo);
3524
3525                 is_first_frame = 0;
3526             }
3527         } else if (pp_dndi_context->frame_order == 1) {
3528             vpp_surface_convert(ctx,
3529                                 pp_dndi_context->current_out_obj_surface,
3530                                 (struct object_surface *)dst_surface->base);
3531             pp_dndi_context->frame_order = (pp_dndi_context->frame_order + 1) % 2;
3532             is_first_frame = 0;
3533
3534             return VA_STATUS_SUCCESS_1;
3535         } else {
3536             previous_in_obj_surface = current_in_obj_surface;
3537             is_first_frame = 1;
3538         }
3539     } else {
3540         return VA_STATUS_ERROR_UNIMPLEMENTED;
3541     }
3542
3543     /* source (temporal reference) YUV surface index 4 */
3544     orig_w = previous_in_obj_surface->orig_width;
3545     orig_h = previous_in_obj_surface->orig_height;
3546     w = previous_in_obj_surface->width;
3547     h = previous_in_obj_surface->height;
3548     gen7_pp_set_surface2_state(ctx, pp_context,
3549                                previous_in_obj_surface->bo, 0,
3550                                orig_w, orig_h, w,
3551                                0, h,
3552                                SURFACE_FORMAT_PLANAR_420_8, 1,
3553                                4);
3554
3555     /* source surface */
3556     orig_w = current_in_obj_surface->orig_width;
3557     orig_h = current_in_obj_surface->orig_height;
3558     w = current_in_obj_surface->width;
3559     h = current_in_obj_surface->height;
3560
3561     /* source UV surface index 1 */
3562     gen7_pp_set_surface_state(ctx, pp_context,
3563                               current_in_obj_surface->bo, w * h,
3564                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3565                               1, 0);
3566
3567     /* source YUV surface index 3 */
3568     gen7_pp_set_surface2_state(ctx, pp_context,
3569                                current_in_obj_surface->bo, 0,
3570                                orig_w, orig_h, w,
3571                                0, h,
3572                                SURFACE_FORMAT_PLANAR_420_8, 1,
3573                                3);
3574
3575     /* STMM / History Statistics input surface, index 5 */
3576     if (pp_dndi_context->stmm_bo == NULL) {
3577         pp_dndi_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr,
3578                                                 "STMM surface",
3579                                                 w * h,
3580                                                 4096);
3581         assert(pp_dndi_context->stmm_bo);
3582     }
3583
3584     gen7_pp_set_surface_state(ctx, pp_context,
3585                               pp_dndi_context->stmm_bo, 0,
3586                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3587                               5, 1);
3588
3589     /* destination surface */
3590     previous_out_obj_surface = (struct object_surface *)dst_surface->base;
3591     orig_w = previous_out_obj_surface->orig_width;
3592     orig_h = previous_out_obj_surface->orig_height;
3593     w = previous_out_obj_surface->width;
3594     h = previous_out_obj_surface->height;
3595
3596     if (is_first_frame) {
3597         current_out_obj_surface = previous_out_obj_surface;
3598     } else {
3599         VAStatus va_status;
3600
3601         if (pp_dndi_context->current_out_surface == VA_INVALID_SURFACE) {
3602             unsigned int tiling = 0, swizzle = 0;
3603             dri_bo_get_tiling(previous_out_obj_surface->bo, &tiling, &swizzle);
3604
3605             va_status = i965_CreateSurfaces(ctx,
3606                                             orig_w,
3607                                             orig_h,
3608                                             VA_RT_FORMAT_YUV420,
3609                                             1,
3610                                             &pp_dndi_context->current_out_surface);
3611             assert(va_status == VA_STATUS_SUCCESS);
3612             pp_dndi_context->current_out_obj_surface = SURFACE(pp_dndi_context->current_out_surface);
3613             assert(pp_dndi_context->current_out_obj_surface);
3614             i965_check_alloc_surface_bo(ctx,
3615                                         pp_dndi_context->current_out_obj_surface,
3616                                         tiling != I915_TILING_NONE,
3617                                         VA_FOURCC_NV12,
3618                                         SUBSAMPLE_YUV420);
3619         }
3620
3621         current_out_obj_surface = pp_dndi_context->current_out_obj_surface;
3622     }
3623
3624     /* destination(Previous frame) Y surface index 27 */
3625     gen7_pp_set_surface_state(ctx, pp_context,
3626                               previous_out_obj_surface->bo, 0,
3627                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3628                               27, 1);
3629
3630     /* destination(Previous frame) UV surface index 28 */
3631     gen7_pp_set_surface_state(ctx, pp_context,
3632                               previous_out_obj_surface->bo, w * h,
3633                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3634                               28, 1);
3635
3636     /* destination(Current frame) Y surface index 30 */
3637     gen7_pp_set_surface_state(ctx, pp_context,
3638                               current_out_obj_surface->bo, 0,
3639                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3640                               30, 1);
3641
3642     /* destination(Current frame) UV surface index 31 */
3643     orig_w = current_out_obj_surface->orig_width;
3644     orig_h = current_out_obj_surface->orig_height;
3645     w = current_out_obj_surface->width;
3646     h = current_out_obj_surface->height;
3647
3648     gen7_pp_set_surface_state(ctx, pp_context,
3649                               current_out_obj_surface->bo, w * h,
3650                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3651                               31, 1);
3652
3653     /* STMM output surface, index 33 */
3654     gen7_pp_set_surface_state(ctx, pp_context,
3655                               pp_dndi_context->stmm_bo, 0,
3656                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3657                               33, 1);
3658
3659
3660     /* sampler dndi */
3661     dri_bo_map(pp_context->sampler_state_table.bo, True);
3662     assert(pp_context->sampler_state_table.bo->virtual);
3663     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3664     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3665
3666     /* sample dndi index 0 */
3667     index = 0;
3668     sampler_dndi[index].dw0.denoise_asd_threshold = 38;
3669     sampler_dndi[index].dw0.dnmh_delt = 7;
3670     sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
3671     sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
3672     sampler_dndi[index].dw0.denoise_maximum_history = 192;      // 128-240
3673     sampler_dndi[index].dw0.denoise_stad_threshold = 140;
3674
3675     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 38;
3676     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 1;
3677     sampler_dndi[index].dw1.stmm_c2 = 2;
3678     sampler_dndi[index].dw1.low_temporal_difference_threshold = 0;
3679     sampler_dndi[index].dw1.temporal_difference_threshold = 0;
3680
3681     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 20;   // 0-31
3682     sampler_dndi[index].dw2.bne_edge_th = 1;
3683     sampler_dndi[index].dw2.smooth_mv_th = 0;
3684     sampler_dndi[index].dw2.sad_tight_th = 5;
3685     sampler_dndi[index].dw2.cat_slope_minus1 = 9;
3686     sampler_dndi[index].dw2.good_neighbor_th = 12;
3687
3688     sampler_dndi[index].dw3.maximum_stmm = 150;
3689     sampler_dndi[index].dw3.multipler_for_vecm = 30;
3690     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 125;
3691     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3692     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3693
3694     sampler_dndi[index].dw4.sdi_delta = 5;
3695     sampler_dndi[index].dw4.sdi_threshold = 100;
3696     sampler_dndi[index].dw4.stmm_output_shift = 5;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3697     sampler_dndi[index].dw4.stmm_shift_up = 1;
3698     sampler_dndi[index].dw4.stmm_shift_down = 0;
3699     sampler_dndi[index].dw4.minimum_stmm = 118;
3700
3701     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 175;
3702     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 37;
3703     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 100;
3704     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 50;
3705     sampler_dndi[index].dw6.dn_enable = 0;
3706     sampler_dndi[index].dw6.di_enable = 1;
3707     sampler_dndi[index].dw6.di_partial = 0;
3708     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3709     sampler_dndi[index].dw6.dndi_stream_id = 1;
3710     sampler_dndi[index].dw6.dndi_first_frame = is_first_frame;
3711     sampler_dndi[index].dw6.progressive_dn = 0;
3712     sampler_dndi[index].dw6.mcdi_enable = 0;
3713     sampler_dndi[index].dw6.fmd_tear_threshold = 2;
3714     sampler_dndi[index].dw6.cat_th1 = 0;
3715     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100;
3716     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 16;
3717
3718     sampler_dndi[index].dw7.sad_tha = 5;
3719     sampler_dndi[index].dw7.sad_thb = 10;
3720     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3721     sampler_dndi[index].dw7.mc_pixel_consistency_th = 25;
3722     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3723     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3724     sampler_dndi[index].dw7.neighborpixel_th = 10;
3725     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3726
3727     dri_bo_unmap(pp_context->sampler_state_table.bo);
3728
3729     /* private function & data */
3730     pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
3731     pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
3732     pp_context->private_context = &pp_context->pp_dndi_context;
3733     pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
3734
3735     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3736     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3737     pp_static_parameter->grf1.di_top_field_first = 0;
3738     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3739
3740     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3741     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3742     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3743
3744     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3745     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3746
3747     pp_dndi_context->dest_w = w;
3748     pp_dndi_context->dest_h = h;
3749
3750     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3751
3752     pp_dndi_context->frame_order = (pp_dndi_context->frame_order + 1) % 2;
3753
3754     return VA_STATUS_SUCCESS;
3755 }
3756
3757 static int
3758 gen7_pp_dn_x_steps(void *private_context)
3759 {
3760     struct pp_dn_context *pp_dn_context = private_context;
3761
3762     return pp_dn_context->dest_w / 16;
3763 }
3764
3765 static int
3766 gen7_pp_dn_y_steps(void *private_context)
3767 {
3768     struct pp_dn_context *pp_dn_context = private_context;
3769
3770     return pp_dn_context->dest_h / 4;
3771 }
3772
3773 static int
3774 gen7_pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3775 {
3776     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3777
3778     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3779     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
3780
3781     return 0;
3782 }
3783
/*
 * Prepare a GEN7 NV12 denoise (DN) pass.
 *
 * Binds the source and destination surfaces plus the STMM buffer, fills
 * entry 0 of the sampler state table with the DN/DI sampler settings,
 * installs the DN walker callbacks and writes the static (CURBE)
 * parameters.
 *
 * src_surface  - input surface; its flags select top-field-first and
 *                progressive mode for the sampler.
 * src_rect     - not used by this function.
 * dst_surface  - output surface; its flags are overwritten with the
 *                source flags before returning.
 * dst_rect     - not used by this function.
 * filter_param - optional VAProcFilterParameterBuffer. When non-NULL its
 *                value is clamped to [0.0, 1.0] and scaled to 0..31;
 *                when NULL the strength stays at its default of 15.
 *
 * Always returns VA_STATUS_SUCCESS.
 */
static VAStatus
gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                           const struct i965_surface *src_surface,
                           const VARectangle *src_rect,
                           struct i965_surface *dst_surface,
                           const VARectangle *dst_rect,
                           void *filter_param)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->pp_dn_context;
    struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
    struct object_surface *obj_surface;
    struct gen7_sampler_dndi *sampler_dn;
    VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
    int index;
    int w, h;
    int orig_w, orig_h;
    int dn_strength = 15;       /* default used when no filter parameter is given */
    int dndi_top_first = 1;
    int dn_progressive = 0;

    /* Derive field order / progressive mode from the source surface flags. */
    if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
        dndi_top_first = 1;
        dn_progressive = 1;
    } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
        dndi_top_first = 1;
        dn_progressive = 0;
    } else {
        dndi_top_first = 0;
        dn_progressive = 0;
    }

    /* Map the caller's strength from [0.0, 1.0] onto the 0..31 range. */
    if (dn_filter_param) {
        float value = dn_filter_param->value;

        if (value > 1.0)
            value = 1.0;

        if (value < 0.0)
            value = 0.0;

        dn_strength = (int)(value * 31.0F);
    }

    /* surface */
    obj_surface = (struct object_surface *)src_surface->base;
    orig_w = obj_surface->orig_width;
    orig_h = obj_surface->orig_height;
    w = obj_surface->width;
    h = obj_surface->height;

    /*
     * The STMM buffer is allocated only once (size w * h) and then kept in
     * the DN context for later calls.
     */
    if (pp_dn_context->stmm_bo == NULL) {
        pp_dn_context->stmm_bo= dri_bo_alloc(i965->intel.bufmgr,
                                             "STMM surface",
                                             w * h,
                                             4096);
        assert(pp_dn_context->stmm_bo);
    }

    /* source UV surface index 1 */
    gen7_pp_set_surface_state(ctx, pp_context,
                              obj_surface->bo, w * h,
                              orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
                              1, 0);

    /* source YUV surface index 3 */
    gen7_pp_set_surface2_state(ctx, pp_context,
                               obj_surface->bo, 0,
                               orig_w, orig_h, w,
                               0, h,
                               SURFACE_FORMAT_PLANAR_420_8, 1,
                               3);

    /* source (temporal reference) YUV surface index 4 */
    gen7_pp_set_surface2_state(ctx, pp_context,
                               obj_surface->bo, 0,
                               orig_w, orig_h, w,
                               0, h,
                               SURFACE_FORMAT_PLANAR_420_8, 1,
                               4);

    /* STMM / History Statistics input surface, index 33 */
    gen7_pp_set_surface_state(ctx, pp_context,
                              pp_dn_context->stmm_bo, 0,
                              orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
                              33, 1);

    /* destination surface */
    obj_surface = (struct object_surface *)dst_surface->base;
    orig_w = obj_surface->orig_width;
    orig_h = obj_surface->orig_height;
    w = obj_surface->width;
    h = obj_surface->height;

    /* destination Y surface index 24 */
    gen7_pp_set_surface_state(ctx, pp_context,
                              obj_surface->bo, 0,
                              orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
                              24, 1);

    /* destination UV surface index 25 */
    gen7_pp_set_surface_state(ctx, pp_context,
                              obj_surface->bo, w * h,
                              orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
                              25, 1);

    /* sampler dn */
    dri_bo_map(pp_context->sampler_state_table.bo, True);
    assert(pp_context->sampler_state_table.bo->virtual);
    assert(sizeof(*sampler_dn) == sizeof(int) * 8);
    sampler_dn = pp_context->sampler_state_table.bo->virtual;

    /* sampler dn state, entry 0 of the sampler state table */
    index = 0;
    sampler_dn[index].dw0.denoise_asd_threshold = 0;
    sampler_dn[index].dw0.dnmh_delt = 8;
    sampler_dn[index].dw0.vdi_walker_y_stride = 0;
    sampler_dn[index].dw0.vdi_walker_frame_sharing_enable = 0;
    sampler_dn[index].dw0.denoise_maximum_history = 128;      // 128-240
    sampler_dn[index].dw0.denoise_stad_threshold = 0;

    sampler_dn[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
    sampler_dn[index].dw1.denoise_moving_pixel_threshold = 0;
    sampler_dn[index].dw1.stmm_c2 = 0;
    sampler_dn[index].dw1.low_temporal_difference_threshold = 8;
    sampler_dn[index].dw1.temporal_difference_threshold = 16;

    sampler_dn[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
    sampler_dn[index].dw2.bne_edge_th = 1;
    sampler_dn[index].dw2.smooth_mv_th = 0;
    sampler_dn[index].dw2.sad_tight_th = 5;
    sampler_dn[index].dw2.cat_slope_minus1 = 9;
    sampler_dn[index].dw2.good_neighbor_th = 4;

    sampler_dn[index].dw3.maximum_stmm = 128;
    sampler_dn[index].dw3.multipler_for_vecm = 2;
    sampler_dn[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
    sampler_dn[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
    sampler_dn[index].dw3.stmm_blending_constant_select = 0;

    sampler_dn[index].dw4.sdi_delta = 8;
    sampler_dn[index].dw4.sdi_threshold = 128;
    sampler_dn[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
    sampler_dn[index].dw4.stmm_shift_up = 0;
    sampler_dn[index].dw4.stmm_shift_down = 0;
    sampler_dn[index].dw4.minimum_stmm = 0;

    sampler_dn[index].dw5.fmd_temporal_difference_threshold = 0;
    sampler_dn[index].dw5.sdi_fallback_mode_2_constant = 0;
    sampler_dn[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
    sampler_dn[index].dw5.sdi_fallback_mode_1_t1_constant = 0;

    /* Denoise only: DN is enabled while DI and MCDI stay disabled. */
    sampler_dn[index].dw6.dn_enable = 1;
    sampler_dn[index].dw6.di_enable = 0;
    sampler_dn[index].dw6.di_partial = 0;
    sampler_dn[index].dw6.dndi_top_first = dndi_top_first;
    sampler_dn[index].dw6.dndi_stream_id = 1;
    sampler_dn[index].dw6.dndi_first_frame = 1;
    sampler_dn[index].dw6.progressive_dn = dn_progressive;
    sampler_dn[index].dw6.mcdi_enable = 0;
    sampler_dn[index].dw6.fmd_tear_threshold = 32;
    sampler_dn[index].dw6.cat_th1 = 0;
    sampler_dn[index].dw6.fmd2_vertical_difference_threshold = 32;
    sampler_dn[index].dw6.fmd1_vertical_difference_threshold = 32;

    sampler_dn[index].dw7.sad_tha = 5;
    sampler_dn[index].dw7.sad_thb = 10;
    sampler_dn[index].dw7.fmd_for_1st_field_of_current_frame = 2;
    sampler_dn[index].dw7.mc_pixel_consistency_th = 25;
    sampler_dn[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
    sampler_dn[index].dw7.vdi_walker_enable = 0;
    sampler_dn[index].dw7.neighborpixel_th = 10;
    /* w is the destination surface width at this point */
    sampler_dn[index].dw7.column_width_minus1 = w / 16;

    dri_bo_unmap(pp_context->sampler_state_table.bo);

    /* private function & data */
    pp_context->pp_x_steps = gen7_pp_dn_x_steps;
    pp_context->pp_y_steps = gen7_pp_dn_y_steps;
    pp_context->private_context = &pp_context->pp_dn_context;
    pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;

    /* Static parameters, derived from the destination surface size. */
    pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
    pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
    pp_static_parameter->grf1.di_top_field_first = 0;
    pp_static_parameter->grf1.pointer_to_inline_parameter = 7;

    pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
    pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
    pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;

    pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
    pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;

    /* Consumed by gen7_pp_dn_x_steps() / gen7_pp_dn_y_steps(). */
    pp_dn_context->dest_w = w;
    pp_dn_context->dest_h = h;

    /* The output keeps the field/frame layout of the input. */
    dst_surface->flags = src_surface->flags;

    return VA_STATUS_SUCCESS;
}
3985
3986 static VAStatus
3987 ironlake_pp_initialize(
3988     VADriverContextP ctx,
3989     struct i965_post_processing_context *pp_context,
3990     const struct i965_surface *src_surface,
3991     const VARectangle *src_rect,
3992     struct i965_surface *dst_surface,
3993     const VARectangle *dst_rect,
3994     int pp_index,
3995     void *filter_param
3996 )
3997 {
3998     VAStatus va_status;
3999     struct i965_driver_data *i965 = i965_driver_data(ctx);
4000     struct pp_module *pp_module;
4001     dri_bo *bo;
4002     int static_param_size, inline_param_size;
4003
4004     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4005     bo = dri_bo_alloc(i965->intel.bufmgr,
4006                       "surface state & binding table",
4007                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
4008                       4096);
4009     assert(bo);
4010     pp_context->surface_state_binding_table.bo = bo;
4011
4012     dri_bo_unreference(pp_context->curbe.bo);
4013     bo = dri_bo_alloc(i965->intel.bufmgr,
4014                       "constant buffer",
4015                       4096, 
4016                       4096);
4017     assert(bo);
4018     pp_context->curbe.bo = bo;
4019
4020     dri_bo_unreference(pp_context->idrt.bo);
4021     bo = dri_bo_alloc(i965->intel.bufmgr, 
4022                       "interface discriptor", 
4023                       sizeof(struct i965_interface_descriptor), 
4024                       4096);
4025     assert(bo);
4026     pp_context->idrt.bo = bo;
4027     pp_context->idrt.num_interface_descriptors = 0;
4028
4029     dri_bo_unreference(pp_context->sampler_state_table.bo);
4030     bo = dri_bo_alloc(i965->intel.bufmgr, 
4031                       "sampler state table", 
4032                       4096,
4033                       4096);
4034     assert(bo);
4035     dri_bo_map(bo, True);
4036     memset(bo->virtual, 0, bo->size);
4037     dri_bo_unmap(bo);
4038     pp_context->sampler_state_table.bo = bo;
4039
4040     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4041     bo = dri_bo_alloc(i965->intel.bufmgr, 
4042                       "sampler 8x8 state ",
4043                       4096,
4044                       4096);
4045     assert(bo);
4046     pp_context->sampler_state_table.bo_8x8 = bo;
4047
4048     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4049     bo = dri_bo_alloc(i965->intel.bufmgr, 
4050                       "sampler 8x8 state ",
4051                       4096,
4052                       4096);
4053     assert(bo);
4054     pp_context->sampler_state_table.bo_8x8_uv = bo;
4055
4056     dri_bo_unreference(pp_context->vfe_state.bo);
4057     bo = dri_bo_alloc(i965->intel.bufmgr, 
4058                       "vfe state", 
4059                       sizeof(struct i965_vfe_state), 
4060                       4096);
4061     assert(bo);
4062     pp_context->vfe_state.bo = bo;
4063
4064     static_param_size = sizeof(struct pp_static_parameter);
4065     inline_param_size = sizeof(struct pp_inline_parameter);
4066
4067     memset(pp_context->pp_static_parameter, 0, static_param_size);
4068     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
4069     
4070     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
4071     pp_context->current_pp = pp_index;
4072     pp_module = &pp_context->pp_modules[pp_index];
4073     
4074     if (pp_module->initialize)
4075         va_status = pp_module->initialize(ctx, pp_context,
4076                                           src_surface,
4077                                           src_rect,
4078                                           dst_surface,
4079                                           dst_rect,
4080                                           filter_param);
4081     else
4082         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4083
4084     return va_status;
4085 }
4086
4087 static VAStatus
4088 ironlake_post_processing(
4089     VADriverContextP   ctx,
4090     struct i965_post_processing_context *pp_context,
4091     const struct i965_surface *src_surface,
4092     const VARectangle *src_rect,
4093     struct i965_surface *dst_surface,
4094     const VARectangle *dst_rect,
4095     int                pp_index,
4096     void *filter_param
4097 )
4098 {
4099     VAStatus va_status;
4100
4101     va_status = ironlake_pp_initialize(ctx, pp_context,
4102                                        src_surface,
4103                                        src_rect,
4104                                        dst_surface,
4105                                        dst_rect,
4106                                        pp_index,
4107                                        filter_param);
4108
4109     if (va_status == VA_STATUS_SUCCESS) {
4110         ironlake_pp_states_setup(ctx, pp_context);
4111         ironlake_pp_pipeline_setup(ctx, pp_context);
4112     }
4113
4114     return va_status;
4115 }
4116
4117 static VAStatus
4118 gen6_pp_initialize(
4119     VADriverContextP ctx,
4120     struct i965_post_processing_context *pp_context,
4121     const struct i965_surface *src_surface,
4122     const VARectangle *src_rect,
4123     struct i965_surface *dst_surface,
4124     const VARectangle *dst_rect,
4125     int pp_index,
4126     void *filter_param
4127 )
4128 {
4129     VAStatus va_status;
4130     struct i965_driver_data *i965 = i965_driver_data(ctx);
4131     struct pp_module *pp_module;
4132     dri_bo *bo;
4133     int static_param_size, inline_param_size;
4134
4135     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4136     bo = dri_bo_alloc(i965->intel.bufmgr,
4137                       "surface state & binding table",
4138                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
4139                       4096);
4140     assert(bo);
4141     pp_context->surface_state_binding_table.bo = bo;
4142
4143     dri_bo_unreference(pp_context->curbe.bo);
4144     bo = dri_bo_alloc(i965->intel.bufmgr,
4145                       "constant buffer",
4146                       4096, 
4147                       4096);
4148     assert(bo);
4149     pp_context->curbe.bo = bo;
4150
4151     dri_bo_unreference(pp_context->idrt.bo);
4152     bo = dri_bo_alloc(i965->intel.bufmgr, 
4153                       "interface discriptor", 
4154                       sizeof(struct gen6_interface_descriptor_data), 
4155                       4096);
4156     assert(bo);
4157     pp_context->idrt.bo = bo;
4158     pp_context->idrt.num_interface_descriptors = 0;
4159
4160     dri_bo_unreference(pp_context->sampler_state_table.bo);
4161     bo = dri_bo_alloc(i965->intel.bufmgr, 
4162                       "sampler state table", 
4163                       4096,
4164                       4096);
4165     assert(bo);
4166     dri_bo_map(bo, True);
4167     memset(bo->virtual, 0, bo->size);
4168     dri_bo_unmap(bo);
4169     pp_context->sampler_state_table.bo = bo;
4170
4171     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4172     bo = dri_bo_alloc(i965->intel.bufmgr, 
4173                       "sampler 8x8 state ",
4174                       4096,
4175                       4096);
4176     assert(bo);
4177     pp_context->sampler_state_table.bo_8x8 = bo;
4178
4179     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4180     bo = dri_bo_alloc(i965->intel.bufmgr, 
4181                       "sampler 8x8 state ",
4182                       4096,
4183                       4096);
4184     assert(bo);
4185     pp_context->sampler_state_table.bo_8x8_uv = bo;
4186
4187     dri_bo_unreference(pp_context->vfe_state.bo);
4188     bo = dri_bo_alloc(i965->intel.bufmgr, 
4189                       "vfe state", 
4190                       sizeof(struct i965_vfe_state), 
4191                       4096);
4192     assert(bo);
4193     pp_context->vfe_state.bo = bo;
4194     
4195     if (IS_GEN7(i965->intel.device_info)) {
4196         static_param_size = sizeof(struct gen7_pp_static_parameter);
4197         inline_param_size = sizeof(struct gen7_pp_inline_parameter);
4198     } else {
4199         static_param_size = sizeof(struct pp_static_parameter);
4200         inline_param_size = sizeof(struct pp_inline_parameter);
4201     }
4202
4203     memset(pp_context->pp_static_parameter, 0, static_param_size);
4204     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
4205
4206     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
4207     pp_context->current_pp = pp_index;
4208     pp_module = &pp_context->pp_modules[pp_index];
4209     
4210     if (pp_module->initialize)
4211         va_status = pp_module->initialize(ctx, pp_context,
4212                                           src_surface,
4213                                           src_rect,
4214                                           dst_surface,
4215                                           dst_rect,
4216                                           filter_param);
4217     else
4218         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4219
4220     calculate_boundary_block_mask(pp_context, dst_rect);
4221
4222     return va_status;
4223 }
4224
4225
4226 static void
4227 gen6_pp_interface_descriptor_table(VADriverContextP   ctx,
4228                                    struct i965_post_processing_context *pp_context)
4229 {
4230     struct i965_driver_data *i965 = i965_driver_data(ctx);
4231     struct gen6_interface_descriptor_data *desc;
4232     dri_bo *bo;
4233     int pp_index = pp_context->current_pp;
4234
4235     bo = pp_context->idrt.bo;
4236     dri_bo_map(bo, True);
4237     assert(bo->virtual);
4238     desc = bo->virtual;
4239     memset(desc, 0, sizeof(*desc));
4240     desc->desc0.kernel_start_pointer = 
4241         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
4242     desc->desc1.single_program_flow = 1;
4243     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
4244     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
4245     desc->desc2.sampler_state_pointer = 
4246         pp_context->sampler_state_table.bo->offset >> 5;
4247     desc->desc3.binding_table_entry_count = 0;
4248     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
4249     desc->desc4.constant_urb_entry_read_offset = 0;
4250
4251     if (IS_GEN7(i965->intel.device_info))
4252         desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */
4253     else
4254         desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
4255
4256     dri_bo_emit_reloc(bo,
4257                       I915_GEM_DOMAIN_INSTRUCTION, 0,
4258                       0,
4259                       offsetof(struct gen6_interface_descriptor_data, desc0),
4260                       pp_context->pp_modules[pp_index].kernel.bo);
4261
4262     dri_bo_emit_reloc(bo,
4263                       I915_GEM_DOMAIN_INSTRUCTION, 0,
4264                       desc->desc2.sampler_count << 2,
4265                       offsetof(struct gen6_interface_descriptor_data, desc2),
4266                       pp_context->sampler_state_table.bo);
4267
4268     dri_bo_unmap(bo);
4269     pp_context->idrt.num_interface_descriptors++;
4270 }
4271
4272 static void
4273 gen6_pp_upload_constants(VADriverContextP ctx,
4274                          struct i965_post_processing_context *pp_context)
4275 {
4276     struct i965_driver_data *i965 = i965_driver_data(ctx);
4277     unsigned char *constant_buffer;
4278     int param_size;
4279
4280     assert(sizeof(struct pp_static_parameter) == 128);
4281     assert(sizeof(struct gen7_pp_static_parameter) == 192);
4282
4283     if (IS_GEN7(i965->intel.device_info))
4284         param_size = sizeof(struct gen7_pp_static_parameter);
4285     else
4286         param_size = sizeof(struct pp_static_parameter);
4287
4288     dri_bo_map(pp_context->curbe.bo, 1);
4289     assert(pp_context->curbe.bo->virtual);
4290     constant_buffer = pp_context->curbe.bo->virtual;
4291     memcpy(constant_buffer, pp_context->pp_static_parameter, param_size);
4292     dri_bo_unmap(pp_context->curbe.bo);
4293 }
4294
4295 static void
4296 gen6_pp_states_setup(VADriverContextP ctx,
4297                      struct i965_post_processing_context *pp_context)
4298 {
4299     gen6_pp_interface_descriptor_table(ctx, pp_context);
4300     gen6_pp_upload_constants(ctx, pp_context);
4301 }
4302
4303 static void
4304 gen6_pp_pipeline_select(VADriverContextP ctx,
4305                         struct i965_post_processing_context *pp_context)
4306 {
4307     struct intel_batchbuffer *batch = pp_context->batch;
4308
4309     BEGIN_BATCH(batch, 1);
4310     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
4311     ADVANCE_BATCH(batch);
4312 }
4313
4314 static void
4315 gen6_pp_state_base_address(VADriverContextP ctx,
4316                            struct i965_post_processing_context *pp_context)
4317 {
4318     struct intel_batchbuffer *batch = pp_context->batch;
4319
4320     BEGIN_BATCH(batch, 10);
4321     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
4322     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4323     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
4324     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4325     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4326     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4327     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4328     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4329     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4330     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4331     ADVANCE_BATCH(batch);
4332 }
4333
4334 static void
4335 gen6_pp_vfe_state(VADriverContextP ctx,
4336                   struct i965_post_processing_context *pp_context)
4337 {
4338     struct intel_batchbuffer *batch = pp_context->batch;
4339
4340     BEGIN_BATCH(batch, 8);
4341     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
4342     OUT_BATCH(batch, 0);
4343     OUT_BATCH(batch,
4344               (pp_context->vfe_gpu_state.max_num_threads - 1) << 16 |
4345               pp_context->vfe_gpu_state.num_urb_entries << 8);
4346     OUT_BATCH(batch, 0);
4347     OUT_BATCH(batch,
4348               (pp_context->vfe_gpu_state.urb_entry_size) << 16 |  
4349                 /* URB Entry Allocation Size, in 256 bits unit */
4350               (pp_context->vfe_gpu_state.curbe_allocation_size));
4351                 /* CURBE Allocation Size, in 256 bits unit */
4352     OUT_BATCH(batch, 0);
4353     OUT_BATCH(batch, 0);
4354     OUT_BATCH(batch, 0);
4355     ADVANCE_BATCH(batch);
4356 }
4357
4358 static void
4359 gen6_pp_curbe_load(VADriverContextP ctx,
4360                    struct i965_post_processing_context *pp_context)
4361 {
4362     struct intel_batchbuffer *batch = pp_context->batch;
4363     struct i965_driver_data *i965 = i965_driver_data(ctx);
4364     int param_size;
4365
4366     if (IS_GEN7(i965->intel.device_info))
4367         param_size = sizeof(struct gen7_pp_static_parameter);
4368     else
4369         param_size = sizeof(struct pp_static_parameter);
4370
4371     BEGIN_BATCH(batch, 4);
4372     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
4373     OUT_BATCH(batch, 0);
4374     OUT_BATCH(batch,
4375               param_size);
4376     OUT_RELOC(batch, 
4377               pp_context->curbe.bo,
4378               I915_GEM_DOMAIN_INSTRUCTION, 0,
4379               0);
4380     ADVANCE_BATCH(batch);
4381 }
4382
4383 static void
4384 gen6_interface_descriptor_load(VADriverContextP ctx,
4385                                struct i965_post_processing_context *pp_context)
4386 {
4387     struct intel_batchbuffer *batch = pp_context->batch;
4388
4389     BEGIN_BATCH(batch, 4);
4390     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
4391     OUT_BATCH(batch, 0);
4392     OUT_BATCH(batch,
4393               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
4394     OUT_RELOC(batch, 
4395               pp_context->idrt.bo,
4396               I915_GEM_DOMAIN_INSTRUCTION, 0,
4397               0);
4398     ADVANCE_BATCH(batch);
4399 }
4400
4401 static void update_block_mask_parameter(struct i965_post_processing_context *pp_context, int x, int y, int x_steps, int y_steps) 
4402 {
4403     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
4404
4405     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
4406     pp_inline_parameter->grf6.block_vertical_mask_bottom = pp_context->block_vertical_mask_bottom;
4407     // for the first block, it always on the left edge. the second block will reload horizontal_mask from grf6.block_horizontal_mask_middle
4408     pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_left;
4409     pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
4410     pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_right;
4411
4412     /* 1 x N */
4413     if (x_steps == 1) {
4414         if (y == y_steps-1) {
4415             pp_inline_parameter->grf5.block_vertical_mask = pp_context->block_vertical_mask_bottom;
4416         }
4417         else {
4418             pp_inline_parameter->grf6.block_vertical_mask_bottom = 0xff;
4419         }
4420     }
4421
4422     /* M x 1 */
4423     if (y_steps == 1) {
4424         if (x == 0) { // all blocks in this group are on the left edge
4425             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_left;
4426             pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_left; 
4427         }
4428         else if (x == x_steps-1) {
4429             pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_right;
4430             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_right;
4431         }
4432         else {
4433             pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
4434             pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
4435             pp_inline_parameter->grf6.block_horizontal_mask_right = 0xffff;
4436         }
4437     }
4438
4439 }
4440
4441 static void
4442 gen6_pp_object_walker(VADriverContextP ctx,
4443                       struct i965_post_processing_context *pp_context)
4444 {
4445     struct i965_driver_data *i965 = i965_driver_data(ctx);
4446     struct intel_batchbuffer *batch = pp_context->batch;
4447     int x, x_steps, y, y_steps;
4448     int param_size, command_length_in_dws;
4449     dri_bo *command_buffer;
4450     unsigned int *command_ptr;
4451
4452     if (IS_GEN7(i965->intel.device_info))
4453         param_size = sizeof(struct gen7_pp_inline_parameter);
4454     else
4455         param_size = sizeof(struct pp_inline_parameter);
4456
4457     x_steps = pp_context->pp_x_steps(pp_context->private_context);
4458     y_steps = pp_context->pp_y_steps(pp_context->private_context);
4459     command_length_in_dws = 6 + (param_size >> 2);
4460     command_buffer = dri_bo_alloc(i965->intel.bufmgr,
4461                                   "command objects buffer",
4462                                   command_length_in_dws * 4 * x_steps * y_steps + 8,
4463                                   4096);
4464
4465     dri_bo_map(command_buffer, 1);
4466     command_ptr = command_buffer->virtual;
4467
4468     for (y = 0; y < y_steps; y++) {
4469         for (x = 0; x < x_steps; x++) {
4470             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
4471                 // some common block parameter update goes here, apply to all pp functions
4472                 if (IS_GEN6(i965->intel.device_info))
4473                     update_block_mask_parameter (pp_context, x, y, x_steps, y_steps);
4474                 
4475                 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
4476                 *command_ptr++ = 0;
4477                 *command_ptr++ = 0;
4478                 *command_ptr++ = 0;
4479                 *command_ptr++ = 0;
4480                 *command_ptr++ = 0;
4481                 memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
4482                 command_ptr += (param_size >> 2);
4483             }
4484         }
4485     }
4486
4487     if (command_length_in_dws * x_steps * y_steps % 2 == 0)
4488         *command_ptr++ = 0;
4489
4490     *command_ptr = MI_BATCH_BUFFER_END;
4491
4492     dri_bo_unmap(command_buffer);
4493
4494     BEGIN_BATCH(batch, 2);
4495     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
4496     OUT_RELOC(batch, command_buffer,
4497               I915_GEM_DOMAIN_COMMAND, 0,
4498               0);
4499     ADVANCE_BATCH(batch);
4500
4501     dri_bo_unreference(command_buffer);
4502
4503     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
4504      * will cause control to pass back to ring buffer 
4505      */
4506     intel_batchbuffer_end_atomic(batch);
4507     intel_batchbuffer_flush(batch);
4508     intel_batchbuffer_start_atomic(batch, 0x1000);
4509 }
4510
4511 static void
4512 gen6_pp_pipeline_setup(VADriverContextP ctx,
4513                        struct i965_post_processing_context *pp_context)
4514 {
4515     struct intel_batchbuffer *batch = pp_context->batch;
4516
4517     intel_batchbuffer_start_atomic(batch, 0x1000);
4518     intel_batchbuffer_emit_mi_flush(batch);
4519     gen6_pp_pipeline_select(ctx, pp_context);
4520     gen6_pp_state_base_address(ctx, pp_context);
4521     gen6_pp_vfe_state(ctx, pp_context);
4522     gen6_pp_curbe_load(ctx, pp_context);
4523     gen6_interface_descriptor_load(ctx, pp_context);
4524     gen6_pp_object_walker(ctx, pp_context);
4525     intel_batchbuffer_end_atomic(batch);
4526 }
4527
4528 static VAStatus
4529 gen6_post_processing(
4530     VADriverContextP ctx,
4531     struct i965_post_processing_context *pp_context,
4532     const struct i965_surface *src_surface,
4533     const VARectangle *src_rect,
4534     struct i965_surface *dst_surface,
4535     const VARectangle *dst_rect,
4536     int pp_index,
4537     void *filter_param
4538 )
4539 {
4540     VAStatus va_status;
4541     
4542     va_status = gen6_pp_initialize(ctx, pp_context,
4543                                    src_surface,
4544                                    src_rect,
4545                                    dst_surface,
4546                                    dst_rect,
4547                                    pp_index,
4548                                    filter_param);
4549
4550     if (va_status == VA_STATUS_SUCCESS) {
4551         gen6_pp_states_setup(ctx, pp_context);
4552         gen6_pp_pipeline_setup(ctx, pp_context);
4553     }
4554
4555     if (va_status == VA_STATUS_SUCCESS_1)
4556         va_status = VA_STATUS_SUCCESS;
4557
4558     return va_status;
4559 }
4560
4561 static VAStatus
4562 i965_post_processing_internal(
4563     VADriverContextP   ctx,
4564     struct i965_post_processing_context *pp_context,
4565     const struct i965_surface *src_surface,
4566     const VARectangle *src_rect,
4567     struct i965_surface *dst_surface,
4568     const VARectangle *dst_rect,
4569     int                pp_index,
4570     void *filter_param
4571 )
4572 {
4573     VAStatus va_status;
4574     struct i965_driver_data *i965 = i965_driver_data(ctx);
4575
4576     if (pp_context && pp_context->intel_post_processing) {
4577         va_status = (pp_context->intel_post_processing)(ctx, pp_context,
4578                           src_surface, src_rect,
4579                           dst_surface, dst_rect,
4580                           pp_index, filter_param);
4581     } else {
4582         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4583     }
4584
4585     return va_status;
4586 }
4587
/*
 * Convert a packed ARGB value (alpha in bits 31..24, red in 23..16,
 * green in 15..8, blue in 7..0) into separate Y, U, V and alpha bytes.
 *
 * The colour conversion uses integer coefficients scaled by 1000, with
 * an offset of 16 on Y and 128 on U and V; the division truncates
 * toward zero.  Alpha is copied through unchanged.
 */
static void
rgb_to_yuv(unsigned int argb,
           unsigned char *y,
           unsigned char *u,
           unsigned char *v,
           unsigned char *a)
{
    const int red   = (int)((argb >> 16) & 0xff);
    const int green = (int)((argb >> 8) & 0xff);
    const int blue  = (int)(argb & 0xff);

    /* Weighted sums, still scaled by 1000. */
    const int luma_x1000 = 257 * red + 504 * green + 98 * blue;
    const int cr_x1000   = 439 * red - 368 * green - 71 * blue;
    const int cb_x1000   = -148 * red - 291 * green + 439 * blue;

    *y = luma_x1000 / 1000 + 16;
    *v = cr_x1000 / 1000 + 128;
    *u = cb_x1000 / 1000 + 128;
    *a = (argb >> 24) & 0xff;
}
4604
4605 static void 
4606 i965_vpp_clear_surface(VADriverContextP ctx,
4607                        struct i965_post_processing_context *pp_context,
4608                        struct object_surface *obj_surface,
4609                        unsigned int color)
4610 {
4611     struct i965_driver_data *i965 = i965_driver_data(ctx);
4612     struct intel_batchbuffer *batch = pp_context->batch;
4613     unsigned int blt_cmd, br13;
4614     unsigned int tiling = 0, swizzle = 0;
4615     int pitch;
4616     unsigned char y, u, v, a = 0;
4617     int region_width, region_height;
4618
4619     /* Currently only support NV12 surface */
4620     if (!obj_surface || obj_surface->fourcc != VA_FOURCC_NV12)
4621         return;
4622
4623     rgb_to_yuv(color, &y, &u, &v, &a);
4624
4625     if (a == 0)
4626         return;
4627
4628     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
4629     blt_cmd = XY_COLOR_BLT_CMD;
4630     pitch = obj_surface->width;
4631
4632     if (tiling != I915_TILING_NONE) {
4633         assert(tiling == I915_TILING_Y);
4634         // blt_cmd |= XY_COLOR_BLT_DST_TILED;
4635         // pitch >>= 2;
4636     }
4637
4638     br13 = 0xf0 << 16;
4639     br13 |= BR13_8;
4640     br13 |= pitch;
4641
4642     if (IS_IRONLAKE(i965->intel.device_info)) {
4643         intel_batchbuffer_start_atomic(batch, 48);
4644         BEGIN_BATCH(batch, 12);
4645     } else {
4646         /* Will double-check the command if the new chipset is added */
4647         intel_batchbuffer_start_atomic_blt(batch, 48);
4648         BEGIN_BLT_BATCH(batch, 12);
4649     }
4650
4651     region_width = obj_surface->width;
4652     region_height = obj_surface->height;
4653
4654     OUT_BATCH(batch, blt_cmd);
4655     OUT_BATCH(batch, br13);
4656     OUT_BATCH(batch,
4657               0 << 16 |
4658               0);
4659     OUT_BATCH(batch,
4660               region_height << 16 |
4661               region_width);
4662     OUT_RELOC(batch, obj_surface->bo, 
4663               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4664               0);
4665     OUT_BATCH(batch, y);
4666
4667     br13 = 0xf0 << 16;
4668     br13 |= BR13_565;
4669     br13 |= pitch;
4670
4671     region_width = obj_surface->width / 2;
4672     region_height = obj_surface->height / 2;
4673
4674     if (tiling == I915_TILING_Y) {
4675         region_height = ALIGN(obj_surface->height / 2, 32);
4676     }
4677
4678     OUT_BATCH(batch, blt_cmd);
4679     OUT_BATCH(batch, br13);
4680     OUT_BATCH(batch,
4681               0 << 16 |
4682               0);
4683     OUT_BATCH(batch,
4684               region_height << 16 |
4685               region_width);
4686     OUT_RELOC(batch, obj_surface->bo, 
4687               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4688               obj_surface->width * obj_surface->y_cb_offset);
4689     OUT_BATCH(batch, v << 8 | u);
4690
4691     ADVANCE_BATCH(batch);
4692     intel_batchbuffer_end_atomic(batch);
4693 }
4694
4695 VAStatus
4696 i965_scaling_processing(
4697     VADriverContextP   ctx,
4698     struct object_surface *src_surface_obj,
4699     const VARectangle *src_rect,
4700     struct object_surface *dst_surface_obj,
4701     const VARectangle *dst_rect,
4702     unsigned int       flags)
4703 {
4704     VAStatus va_status = VA_STATUS_SUCCESS;
4705     struct i965_driver_data *i965 = i965_driver_data(ctx);
4706  
4707     assert(src_surface_obj->fourcc == VA_FOURCC_NV12);
4708     assert(dst_surface_obj->fourcc == VA_FOURCC_NV12);
4709
4710     if (HAS_VPP(i965) && (flags & I965_PP_FLAG_AVS)) {
4711         struct i965_surface src_surface;
4712         struct i965_surface dst_surface;
4713
4714          _i965LockMutex(&i965->pp_mutex);
4715
4716          src_surface.base = (struct object_base *)src_surface_obj;
4717          src_surface.type = I965_SURFACE_TYPE_SURFACE;
4718          src_surface.flags = I965_SURFACE_FLAG_FRAME;
4719          dst_surface.base = (struct object_base *)dst_surface_obj;
4720          dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4721          dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4722
4723          va_status = i965_post_processing_internal(ctx, i965->pp_context,
4724                                                    &src_surface,
4725                                                    src_rect,
4726                                                    &dst_surface,
4727                                                    dst_rect,
4728                                                    PP_NV12_AVS,
4729                                                    NULL);
4730
4731          _i965UnlockMutex(&i965->pp_mutex);
4732     }
4733
4734     return va_status;
4735 }
4736
4737 VASurfaceID
4738 i965_post_processing(
4739     VADriverContextP   ctx,
4740     struct object_surface *obj_surface,
4741     const VARectangle *src_rect,
4742     const VARectangle *dst_rect,
4743     unsigned int       flags,
4744     int               *has_done_scaling  
4745 )
4746 {
4747     struct i965_driver_data *i965 = i965_driver_data(ctx);
4748     VASurfaceID out_surface_id = VA_INVALID_ID;
4749     VASurfaceID tmp_id = VA_INVALID_ID;
4750     
4751     *has_done_scaling = 0;
4752
4753     if (HAS_VPP(i965)) {
4754         VAStatus status;
4755         struct i965_surface src_surface;
4756         struct i965_surface dst_surface;
4757
4758         /* Currently only support post processing for NV12 surface */
4759         if (obj_surface->fourcc != VA_FOURCC_NV12)
4760             return out_surface_id;
4761
4762         _i965LockMutex(&i965->pp_mutex);
4763
4764         if (flags & I965_PP_FLAG_MCDI) {
4765             src_surface.base = (struct object_base *)obj_surface;
4766             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4767             src_surface.flags = (flags & I965_PP_FLAG_TOP_FIELD) ? 
4768                 I965_SURFACE_FLAG_TOP_FIELD_FIRST : I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST;
4769
4770             status = i965_CreateSurfaces(ctx,
4771                                          obj_surface->orig_width,
4772                                          obj_surface->orig_height,
4773                                          VA_RT_FORMAT_YUV420,
4774                                          1,
4775                                          &out_surface_id);
4776             assert(status == VA_STATUS_SUCCESS);
4777             obj_surface = SURFACE(out_surface_id);
4778             assert(obj_surface);
4779             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
4780             i965_vpp_clear_surface(ctx, i965->pp_context, obj_surface, 0); 
4781
4782             dst_surface.base = (struct object_base *)obj_surface;
4783             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4784             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4785
4786             i965_post_processing_internal(ctx, i965->pp_context,
4787                                           &src_surface,
4788                                           src_rect,
4789                                           &dst_surface,
4790                                           dst_rect,
4791                                           PP_NV12_DNDI,
4792                                           NULL);
4793         }
4794
4795         if (flags & I965_PP_FLAG_AVS) {
4796             struct i965_render_state *render_state = &i965->render_state;
4797             struct intel_region *dest_region = render_state->draw_region;
4798
4799             if (out_surface_id != VA_INVALID_ID)
4800                 tmp_id = out_surface_id;
4801
4802             src_surface.base = (struct object_base *)obj_surface;
4803             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4804             src_surface.flags = I965_SURFACE_FLAG_FRAME;
4805
4806             status = i965_CreateSurfaces(ctx,
4807                                          dest_region->width,
4808                                          dest_region->height,
4809                                          VA_RT_FORMAT_YUV420,
4810                                          1,
4811                                          &out_surface_id);
4812             assert(status == VA_STATUS_SUCCESS);
4813             obj_surface = SURFACE(out_surface_id);
4814             assert(obj_surface);
4815             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
4816             i965_vpp_clear_surface(ctx, i965->pp_context, obj_surface, 0); 
4817
4818             dst_surface.base = (struct object_base *)obj_surface;
4819             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4820             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4821
4822             i965_post_processing_internal(ctx, i965->pp_context,
4823                                           &src_surface,
4824                                           src_rect,
4825                                           &dst_surface,
4826                                           dst_rect,
4827                                           PP_NV12_AVS,
4828                                           NULL);
4829
4830             if (tmp_id != VA_INVALID_ID)
4831                 i965_DestroySurfaces(ctx, &tmp_id, 1);
4832                 
4833             *has_done_scaling = 1;
4834         }
4835
4836         _i965UnlockMutex(&i965->pp_mutex);
4837     }
4838
4839     return out_surface_id;
4840 }       
4841
4842 static VAStatus
4843 i965_image_pl2_processing(VADriverContextP ctx,
4844                           const struct i965_surface *src_surface,
4845                           const VARectangle *src_rect,
4846                           struct i965_surface *dst_surface,
4847                           const VARectangle *dst_rect);
4848
4849 static VAStatus
4850 i965_image_plx_nv12_plx_processing(VADriverContextP ctx,
4851                                    VAStatus (*i965_image_plx_nv12_processing)(
4852                                        VADriverContextP,
4853                                        const struct i965_surface *,
4854                                        const VARectangle *,
4855                                        struct i965_surface *,
4856                                        const VARectangle *),
4857                                    const struct i965_surface *src_surface,
4858                                    const VARectangle *src_rect,
4859                                    struct i965_surface *dst_surface,
4860                                    const VARectangle *dst_rect)
4861 {
4862     struct i965_driver_data *i965 = i965_driver_data(ctx);
4863     VAStatus status;
4864     VASurfaceID tmp_surface_id = VA_INVALID_SURFACE;
4865     struct object_surface *obj_surface = NULL;
4866     struct i965_surface tmp_surface;
4867     int width, height;
4868
4869     pp_get_surface_size(ctx, dst_surface, &width, &height);
4870     status = i965_CreateSurfaces(ctx,
4871                                  width,
4872                                  height,
4873                                  VA_RT_FORMAT_YUV420,
4874                                  1,
4875                                  &tmp_surface_id);
4876     assert(status == VA_STATUS_SUCCESS);
4877     obj_surface = SURFACE(tmp_surface_id);
4878     assert(obj_surface);
4879     i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
4880
4881     tmp_surface.base = (struct object_base *)obj_surface;
4882     tmp_surface.type = I965_SURFACE_TYPE_SURFACE;
4883     tmp_surface.flags = I965_SURFACE_FLAG_FRAME;
4884
4885     status = i965_image_plx_nv12_processing(ctx,
4886                                             src_surface,
4887                                             src_rect,
4888                                             &tmp_surface,
4889                                             dst_rect);
4890
4891     if (status == VA_STATUS_SUCCESS)
4892         status = i965_image_pl2_processing(ctx,
4893                                            &tmp_surface,
4894                                            dst_rect,
4895                                            dst_surface,
4896                                            dst_rect);
4897
4898     i965_DestroySurfaces(ctx,
4899                          &tmp_surface_id,
4900                          1);
4901
4902     return status;
4903 }
4904
4905
4906 static VAStatus
4907 i965_image_pl1_rgbx_processing(VADriverContextP ctx,
4908                                const struct i965_surface *src_surface,
4909                                const VARectangle *src_rect,
4910                                struct i965_surface *dst_surface,
4911                                const VARectangle *dst_rect)
4912 {
4913     struct i965_driver_data *i965 = i965_driver_data(ctx);
4914     struct i965_post_processing_context *pp_context = i965->pp_context;
4915     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4916     VAStatus vaStatus;
4917
4918     switch (fourcc) {
4919     case VA_FOURCC_NV12:
4920         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4921                                                  src_surface,
4922                                                  src_rect,
4923                                                  dst_surface,
4924                                                  dst_rect,
4925                                                  PP_RGBX_LOAD_SAVE_NV12,
4926                                                  NULL);
4927         intel_batchbuffer_flush(pp_context->batch);
4928         break;
4929
4930     default:
4931         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
4932                                                       i965_image_pl1_rgbx_processing,
4933                                                       src_surface,
4934                                                       src_rect,
4935                                                       dst_surface,
4936                                                       dst_rect);
4937         break;
4938     }
4939
4940     return vaStatus;
4941 }
4942
4943 static VAStatus
4944 i965_image_pl3_processing(VADriverContextP ctx,
4945                           const struct i965_surface *src_surface,
4946                           const VARectangle *src_rect,
4947                           struct i965_surface *dst_surface,
4948                           const VARectangle *dst_rect)
4949 {
4950     struct i965_driver_data *i965 = i965_driver_data(ctx);
4951     struct i965_post_processing_context *pp_context = i965->pp_context;
4952     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4953     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
4954
4955     switch (fourcc) {
4956     case VA_FOURCC_NV12:
4957         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4958                                                  src_surface,
4959                                                  src_rect,
4960                                                  dst_surface,
4961                                                  dst_rect,
4962                                                  PP_PL3_LOAD_SAVE_N12,
4963                                                  NULL);
4964         intel_batchbuffer_flush(pp_context->batch);
4965         break;
4966
4967     case VA_FOURCC_IMC1:
4968     case VA_FOURCC_IMC3:
4969     case VA_FOURCC_YV12:
4970     case VA_FOURCC_I420:
4971         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4972                                                  src_surface,
4973                                                  src_rect,
4974                                                  dst_surface,
4975                                                  dst_rect,
4976                                                  PP_PL3_LOAD_SAVE_PL3,
4977                                                  NULL);
4978         intel_batchbuffer_flush(pp_context->batch);
4979         break;
4980
4981     case VA_FOURCC_YUY2:
4982     case VA_FOURCC_UYVY:
4983         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4984                                                  src_surface,
4985                                                  src_rect,
4986                                                  dst_surface,
4987                                                  dst_rect,
4988                                                  PP_PL3_LOAD_SAVE_PA,
4989                                                  NULL);
4990         intel_batchbuffer_flush(pp_context->batch);
4991         break;
4992
4993     default:
4994         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
4995                                                       i965_image_pl3_processing,
4996                                                       src_surface,
4997                                                       src_rect,
4998                                                       dst_surface,
4999                                                       dst_rect);
5000         break;
5001     }
5002
5003     return vaStatus;
5004 }
5005
5006 static VAStatus
5007 i965_image_pl2_processing(VADriverContextP ctx,
5008                           const struct i965_surface *src_surface,
5009                           const VARectangle *src_rect,
5010                           struct i965_surface *dst_surface,
5011                           const VARectangle *dst_rect)
5012 {
5013     struct i965_driver_data *i965 = i965_driver_data(ctx);
5014     struct i965_post_processing_context *pp_context = i965->pp_context;
5015     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5016     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
5017
5018     switch (fourcc) {
5019     case VA_FOURCC_NV12:
5020         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5021                                                  src_surface,
5022                                                  src_rect,
5023                                                  dst_surface,
5024                                                  dst_rect,
5025                                                  PP_NV12_LOAD_SAVE_N12,
5026                                                  NULL);
5027         break;
5028
5029     case VA_FOURCC_IMC1:
5030     case VA_FOURCC_IMC3:
5031     case VA_FOURCC_YV12:
5032     case VA_FOURCC_I420:
5033         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5034                                                  src_surface,
5035                                                  src_rect,
5036                                                  dst_surface,
5037                                                  dst_rect,
5038                                                  PP_NV12_LOAD_SAVE_PL3,
5039                                                  NULL);
5040         break;
5041
5042     case VA_FOURCC_YUY2:
5043     case VA_FOURCC_UYVY:
5044         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5045                                                  src_surface,
5046                                                  src_rect,
5047                                                  dst_surface,
5048                                                  dst_rect,
5049                                                  PP_NV12_LOAD_SAVE_PA,
5050                                                  NULL);
5051         break;
5052
5053     case VA_FOURCC_BGRX:
5054     case VA_FOURCC_BGRA:
5055     case VA_FOURCC_RGBX:
5056     case VA_FOURCC_RGBA:
5057         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5058                                                  src_surface,
5059                                                  src_rect,
5060                                                  dst_surface,
5061                                                  dst_rect,
5062                                                  PP_NV12_LOAD_SAVE_RGBX,
5063                                                  NULL);
5064         break;
5065
5066     default:
5067         return VA_STATUS_ERROR_UNIMPLEMENTED;
5068     }
5069
5070     intel_batchbuffer_flush(pp_context->batch);
5071
5072     return vaStatus;
5073 }
5074
5075 static VAStatus
5076 i965_image_pl1_processing(VADriverContextP ctx,
5077                           const struct i965_surface *src_surface,
5078                           const VARectangle *src_rect,
5079                           struct i965_surface *dst_surface,
5080                           const VARectangle *dst_rect)
5081 {
5082     struct i965_driver_data *i965 = i965_driver_data(ctx);
5083     struct i965_post_processing_context *pp_context = i965->pp_context;
5084     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5085     VAStatus vaStatus;
5086
5087     switch (fourcc) {
5088     case VA_FOURCC_NV12:
5089         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5090                                                  src_surface,
5091                                                  src_rect,
5092                                                  dst_surface,
5093                                                  dst_rect,
5094                                                  PP_PA_LOAD_SAVE_NV12,
5095                                                  NULL);
5096         intel_batchbuffer_flush(pp_context->batch);
5097         break;
5098
5099     case VA_FOURCC_YV12:
5100         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5101                                                  src_surface,
5102                                                  src_rect,
5103                                                  dst_surface,
5104                                                  dst_rect,
5105                                                  PP_PA_LOAD_SAVE_PL3,
5106                                                  NULL);
5107         intel_batchbuffer_flush(pp_context->batch);
5108         break;
5109
5110     case VA_FOURCC_YUY2:
5111     case VA_FOURCC_UYVY:
5112         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5113                                                  src_surface,
5114                                                  src_rect,
5115                                                  dst_surface,
5116                                                  dst_rect,
5117                                                  PP_PA_LOAD_SAVE_PA,
5118                                                  NULL);
5119         intel_batchbuffer_flush(pp_context->batch);
5120         break;
5121
5122     default:
5123         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
5124                                                       i965_image_pl1_processing,
5125                                                       src_surface,
5126                                                       src_rect,
5127                                                       dst_surface,
5128                                                       dst_rect);
5129         break;
5130     }
5131
5132     return vaStatus;
5133 }
5134
5135 VAStatus
5136 i965_image_processing(VADriverContextP ctx,
5137                       const struct i965_surface *src_surface,
5138                       const VARectangle *src_rect,
5139                       struct i965_surface *dst_surface,
5140                       const VARectangle *dst_rect)
5141 {
5142     struct i965_driver_data *i965 = i965_driver_data(ctx);
5143     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
5144
5145     if (HAS_VPP(i965)) {
5146         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
5147
5148         _i965LockMutex(&i965->pp_mutex);
5149
5150         switch (fourcc) {
5151         case VA_FOURCC_YV12:
5152         case VA_FOURCC_I420:
5153         case VA_FOURCC_IMC1:
5154         case VA_FOURCC_IMC3:
5155         case VA_FOURCC_422H:
5156         case VA_FOURCC_422V:
5157         case VA_FOURCC_411P:
5158         case VA_FOURCC_444P:
5159         case VA_FOURCC_YV16:
5160             status = i965_image_pl3_processing(ctx,
5161                                                src_surface,
5162                                                src_rect,
5163                                                dst_surface,
5164                                                dst_rect);
5165             break;
5166
5167         case  VA_FOURCC_NV12:
5168             status = i965_image_pl2_processing(ctx,
5169                                                src_surface,
5170                                                src_rect,
5171                                                dst_surface,
5172                                                dst_rect);
5173             break;
5174         case VA_FOURCC_YUY2:
5175         case VA_FOURCC_UYVY:
5176             status = i965_image_pl1_processing(ctx,
5177                                                src_surface,
5178                                                src_rect,
5179                                                dst_surface,
5180                                                dst_rect);
5181             break;
5182         case VA_FOURCC_BGRA:
5183         case VA_FOURCC_BGRX:
5184         case VA_FOURCC_RGBA:
5185         case VA_FOURCC_RGBX:
5186             status = i965_image_pl1_rgbx_processing(ctx,
5187                                                src_surface,
5188                                                src_rect,
5189                                                dst_surface,
5190                                                dst_rect);
5191             break;
5192         default:
5193             status = VA_STATUS_ERROR_UNIMPLEMENTED;
5194             break;
5195         }
5196         
5197         _i965UnlockMutex(&i965->pp_mutex);
5198     }
5199
5200     return status;
5201 }       
5202
5203 static void
5204 i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
5205 {
5206     int i;
5207
5208     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
5209     pp_context->surface_state_binding_table.bo = NULL;
5210
5211     dri_bo_unreference(pp_context->curbe.bo);
5212     pp_context->curbe.bo = NULL;
5213
5214     dri_bo_unreference(pp_context->sampler_state_table.bo);
5215     pp_context->sampler_state_table.bo = NULL;
5216
5217     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
5218     pp_context->sampler_state_table.bo_8x8 = NULL;
5219
5220     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
5221     pp_context->sampler_state_table.bo_8x8_uv = NULL;
5222
5223     dri_bo_unreference(pp_context->idrt.bo);
5224     pp_context->idrt.bo = NULL;
5225     pp_context->idrt.num_interface_descriptors = 0;
5226
5227     dri_bo_unreference(pp_context->vfe_state.bo);
5228     pp_context->vfe_state.bo = NULL;
5229
5230     dri_bo_unreference(pp_context->pp_dndi_context.stmm_bo);
5231     pp_context->pp_dndi_context.stmm_bo = NULL;
5232
5233     dri_bo_unreference(pp_context->pp_dn_context.stmm_bo);
5234     pp_context->pp_dn_context.stmm_bo = NULL;
5235
5236     for (i = 0; i < NUM_PP_MODULES; i++) {
5237         struct pp_module *pp_module = &pp_context->pp_modules[i];
5238
5239         dri_bo_unreference(pp_module->kernel.bo);
5240         pp_module->kernel.bo = NULL;
5241     }
5242
5243     free(pp_context->pp_static_parameter);
5244     free(pp_context->pp_inline_parameter);
5245     pp_context->pp_static_parameter = NULL;
5246     pp_context->pp_inline_parameter = NULL;
5247 }
5248
5249 void
5250 i965_post_processing_terminate(VADriverContextP ctx)
5251 {
5252     struct i965_driver_data *i965 = i965_driver_data(ctx);
5253     struct i965_post_processing_context *pp_context = i965->pp_context;
5254
5255     if (pp_context) {
5256         pp_context->finalize(pp_context);
5257         free(pp_context);
5258     }
5259
5260     i965->pp_context = NULL;
5261 }
5262
5263 #define VPP_CURBE_ALLOCATION_SIZE       32
5264
5265 void
5266 i965_post_processing_context_init(VADriverContextP ctx,
5267                                   void *data,
5268                                   struct intel_batchbuffer *batch)
5269 {
5270     struct i965_driver_data *i965 = i965_driver_data(ctx);
5271     int i;
5272     struct i965_post_processing_context *pp_context = data;
5273
5274     if (IS_IRONLAKE(i965->intel.device_info)) {
5275         pp_context->urb.size = i965->intel.device_info->urb_size;
5276         pp_context->urb.num_vfe_entries = 32;
5277         pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
5278         pp_context->urb.num_cs_entries = 1;
5279         pp_context->urb.size_cs_entry = 2;
5280         pp_context->urb.vfe_start = 0;
5281         pp_context->urb.cs_start = pp_context->urb.vfe_start + 
5282             pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
5283         assert(pp_context->urb.cs_start +
5284            pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= i965->intel.device_info->urb_size);
5285         pp_context->intel_post_processing = ironlake_post_processing;
5286     } else {
5287         pp_context->vfe_gpu_state.max_num_threads = 60;
5288         pp_context->vfe_gpu_state.num_urb_entries = 59;
5289         pp_context->vfe_gpu_state.gpgpu_mode = 0;
5290         pp_context->vfe_gpu_state.urb_entry_size = 16 - 1;
5291         pp_context->vfe_gpu_state.curbe_allocation_size = VPP_CURBE_ALLOCATION_SIZE;
5292         pp_context->intel_post_processing = gen6_post_processing;
5293     }
5294
5295     pp_context->finalize = i965_post_processing_context_finalize;
5296
5297     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
5298     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
5299     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
5300     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen75));
5301
5302     if (IS_HASWELL(i965->intel.device_info))
5303         memcpy(pp_context->pp_modules, pp_modules_gen75, sizeof(pp_context->pp_modules));
5304     else if (IS_GEN7(i965->intel.device_info))
5305         memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
5306     else if (IS_GEN6(i965->intel.device_info))
5307         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
5308     else if (IS_IRONLAKE(i965->intel.device_info))
5309         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
5310
5311     for (i = 0; i < NUM_PP_MODULES; i++) {
5312         struct pp_module *pp_module = &pp_context->pp_modules[i];
5313         dri_bo_unreference(pp_module->kernel.bo);
5314         if (pp_module->kernel.bin && pp_module->kernel.size) {
5315             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
5316                                                 pp_module->kernel.name,
5317                                                 pp_module->kernel.size,
5318                                                 4096);
5319             assert(pp_module->kernel.bo);
5320             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
5321         } else {
5322             pp_module->kernel.bo = NULL;
5323         }
5324     }
5325
5326     /* static & inline parameters */
5327     if (IS_GEN7(i965->intel.device_info)) {
5328         pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
5329         pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
5330     } else {
5331         pp_context->pp_static_parameter = calloc(sizeof(struct pp_static_parameter), 1);
5332         pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
5333     }
5334
5335     pp_context->pp_dndi_context.current_out_surface = VA_INVALID_SURFACE;
5336     pp_context->pp_dndi_context.current_out_obj_surface = NULL;
5337     pp_context->pp_dndi_context.frame_order = -1;
5338     pp_context->batch = batch;
5339 }
5340
5341 bool
5342 i965_post_processing_init(VADriverContextP ctx)
5343 {
5344     struct i965_driver_data *i965 = i965_driver_data(ctx);
5345     struct i965_post_processing_context *pp_context = i965->pp_context;
5346
5347     if (HAS_VPP(i965)) {
5348         if (pp_context == NULL) {
5349             pp_context = calloc(1, sizeof(*pp_context));
5350             i965->codec_info->post_processing_context_init(ctx, pp_context, i965->pp_batch);
5351             i965->pp_context = pp_context;
5352         }
5353     }
5354
5355     return true;
5356 }
5357
5358 static const int procfilter_to_pp_flag[VAProcFilterCount] = {
5359     PP_NULL,    /* VAProcFilterNone */
5360     PP_NV12_DN, /* VAProcFilterNoiseReduction */
5361     PP_NV12_DNDI, /* VAProcFilterDeinterlacing */
5362     PP_NULL,    /* VAProcFilterSharpening */
5363     PP_NULL,    /* VAProcFilterColorBalance */
5364 };
5365
5366 static const int proc_frame_to_pp_frame[3] = {
5367     I965_SURFACE_FLAG_FRAME,
5368     I965_SURFACE_FLAG_TOP_FIELD_FIRST,
5369     I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST
5370 };
5371
5372 VAStatus 
5373 i965_proc_picture(VADriverContextP ctx, 
5374                   VAProfile profile, 
5375                   union codec_state *codec_state,
5376                   struct hw_context *hw_context)
5377 {
5378     struct i965_driver_data *i965 = i965_driver_data(ctx);
5379     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
5380     struct proc_state *proc_state = &codec_state->proc;
5381     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
5382     struct object_surface *obj_surface;
5383     struct i965_surface src_surface, dst_surface;
5384     VARectangle src_rect, dst_rect;
5385     VAStatus status;
5386     int i;
5387     VASurfaceID tmp_surfaces[VAProcFilterCount + 4];
5388     int num_tmp_surfaces = 0;
5389     unsigned int tiling = 0, swizzle = 0;
5390     int in_width, in_height;
5391
5392     if (pipeline_param->surface == VA_INVALID_ID ||
5393         proc_state->current_render_target == VA_INVALID_ID) {
5394         status = VA_STATUS_ERROR_INVALID_SURFACE;
5395         goto error;
5396     }
5397
5398     obj_surface = SURFACE(pipeline_param->surface);
5399
5400     if (!obj_surface) {
5401         status = VA_STATUS_ERROR_INVALID_SURFACE;
5402         goto error;
5403     }
5404
5405     if (!obj_surface->bo) {
5406         status = VA_STATUS_ERROR_INVALID_VALUE; /* The input surface is created without valid content */
5407         goto error;
5408     }
5409
5410     if (pipeline_param->num_filters && !pipeline_param->filters) {
5411         status = VA_STATUS_ERROR_INVALID_PARAMETER;
5412         goto error;
5413     }
5414
5415     in_width = obj_surface->orig_width;
5416     in_height = obj_surface->orig_height;
5417     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
5418
5419     src_surface.base = (struct object_base *)obj_surface;
5420     src_surface.type = I965_SURFACE_TYPE_SURFACE;
5421     src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
5422
5423     VASurfaceID out_surface_id = VA_INVALID_ID;
5424     if (obj_surface->fourcc != VA_FOURCC_NV12) {
5425         src_surface.base = (struct object_base *)obj_surface;
5426         src_surface.type = I965_SURFACE_TYPE_SURFACE;
5427         src_surface.flags = I965_SURFACE_FLAG_FRAME;
5428         src_rect.x = 0;
5429         src_rect.y = 0;
5430         src_rect.width = in_width;
5431         src_rect.height = in_height;
5432
5433         status = i965_CreateSurfaces(ctx,
5434                                      in_width,
5435                                      in_height,
5436                                      VA_RT_FORMAT_YUV420,
5437                                      1,
5438                                      &out_surface_id);
5439         assert(status == VA_STATUS_SUCCESS);
5440         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
5441         obj_surface = SURFACE(out_surface_id);
5442         assert(obj_surface);
5443         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5444
5445         dst_surface.base = (struct object_base *)obj_surface;
5446         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5447         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
5448         dst_rect.x = 0;
5449         dst_rect.y = 0;
5450         dst_rect.width = in_width;
5451         dst_rect.height = in_height;
5452
5453         status = i965_image_processing(ctx,
5454                                        &src_surface,
5455                                        &src_rect,
5456                                        &dst_surface,
5457                                        &dst_rect);
5458         assert(status == VA_STATUS_SUCCESS);
5459
5460         src_surface.base = (struct object_base *)obj_surface;
5461         src_surface.type = I965_SURFACE_TYPE_SURFACE;
5462         src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
5463     }
5464
5465     if (pipeline_param->surface_region) {
5466         src_rect.x = pipeline_param->surface_region->x;
5467         src_rect.y = pipeline_param->surface_region->y;
5468         src_rect.width = pipeline_param->surface_region->width;
5469         src_rect.height = pipeline_param->surface_region->height;
5470     } else {
5471         src_rect.x = 0;
5472         src_rect.y = 0;
5473         src_rect.width = in_width;
5474         src_rect.height = in_height;
5475     }
5476
5477     if (pipeline_param->output_region) {
5478         dst_rect.x = pipeline_param->output_region->x;
5479         dst_rect.y = pipeline_param->output_region->y;
5480         dst_rect.width = pipeline_param->output_region->width;
5481         dst_rect.height = pipeline_param->output_region->height;
5482     } else {
5483         dst_rect.x = 0;
5484         dst_rect.y = 0;
5485         dst_rect.width = in_width;
5486         dst_rect.height = in_height;
5487     }
5488
5489     proc_context->pp_context.pipeline_param = pipeline_param;
5490
5491     for (i = 0; i < pipeline_param->num_filters; i++) {
5492         struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]);
5493         VAProcFilterParameterBufferBase *filter_param = NULL;
5494         VAProcFilterType filter_type;
5495         int kernel_index;
5496
5497         if (!obj_buffer ||
5498             !obj_buffer->buffer_store ||
5499             !obj_buffer->buffer_store->buffer) {
5500             status = VA_STATUS_ERROR_INVALID_FILTER_CHAIN;
5501             goto error;
5502         }
5503
5504         out_surface_id = VA_INVALID_ID;
5505         filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer;
5506         filter_type = filter_param->type;
5507         kernel_index = procfilter_to_pp_flag[filter_type];
5508
5509         if (kernel_index != PP_NULL &&
5510             proc_context->pp_context.pp_modules[kernel_index].kernel.bo != NULL) {
5511             status = i965_CreateSurfaces(ctx,
5512                                          in_width,
5513                                          in_height,
5514                                          VA_RT_FORMAT_YUV420,
5515                                          1,
5516                                          &out_surface_id);
5517             assert(status == VA_STATUS_SUCCESS);
5518             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
5519             obj_surface = SURFACE(out_surface_id);
5520             assert(obj_surface);
5521             i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5522             dst_surface.base = (struct object_base *)obj_surface;
5523             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5524             status = i965_post_processing_internal(ctx, &proc_context->pp_context,
5525                                                    &src_surface,
5526                                                    &src_rect,
5527                                                    &dst_surface,
5528                                                    &src_rect,
5529                                                    kernel_index,
5530                                                    filter_param);
5531
5532             if (status == VA_STATUS_SUCCESS) {
5533                 src_surface.base = dst_surface.base;
5534                 src_surface.type = dst_surface.type;
5535                 src_surface.flags = dst_surface.flags;
5536             }
5537         }
5538     }
5539
5540     proc_context->pp_context.pipeline_param = NULL;
5541     obj_surface = SURFACE(proc_state->current_render_target);
5542     
5543     if (!obj_surface) {
5544         status = VA_STATUS_ERROR_INVALID_SURFACE;
5545         goto error;
5546     }
5547
5548     int csc_needed = 0;
5549     if (obj_surface->fourcc && obj_surface->fourcc !=  VA_FOURCC_NV12){
5550         csc_needed = 1;
5551         out_surface_id = VA_INVALID_ID;
5552         status = i965_CreateSurfaces(ctx,
5553                                      obj_surface->orig_width,
5554                                      obj_surface->orig_height,
5555                                      VA_RT_FORMAT_YUV420, 
5556                                      1,
5557                                      &out_surface_id);
5558         assert(status == VA_STATUS_SUCCESS);
5559         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
5560         struct object_surface *csc_surface = SURFACE(out_surface_id);
5561         assert(csc_surface);
5562         i965_check_alloc_surface_bo(ctx, csc_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5563         dst_surface.base = (struct object_base *)csc_surface;
5564     } else {
5565         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5566         dst_surface.base = (struct object_base *)obj_surface;
5567     }
5568
5569     dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5570     i965_vpp_clear_surface(ctx, &proc_context->pp_context, obj_surface, pipeline_param->output_background_color); 
5571
5572     // load/save doesn't support different origin offset for src and dst surface
5573     if (src_rect.width == dst_rect.width &&
5574         src_rect.height == dst_rect.height &&
5575         src_rect.x == dst_rect.x &&
5576         src_rect.y == dst_rect.y) {
5577         i965_post_processing_internal(ctx, &proc_context->pp_context,
5578                                       &src_surface,
5579                                       &src_rect,
5580                                       &dst_surface,
5581                                       &dst_rect,
5582                                       PP_NV12_LOAD_SAVE_N12,
5583                                       NULL);
5584     } else {
5585
5586         i965_post_processing_internal(ctx, &proc_context->pp_context,
5587                                       &src_surface,
5588                                       &src_rect,
5589                                       &dst_surface,
5590                                       &dst_rect,
5591                                       (pipeline_param->filter_flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC ?
5592                                       PP_NV12_AVS : PP_NV12_SCALING,
5593                                       NULL);
5594     }
5595
5596     if (csc_needed) {
5597         src_surface.base = dst_surface.base;
5598         src_surface.type = dst_surface.type;
5599         src_surface.flags = dst_surface.flags;
5600         dst_surface.base = (struct object_base *)obj_surface;
5601         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5602         i965_image_processing(ctx, &src_surface, &dst_rect, &dst_surface, &dst_rect);
5603     }
5604     
5605     if (num_tmp_surfaces)
5606         i965_DestroySurfaces(ctx,
5607                              tmp_surfaces,
5608                              num_tmp_surfaces);
5609
5610     intel_batchbuffer_flush(hw_context->batch);
5611
5612     return VA_STATUS_SUCCESS;
5613
5614 error:
5615     if (num_tmp_surfaces)
5616         i965_DestroySurfaces(ctx,
5617                              tmp_surfaces,
5618                              num_tmp_surfaces);
5619
5620     return status;
5621 }
5622
5623 static void
5624 i965_proc_context_destroy(void *hw_context)
5625 {
5626     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
5627
5628     i965_post_processing_context_finalize(&proc_context->pp_context);
5629     intel_batchbuffer_free(proc_context->base.batch);
5630     free(proc_context);
5631 }
5632
5633 struct hw_context *
5634 i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config)
5635 {
5636     struct i965_driver_data *i965 = i965_driver_data(ctx);
5637     struct intel_driver_data *intel = intel_driver_data(ctx);
5638     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
5639
5640     proc_context->base.destroy = i965_proc_context_destroy;
5641     proc_context->base.run = i965_proc_picture;
5642     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
5643     i965->codec_info->post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
5644
5645     return (struct hw_context *)proc_context;
5646 }
5647
5648