BDW doesn't support H.264 Baseline profile
[platform/upstream/libva-intel-driver.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_post_processing.h"
40 #include "i965_render.h"
41 #include "intel_media.h"
42
/*
 * Non-zero when the device id stored in ctx->intel.device_id is accepted by
 * any of the IS_IRONLAKE / IS_GEN6 / IS_GEN7 / IS_GEN8 predicates.
 */
#define HAS_PP(ctx)                                 \
    (IS_IRONLAKE((ctx)->intel.device_id) ||         \
     IS_GEN6((ctx)->intel.device_id)     ||         \
     IS_GEN7((ctx)->intel.device_id)     ||         \
     IS_GEN8((ctx)->intel.device_id))
47
48
/*
 * Largest of the Gen6, Gen7 and Gen8 padded surface-state sizes, so a single
 * slot size can be used for every generation.
 */
#define SURFACE_STATE_PADDED_SIZE                                               \
    MAX(MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7),    \
        SURFACE_STATE_PADDED_SIZE_GEN8)
51
/*
 * Byte offset of surface-state slot `index`; every slot occupies
 * SURFACE_STATE_PADDED_SIZE bytes.  The argument is parenthesized so that
 * expression arguments such as `base + 1` scale as a whole.
 */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
/* The binding table starts right after the last of the MAX_PP_SURFACES slots. */
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)
54
/*
 * Geometry constants shared with the GPU kernels.
 * NOTE(review): presumably the width/height of the pixel block one kernel
 * invocation handles and the required alignment of the x offset; confirm
 * against the shader sources before changing any of them.
 */
#define GPU_ASM_BLOCK_WIDTH         16
#define GPU_ASM_BLOCK_HEIGHT        8
#define GPU_ASM_X_OFFSET_ALIGNMENT  4
58
/*
 * File-local status value (0xFFFFFFFE).
 * NOTE(review): presumably a secondary "success" code kept distinct from
 * VA_STATUS_SUCCESS; its users are outside the visible part of the file,
 * so confirm the intended meaning there.
 */
#define VA_STATUS_SUCCESS_1                     0xFFFFFFFE
60
61 extern VAStatus
62 i965_DestroySurfaces(VADriverContextP ctx,
63                      VASurfaceID *surface_list,
64                      int num_surfaces);
65 extern VAStatus
66 i965_CreateSurfaces(VADriverContextP ctx,
67                     int width,
68                     int height,
69                     int format,
70                     int num_surfaces,
71                     VASurfaceID *surfaces);
72
/*
 * Gen5 post-processing kernel tables.
 *
 * Each array is filled at compile time from the matching ".g4b.gen5" file
 * under shaders/post_processing/gen5_6/; one row is four 32-bit words.
 * The arrays and their sizes are referenced from pp_modules_gen5.
 */
static const uint32_t pp_null_gen5[][4] = {
#include "shaders/post_processing/gen5_6/null.g4b.gen5"
};

static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
#include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
#include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
};

static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
#include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
};

static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
#include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
};

static const uint32_t pp_nv12_scaling_gen5[][4] = {
#include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_avs_gen5[][4] = {
#include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_dndi_gen5[][4] = {
#include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_dn_gen5[][4] = {
#include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_load_save_pa_gen5[][4] = {
#include "shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5"
};

static const uint32_t pp_pl3_load_save_pa_gen5[][4] = {
#include "shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5"
};

static const uint32_t pp_pa_load_save_nv12_gen5[][4] = {
#include "shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5"
};

static const uint32_t pp_pa_load_save_pl3_gen5[][4] = {
#include "shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5"
};

static const uint32_t pp_pa_load_save_pa_gen5[][4] = {
#include "shaders/post_processing/gen5_6/pa_load_save_pa.g4b.gen5"
};

static const uint32_t pp_rgbx_load_save_nv12_gen5[][4] = {
#include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_load_save_rgbx_gen5[][4] = {
#include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g4b.gen5"
};
136
137 static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
138                                    const struct i965_surface *src_surface,
139                                    const VARectangle *src_rect,
140                                    struct i965_surface *dst_surface,
141                                    const VARectangle *dst_rect,
142                                    void *filter_param);
143 static VAStatus pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
144                                             const struct i965_surface *src_surface,
145                                             const VARectangle *src_rect,
146                                             struct i965_surface *dst_surface,
147                                             const VARectangle *dst_rect,
148                                             void *filter_param);
149 static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
150                                            const struct i965_surface *src_surface,
151                                            const VARectangle *src_rect,
152                                            struct i965_surface *dst_surface,
153                                            const VARectangle *dst_rect,
154                                            void *filter_param);
155 static VAStatus gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
156                                              const struct i965_surface *src_surface,
157                                              const VARectangle *src_rect,
158                                              struct i965_surface *dst_surface,
159                                              const VARectangle *dst_rect,
160                                              void *filter_param);
161 static VAStatus pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
162                                                 const struct i965_surface *src_surface,
163                                                 const VARectangle *src_rect,
164                                                 struct i965_surface *dst_surface,
165                                                 const VARectangle *dst_rect,
166                                                 void *filter_param);
167 static VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
168                                         const struct i965_surface *src_surface,
169                                         const VARectangle *src_rect,
170                                         struct i965_surface *dst_surface,
171                                         const VARectangle *dst_rect,
172                                         void *filter_param);
173 static VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
174                                       const struct i965_surface *src_surface,
175                                       const VARectangle *src_rect,
176                                       struct i965_surface *dst_surface,
177                                       const VARectangle *dst_rect,
178                                       void *filter_param);
179
180 static struct pp_module pp_modules_gen5[] = {
181     {
182         {
183             "NULL module (for testing)",
184             PP_NULL,
185             pp_null_gen5,
186             sizeof(pp_null_gen5),
187             NULL,
188         },
189
190         pp_null_initialize,
191     },
192
193     {
194         {
195             "NV12_NV12",
196             PP_NV12_LOAD_SAVE_N12,
197             pp_nv12_load_save_nv12_gen5,
198             sizeof(pp_nv12_load_save_nv12_gen5),
199             NULL,
200         },
201
202         pp_plx_load_save_plx_initialize,
203     },
204
205     {
206         {
207             "NV12_PL3",
208             PP_NV12_LOAD_SAVE_PL3,
209             pp_nv12_load_save_pl3_gen5,
210             sizeof(pp_nv12_load_save_pl3_gen5),
211             NULL,
212         },
213
214         pp_plx_load_save_plx_initialize,
215     },
216
217     {
218         {
219             "PL3_NV12",
220             PP_PL3_LOAD_SAVE_N12,
221             pp_pl3_load_save_nv12_gen5,
222             sizeof(pp_pl3_load_save_nv12_gen5),
223             NULL,
224         },
225
226         pp_plx_load_save_plx_initialize,
227     },
228
229     {
230         {
231             "PL3_PL3",
232             PP_PL3_LOAD_SAVE_PL3,
233             pp_pl3_load_save_pl3_gen5,
234             sizeof(pp_pl3_load_save_pl3_gen5),
235             NULL,
236         },
237
238         pp_plx_load_save_plx_initialize
239     },
240
241     {
242         {
243             "NV12 Scaling module",
244             PP_NV12_SCALING,
245             pp_nv12_scaling_gen5,
246             sizeof(pp_nv12_scaling_gen5),
247             NULL,
248         },
249
250         pp_nv12_scaling_initialize,
251     },
252
253     {
254         {
255             "NV12 AVS module",
256             PP_NV12_AVS,
257             pp_nv12_avs_gen5,
258             sizeof(pp_nv12_avs_gen5),
259             NULL,
260         },
261
262         pp_nv12_avs_initialize_nlas,
263     },
264
265     {
266         {
267             "NV12 DNDI module",
268             PP_NV12_DNDI,
269             pp_nv12_dndi_gen5,
270             sizeof(pp_nv12_dndi_gen5),
271             NULL,
272         },
273
274         pp_nv12_dndi_initialize,
275     },
276
277     {
278         {
279             "NV12 DN module",
280             PP_NV12_DN,
281             pp_nv12_dn_gen5,
282             sizeof(pp_nv12_dn_gen5),
283             NULL,
284         },
285
286         pp_nv12_dn_initialize,
287     },
288
289     {
290         {
291             "NV12_PA module",
292             PP_NV12_LOAD_SAVE_PA,
293             pp_nv12_load_save_pa_gen5,
294             sizeof(pp_nv12_load_save_pa_gen5),
295             NULL,
296         },
297     
298         pp_plx_load_save_plx_initialize,
299     },
300
301     {
302         {
303             "PL3_PA module",
304             PP_PL3_LOAD_SAVE_PA,
305             pp_pl3_load_save_pa_gen5,
306             sizeof(pp_pl3_load_save_pa_gen5),
307             NULL,
308         },
309     
310         pp_plx_load_save_plx_initialize,
311     },
312
313     {
314         {
315             "PA_NV12 module",
316             PP_PA_LOAD_SAVE_NV12,
317             pp_pa_load_save_nv12_gen5,
318             sizeof(pp_pa_load_save_nv12_gen5),
319             NULL,
320         },
321     
322         pp_plx_load_save_plx_initialize,
323     },
324
325     {
326         {
327             "PA_PL3 module",
328             PP_PA_LOAD_SAVE_PL3,
329             pp_pa_load_save_pl3_gen5,
330             sizeof(pp_pa_load_save_pl3_gen5),
331             NULL,
332         },
333     
334         pp_plx_load_save_plx_initialize,
335     },
336
337     {
338         {
339             "PA_PA module",
340             PP_PA_LOAD_SAVE_PA,
341             pp_pa_load_save_pa_gen5,
342             sizeof(pp_pa_load_save_pa_gen5),
343             NULL,
344         },
345
346         pp_plx_load_save_plx_initialize,
347     },
348
349     {
350         {
351             "RGBX_NV12 module",
352             PP_RGBX_LOAD_SAVE_NV12,
353             pp_rgbx_load_save_nv12_gen5,
354             sizeof(pp_rgbx_load_save_nv12_gen5),
355             NULL,
356         },
357     
358         pp_plx_load_save_plx_initialize,
359     },
360             
361     {
362         {
363             "NV12_RGBX module",
364             PP_NV12_LOAD_SAVE_RGBX,
365             pp_nv12_load_save_rgbx_gen5,
366             sizeof(pp_nv12_load_save_rgbx_gen5),
367             NULL,
368         },
369     
370         pp_plx_load_save_plx_initialize,
371     },
372 };
373
/*
 * Gen6 post-processing kernel tables.
 *
 * Each array is filled at compile time from the matching ".g6b" file under
 * shaders/post_processing/gen5_6/; one row is four 32-bit words.
 * The arrays and their sizes are referenced from pp_modules_gen6.
 */
static const uint32_t pp_null_gen6[][4] = {
#include "shaders/post_processing/gen5_6/null.g6b"
};

static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
#include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
};

static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
#include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
};

static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
#include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
};

static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
#include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
};

/*
 * The scaling table includes the same nv12_avs_nv12.g6b file as
 * pp_nv12_avs_gen6 below.
 * NOTE(review): looks deliberate (gen6 has its own scaling initializer),
 * but confirm it is not a copy/paste slip.
 */
static const uint32_t pp_nv12_scaling_gen6[][4] = {
#include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
};

static const uint32_t pp_nv12_avs_gen6[][4] = {
#include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
};

static const uint32_t pp_nv12_dndi_gen6[][4] = {
#include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
};

static const uint32_t pp_nv12_dn_gen6[][4] = {
#include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
};

static const uint32_t pp_nv12_load_save_pa_gen6[][4] = {
#include "shaders/post_processing/gen5_6/nv12_load_save_pa.g6b"
};

static const uint32_t pp_pl3_load_save_pa_gen6[][4] = {
#include "shaders/post_processing/gen5_6/pl3_load_save_pa.g6b"
};

static const uint32_t pp_pa_load_save_nv12_gen6[][4] = {
#include "shaders/post_processing/gen5_6/pa_load_save_nv12.g6b"
};

static const uint32_t pp_pa_load_save_pl3_gen6[][4] = {
#include "shaders/post_processing/gen5_6/pa_load_save_pl3.g6b"
};

static const uint32_t pp_pa_load_save_pa_gen6[][4] = {
#include "shaders/post_processing/gen5_6/pa_load_save_pa.g6b"
};

static const uint32_t pp_rgbx_load_save_nv12_gen6[][4] = {
#include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g6b"
};

static const uint32_t pp_nv12_load_save_rgbx_gen6[][4] = {
#include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g6b"
};
437
438 static struct pp_module pp_modules_gen6[] = {
439     {
440         {
441             "NULL module (for testing)",
442             PP_NULL,
443             pp_null_gen6,
444             sizeof(pp_null_gen6),
445             NULL,
446         },
447
448         pp_null_initialize,
449     },
450
451     {
452         {
453             "NV12_NV12",
454             PP_NV12_LOAD_SAVE_N12,
455             pp_nv12_load_save_nv12_gen6,
456             sizeof(pp_nv12_load_save_nv12_gen6),
457             NULL,
458         },
459
460         pp_plx_load_save_plx_initialize,
461     },
462
463     {
464         {
465             "NV12_PL3",
466             PP_NV12_LOAD_SAVE_PL3,
467             pp_nv12_load_save_pl3_gen6,
468             sizeof(pp_nv12_load_save_pl3_gen6),
469             NULL,
470         },
471         
472         pp_plx_load_save_plx_initialize,
473     },
474
475     {
476         {
477             "PL3_NV12",
478             PP_PL3_LOAD_SAVE_N12,
479             pp_pl3_load_save_nv12_gen6,
480             sizeof(pp_pl3_load_save_nv12_gen6),
481             NULL,
482         },
483
484         pp_plx_load_save_plx_initialize,
485     },
486
487     {
488         {
489             "PL3_PL3",
490             PP_PL3_LOAD_SAVE_PL3,
491             pp_pl3_load_save_pl3_gen6,
492             sizeof(pp_pl3_load_save_pl3_gen6),
493             NULL,
494         },
495
496         pp_plx_load_save_plx_initialize,
497     },
498
499     {
500         {
501             "NV12 Scaling module",
502             PP_NV12_SCALING,
503             pp_nv12_scaling_gen6,
504             sizeof(pp_nv12_scaling_gen6),
505             NULL,
506         },
507
508         gen6_nv12_scaling_initialize,
509     },
510
511     {
512         {
513             "NV12 AVS module",
514             PP_NV12_AVS,
515             pp_nv12_avs_gen6,
516             sizeof(pp_nv12_avs_gen6),
517             NULL,
518         },
519
520         pp_nv12_avs_initialize_nlas,
521     },
522
523     {
524         {
525             "NV12 DNDI module",
526             PP_NV12_DNDI,
527             pp_nv12_dndi_gen6,
528             sizeof(pp_nv12_dndi_gen6),
529             NULL,
530         },
531
532         pp_nv12_dndi_initialize,
533     },
534
535     {
536         {
537             "NV12 DN module",
538             PP_NV12_DN,
539             pp_nv12_dn_gen6,
540             sizeof(pp_nv12_dn_gen6),
541             NULL,
542         },
543
544         pp_nv12_dn_initialize,
545     },
546     {
547         {
548             "NV12_PA module",
549             PP_NV12_LOAD_SAVE_PA,
550             pp_nv12_load_save_pa_gen6,
551             sizeof(pp_nv12_load_save_pa_gen6),
552             NULL,
553         },
554     
555         pp_plx_load_save_plx_initialize,
556     },
557
558     {
559         {
560             "PL3_PA module",
561             PP_PL3_LOAD_SAVE_PA,
562             pp_pl3_load_save_pa_gen6,
563             sizeof(pp_pl3_load_save_pa_gen6),
564             NULL,
565         },
566     
567         pp_plx_load_save_plx_initialize,
568     },
569
570     {
571         {
572             "PA_NV12 module",
573             PP_PA_LOAD_SAVE_NV12,
574             pp_pa_load_save_nv12_gen6,
575             sizeof(pp_pa_load_save_nv12_gen6),
576             NULL,
577         },
578     
579         pp_plx_load_save_plx_initialize,
580     },
581
582     {
583         {
584             "PA_PL3 module",
585             PP_PA_LOAD_SAVE_PL3,
586             pp_pa_load_save_pl3_gen6,
587             sizeof(pp_pa_load_save_pl3_gen6),
588             NULL,
589         },
590     
591         pp_plx_load_save_plx_initialize,
592     },
593
594     {
595         {
596             "PA_PA module",
597             PP_PA_LOAD_SAVE_PA,
598             pp_pa_load_save_pa_gen6,
599             sizeof(pp_pa_load_save_pa_gen6),
600             NULL,
601         },
602
603         pp_plx_load_save_plx_initialize,
604     },
605
606     {
607         {
608             "RGBX_NV12 module",
609             PP_RGBX_LOAD_SAVE_NV12,
610             pp_rgbx_load_save_nv12_gen6,
611             sizeof(pp_rgbx_load_save_nv12_gen6),
612             NULL,
613         },
614     
615         pp_plx_load_save_plx_initialize,
616     },
617
618     {
619         {
620             "NV12_RGBX module",
621             PP_NV12_LOAD_SAVE_RGBX,
622             pp_nv12_load_save_rgbx_gen6,
623             sizeof(pp_nv12_load_save_rgbx_gen6),
624             NULL,
625         },
626     
627         pp_plx_load_save_plx_initialize,
628     },
629 };
630
/*
 * Gen7 post-processing kernel tables.
 *
 * Each array is filled at compile time from the matching ".g7b" file under
 * shaders/post_processing/gen7/; one row is four 32-bit words.
 * The arrays and their sizes are referenced from pp_modules_gen7.
 */

/*
 * No kernel is supplied for the NULL module on gen7: the initializer is
 * empty.
 * NOTE(review): an empty brace initializer is not valid ISO C before C23,
 * so this relies on a compiler extension.
 */
static const uint32_t pp_null_gen7[][4] = {
};

static const uint32_t pp_nv12_load_save_nv12_gen7[][4] = {
#include "shaders/post_processing/gen7/pl2_to_pl2.g7b"
};

static const uint32_t pp_nv12_load_save_pl3_gen7[][4] = {
#include "shaders/post_processing/gen7/pl2_to_pl3.g7b"
};

static const uint32_t pp_pl3_load_save_nv12_gen7[][4] = {
#include "shaders/post_processing/gen7/pl3_to_pl2.g7b"
};

static const uint32_t pp_pl3_load_save_pl3_gen7[][4] = {
#include "shaders/post_processing/gen7/pl3_to_pl3.g7b"
};

/* The scaling and AVS tables both include the same avs.g7b file. */
static const uint32_t pp_nv12_scaling_gen7[][4] = {
#include "shaders/post_processing/gen7/avs.g7b"
};

static const uint32_t pp_nv12_avs_gen7[][4] = {
#include "shaders/post_processing/gen7/avs.g7b"
};

static const uint32_t pp_nv12_dndi_gen7[][4] = {
#include "shaders/post_processing/gen7/dndi.g7b"
};

static const uint32_t pp_nv12_dn_gen7[][4] = {
#include "shaders/post_processing/gen7/nv12_dn_nv12.g7b"
};
static const uint32_t pp_nv12_load_save_pa_gen7[][4] = {
#include "shaders/post_processing/gen7/pl2_to_pa.g7b"
};
static const uint32_t pp_pl3_load_save_pa_gen7[][4] = {
#include "shaders/post_processing/gen7/pl3_to_pa.g7b"
};
static const uint32_t pp_pa_load_save_nv12_gen7[][4] = {
#include "shaders/post_processing/gen7/pa_to_pl2.g7b"
};
static const uint32_t pp_pa_load_save_pl3_gen7[][4] = {
#include "shaders/post_processing/gen7/pa_to_pl3.g7b"
};
static const uint32_t pp_pa_load_save_pa_gen7[][4] = {
#include "shaders/post_processing/gen7/pa_to_pa.g7b"
};
static const uint32_t pp_rgbx_load_save_nv12_gen7[][4] = {
#include "shaders/post_processing/gen7/rgbx_to_nv12.g7b"
};
static const uint32_t pp_nv12_load_save_rgbx_gen7[][4] = {
#include "shaders/post_processing/gen7/pl2_to_rgbx.g7b"
};
686
687 static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
688                                            const struct i965_surface *src_surface,
689                                            const VARectangle *src_rect,
690                                            struct i965_surface *dst_surface,
691                                            const VARectangle *dst_rect,
692                                            void *filter_param);
693 static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
694                                              const struct i965_surface *src_surface,
695                                              const VARectangle *src_rect,
696                                              struct i965_surface *dst_surface,
697                                              const VARectangle *dst_rect,
698                                              void *filter_param);
699 static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
700                                            const struct i965_surface *src_surface,
701                                            const VARectangle *src_rect,
702                                            struct i965_surface *dst_surface,
703                                            const VARectangle *dst_rect,
704                                            void *filter_param);
705
706 static VAStatus gen7_pp_rgbx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
707                                            const struct i965_surface *src_surface,
708                                            const VARectangle *src_rect,
709                                            struct i965_surface *dst_surface,
710                                            const VARectangle *dst_rect,
711                                            void *filter_param);
712
713 static VAStatus gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
714                                            const struct i965_surface *src_surface,
715                                            const VARectangle *src_rect,
716                                            struct i965_surface *dst_surface,
717                                            const VARectangle *dst_rect,
718                                            void *filter_param);
719
720 static struct pp_module pp_modules_gen7[] = {
721     {
722         {
723             "NULL module (for testing)",
724             PP_NULL,
725             pp_null_gen7,
726             sizeof(pp_null_gen7),
727             NULL,
728         },
729
730         pp_null_initialize,
731     },
732
733     {
734         {
735             "NV12_NV12",
736             PP_NV12_LOAD_SAVE_N12,
737             pp_nv12_load_save_nv12_gen7,
738             sizeof(pp_nv12_load_save_nv12_gen7),
739             NULL,
740         },
741
742         gen7_pp_plx_avs_initialize,
743     },
744
745     {
746         {
747             "NV12_PL3",
748             PP_NV12_LOAD_SAVE_PL3,
749             pp_nv12_load_save_pl3_gen7,
750             sizeof(pp_nv12_load_save_pl3_gen7),
751             NULL,
752         },
753         
754         gen7_pp_plx_avs_initialize,
755     },
756
757     {
758         {
759             "PL3_NV12",
760             PP_PL3_LOAD_SAVE_N12,
761             pp_pl3_load_save_nv12_gen7,
762             sizeof(pp_pl3_load_save_nv12_gen7),
763             NULL,
764         },
765
766         gen7_pp_plx_avs_initialize,
767     },
768
769     {
770         {
771             "PL3_PL3",
772             PP_PL3_LOAD_SAVE_PL3,
773             pp_pl3_load_save_pl3_gen7,
774             sizeof(pp_pl3_load_save_pl3_gen7),
775             NULL,
776         },
777
778         gen7_pp_plx_avs_initialize,
779     },
780
781     {
782         {
783             "NV12 Scaling module",
784             PP_NV12_SCALING,
785             pp_nv12_scaling_gen7,
786             sizeof(pp_nv12_scaling_gen7),
787             NULL,
788         },
789
790         gen7_pp_plx_avs_initialize,
791     },
792
793     {
794         {
795             "NV12 AVS module",
796             PP_NV12_AVS,
797             pp_nv12_avs_gen7,
798             sizeof(pp_nv12_avs_gen7),
799             NULL,
800         },
801
802         gen7_pp_plx_avs_initialize,
803     },
804
805     {
806         {
807             "NV12 DNDI module",
808             PP_NV12_DNDI,
809             pp_nv12_dndi_gen7,
810             sizeof(pp_nv12_dndi_gen7),
811             NULL,
812         },
813
814         gen7_pp_nv12_dndi_initialize,
815     },
816
817     {
818         {
819             "NV12 DN module",
820             PP_NV12_DN,
821             pp_nv12_dn_gen7,
822             sizeof(pp_nv12_dn_gen7),
823             NULL,
824         },
825
826         gen7_pp_nv12_dn_initialize,
827     },
828     {
829         {
830             "NV12_PA module",
831             PP_NV12_LOAD_SAVE_PA,
832             pp_nv12_load_save_pa_gen7,
833             sizeof(pp_nv12_load_save_pa_gen7),
834             NULL,
835         },
836     
837         gen7_pp_plx_avs_initialize,
838     },
839
840     {
841         {
842             "PL3_PA module",
843             PP_PL3_LOAD_SAVE_PA,
844             pp_pl3_load_save_pa_gen7,
845             sizeof(pp_pl3_load_save_pa_gen7),
846             NULL,
847         },
848     
849         gen7_pp_plx_avs_initialize,
850     },
851
852     {
853         {
854             "PA_NV12 module",
855             PP_PA_LOAD_SAVE_NV12,
856             pp_pa_load_save_nv12_gen7,
857             sizeof(pp_pa_load_save_nv12_gen7),
858             NULL,
859         },
860     
861         gen7_pp_plx_avs_initialize,
862     },
863
864     {
865         {
866             "PA_PL3 module",
867             PP_PA_LOAD_SAVE_PL3,
868             pp_pa_load_save_pl3_gen7,
869             sizeof(pp_pa_load_save_pl3_gen7),
870             NULL,
871         },
872     
873         gen7_pp_plx_avs_initialize,
874     },
875
876     {
877         {
878             "PA_PA module",
879             PP_PA_LOAD_SAVE_PA,
880             pp_pa_load_save_pa_gen7,
881             sizeof(pp_pa_load_save_pa_gen7),
882             NULL,
883         },
884
885         gen7_pp_plx_avs_initialize,
886     },
887
888     {
889         {
890             "RGBX_NV12 module",
891             PP_RGBX_LOAD_SAVE_NV12,
892             pp_rgbx_load_save_nv12_gen7,
893             sizeof(pp_rgbx_load_save_nv12_gen7),
894             NULL,
895         },
896     
897         gen7_pp_rgbx_avs_initialize,
898     },
899
900     {
901         {
902             "NV12_RGBX module",
903             PP_NV12_LOAD_SAVE_RGBX,
904             pp_nv12_load_save_rgbx_gen7,
905             sizeof(pp_nv12_load_save_rgbx_gen7),
906             NULL,
907         },
908     
909         gen7_pp_plx_avs_initialize,
910     },
911             
912 };
913
/*
 * Gen7.5 post-processing kernel tables.
 *
 * Each populated array is filled at compile time from the matching ".g75b"
 * file under shaders/post_processing/gen7/; one row is four 32-bit words.
 * The arrays and their sizes are referenced from pp_modules_gen75.
 */

/*
 * No kernel is supplied for the NULL module: the initializer is empty.
 * NOTE(review): an empty brace initializer is not valid ISO C before C23,
 * so this relies on a compiler extension.
 */
static const uint32_t pp_null_gen75[][4] = {
};

static const uint32_t pp_nv12_load_save_nv12_gen75[][4] = {
#include "shaders/post_processing/gen7/pl2_to_pl2.g75b"
};

static const uint32_t pp_nv12_load_save_pl3_gen75[][4] = {
#include "shaders/post_processing/gen7/pl2_to_pl3.g75b"
};

static const uint32_t pp_pl3_load_save_nv12_gen75[][4] = {
#include "shaders/post_processing/gen7/pl3_to_pl2.g75b"
};

static const uint32_t pp_pl3_load_save_pl3_gen75[][4] = {
#include "shaders/post_processing/gen7/pl3_to_pl3.g75b"
};

/* The scaling and AVS tables both include the same avs.g75b file. */
static const uint32_t pp_nv12_scaling_gen75[][4] = {
#include "shaders/post_processing/gen7/avs.g75b"
};

static const uint32_t pp_nv12_avs_gen75[][4] = {
#include "shaders/post_processing/gen7/avs.g75b"
};

/*
 * The DNDI and DN includes below are commented out, so both arrays are
 * empty, while pp_modules_gen75 still lists modules that point at them.
 * NOTE(review): confirm whether these modules are expected to be usable on
 * this generation.
 */
static const uint32_t pp_nv12_dndi_gen75[][4] = {
// #include "shaders/post_processing/gen7/dndi.g75b"
};

static const uint32_t pp_nv12_dn_gen75[][4] = {
// #include "shaders/post_processing/gen7/nv12_dn_nv12.g75b"
};
static const uint32_t pp_nv12_load_save_pa_gen75[][4] = {
#include "shaders/post_processing/gen7/pl2_to_pa.g75b"
};
static const uint32_t pp_pl3_load_save_pa_gen75[][4] = {
#include "shaders/post_processing/gen7/pl3_to_pa.g75b"
};
static const uint32_t pp_pa_load_save_nv12_gen75[][4] = {
#include "shaders/post_processing/gen7/pa_to_pl2.g75b"
};
static const uint32_t pp_pa_load_save_pl3_gen75[][4] = {
#include "shaders/post_processing/gen7/pa_to_pl3.g75b"
};
static const uint32_t pp_pa_load_save_pa_gen75[][4] = {
#include "shaders/post_processing/gen7/pa_to_pa.g75b"
};
static const uint32_t pp_rgbx_load_save_nv12_gen75[][4] = {
#include "shaders/post_processing/gen7/rgbx_to_nv12.g75b"
};
static const uint32_t pp_nv12_load_save_rgbx_gen75[][4] = {
#include "shaders/post_processing/gen7/pl2_to_rgbx.g75b"
};
969
/*
 * Post-processing module table built on the gen7.5 kernel binaries.
 *
 * Every entry is a positional initializer made of
 *   - a kernel descriptor: display name, PP_* identifier, kernel binary,
 *     size of the binary in bytes, and a trailing NULL (presumably the
 *     kernel buffer object, filled in later -- confirm against the struct
 *     declaration);
 *   - the initialize callback for that module.
 *
 * NOTE(review): the entries look like they are meant to stay in PP_* order
 * because the active module is looked up by index elsewhere in this file;
 * verify before reordering or inserting entries.
 */
static struct pp_module pp_modules_gen75[] = {
    {
        {
            "NULL module (for testing)",
            PP_NULL,
            pp_null_gen75,
            sizeof(pp_null_gen75),
            NULL,
        },

        pp_null_initialize,
    },

    {
        {
            "NV12_NV12",
            PP_NV12_LOAD_SAVE_N12,
            pp_nv12_load_save_nv12_gen75,
            sizeof(pp_nv12_load_save_nv12_gen75),
            NULL,
        },

        gen7_pp_plx_avs_initialize,
    },

    {
        {
            "NV12_PL3",
            PP_NV12_LOAD_SAVE_PL3,
            pp_nv12_load_save_pl3_gen75,
            sizeof(pp_nv12_load_save_pl3_gen75),
            NULL,
        },

        gen7_pp_plx_avs_initialize,
    },

    {
        {
            "PL3_NV12",
            PP_PL3_LOAD_SAVE_N12,
            pp_pl3_load_save_nv12_gen75,
            sizeof(pp_pl3_load_save_nv12_gen75),
            NULL,
        },

        gen7_pp_plx_avs_initialize,
    },

    {
        {
            "PL3_PL3",
            PP_PL3_LOAD_SAVE_PL3,
            pp_pl3_load_save_pl3_gen75,
            sizeof(pp_pl3_load_save_pl3_gen75),
            NULL,
        },

        gen7_pp_plx_avs_initialize,
    },

    {
        {
            "NV12 Scaling module",
            PP_NV12_SCALING,
            pp_nv12_scaling_gen75,
            sizeof(pp_nv12_scaling_gen75),
            NULL,
        },

        gen7_pp_plx_avs_initialize,
    },

    {
        {
            "NV12 AVS module",
            PP_NV12_AVS,
            pp_nv12_avs_gen75,
            sizeof(pp_nv12_avs_gen75),
            NULL,
        },

        gen7_pp_plx_avs_initialize,
    },

    /* The DNDI kernel table is empty in this file (its include is commented out). */
    {
        {
            "NV12 DNDI module",
            PP_NV12_DNDI,
            pp_nv12_dndi_gen75,
            sizeof(pp_nv12_dndi_gen75),
            NULL,
        },

        gen7_pp_nv12_dndi_initialize,
    },

    /* The DN kernel table is empty in this file (its include is commented out). */
    {
        {
            "NV12 DN module",
            PP_NV12_DN,
            pp_nv12_dn_gen75,
            sizeof(pp_nv12_dn_gen75),
            NULL,
        },

        gen7_pp_nv12_dn_initialize,
    },

    {
        {
            "NV12_PA module",
            PP_NV12_LOAD_SAVE_PA,
            pp_nv12_load_save_pa_gen75,
            sizeof(pp_nv12_load_save_pa_gen75),
            NULL,
        },

        gen7_pp_plx_avs_initialize,
    },

    {
        {
            "PL3_PA module",
            PP_PL3_LOAD_SAVE_PA,
            pp_pl3_load_save_pa_gen75,
            sizeof(pp_pl3_load_save_pa_gen75),
            NULL,
        },

        gen7_pp_plx_avs_initialize,
    },

    {
        {
            "PA_NV12 module",
            PP_PA_LOAD_SAVE_NV12,
            pp_pa_load_save_nv12_gen75,
            sizeof(pp_pa_load_save_nv12_gen75),
            NULL,
        },

        gen7_pp_plx_avs_initialize,
    },

    {
        {
            "PA_PL3 module",
            PP_PA_LOAD_SAVE_PL3,
            pp_pa_load_save_pl3_gen75,
            sizeof(pp_pa_load_save_pl3_gen75),
            NULL,
        },

        gen7_pp_plx_avs_initialize,
    },

    {
        {
            "PA_PA module",
            PP_PA_LOAD_SAVE_PA,
            pp_pa_load_save_pa_gen75,
            sizeof(pp_pa_load_save_pa_gen75),
            NULL,
        },

        gen7_pp_plx_avs_initialize,
    },

    /* Only entry in this table that uses the RGBX-specific initializer. */
    {
        {
            "RGBX_NV12 module",
            PP_RGBX_LOAD_SAVE_NV12,
            pp_rgbx_load_save_nv12_gen75,
            sizeof(pp_rgbx_load_save_nv12_gen75),
            NULL,
        },

        gen7_pp_rgbx_avs_initialize,
    },

    {
        {
            "NV12_RGBX module",
            PP_NV12_LOAD_SAVE_RGBX,
            pp_nv12_load_save_rgbx_gen75,
            sizeof(pp_nv12_load_save_rgbx_gen75),
            NULL,
        },

        gen7_pp_plx_avs_initialize,
    },

};
1164
/*
 * Kernel binary tables for the gen8 post-processing modules.  Each row is
 * four 32-bit words taken from the included .g8b file.
 *
 * TODO: Modify the shaders and then compile them again.
 * Currently they are derived from Haswell.
 */

/* Kernel for the "NULL module (for testing)" entry: intentionally empty. */
static const uint32_t pp_null_gen8[][4] = {
};

/* Kernel for the "NV12_NV12" entry (pl2_to_pl2). */
static const uint32_t pp_nv12_load_save_nv12_gen8[][4] = {
#include "shaders/post_processing/gen8/pl2_to_pl2.g8b"
};

/* Kernel for the "NV12_PL3" entry (pl2_to_pl3). */
static const uint32_t pp_nv12_load_save_pl3_gen8[][4] = {
#include "shaders/post_processing/gen8/pl2_to_pl3.g8b"
};

/* Kernel for the "PL3_NV12" entry (pl3_to_pl2). */
static const uint32_t pp_pl3_load_save_nv12_gen8[][4] = {
#include "shaders/post_processing/gen8/pl3_to_pl2.g8b"
};

/* Kernel for the "PL3_PL3" entry (pl3_to_pl3). */
static const uint32_t pp_pl3_load_save_pl3_gen8[][4] = {
#include "shaders/post_processing/gen8/pl3_to_pl3.g8b"
};

/* Kernel for the "NV12 Scaling module" entry; same binary as NV12_NV12. */
static const uint32_t pp_nv12_scaling_gen8[][4] = {
#include "shaders/post_processing/gen8/pl2_to_pl2.g8b"
};

/* Kernel for the "NV12 AVS module" entry; same binary as NV12_NV12. */
static const uint32_t pp_nv12_avs_gen8[][4] = {
#include "shaders/post_processing/gen8/pl2_to_pl2.g8b"
};

/*
 * Kernel for the "NV12 DNDI module" entry.  The include (which still names
 * the gen7 .g75b binary) is commented out, so this table is empty.
 */
static const uint32_t pp_nv12_dndi_gen8[][4] = {
// #include "shaders/post_processing/gen7/dndi.g75b"
};

/*
 * Kernel for the "NV12 DN module" entry.  The include (which still names
 * the gen7 .g75b binary) is commented out, so this table is empty.
 */
static const uint32_t pp_nv12_dn_gen8[][4] = {
// #include "shaders/post_processing/gen7/nv12_dn_nv12.g75b"
};
/* Kernel for the "NV12_PA module" entry (pl2_to_pa). */
static const uint32_t pp_nv12_load_save_pa_gen8[][4] = {
#include "shaders/post_processing/gen8/pl2_to_pa.g8b"
};
/* Kernel for the "PL3_PA module" entry (pl3_to_pa). */
static const uint32_t pp_pl3_load_save_pa_gen8[][4] = {
#include "shaders/post_processing/gen8/pl3_to_pa.g8b"
};
/* Kernel for the "PA_NV12 module" entry (pa_to_pl2). */
static const uint32_t pp_pa_load_save_nv12_gen8[][4] = {
#include "shaders/post_processing/gen8/pa_to_pl2.g8b"
};
/* Kernel for the "PA_PL3 module" entry (pa_to_pl3). */
static const uint32_t pp_pa_load_save_pl3_gen8[][4] = {
#include "shaders/post_processing/gen8/pa_to_pl3.g8b"
};
/*
 * Kernel for the "PA_PA module" entry.  This table is empty and, unlike the
 * DNDI/DN tables, has no commented-out include.
 * NOTE(review): the gen7.5 counterpart includes pa_to_pa.g75b; presumably a
 * gen8 pa_to_pa binary is still missing here -- confirm and add the include
 * once the shader exists.
 */
static const uint32_t pp_pa_load_save_pa_gen8[][4] = {
};
/* Kernel for the "RGBX_NV12 module" entry (rgbx_to_nv12). */
static const uint32_t pp_rgbx_load_save_nv12_gen8[][4] = {
#include "shaders/post_processing/gen8/rgbx_to_nv12.g8b"
};
/* Kernel for the "NV12_RGBX module" entry (pl2_to_rgbx). */
static const uint32_t pp_nv12_load_save_rgbx_gen8[][4] = {
#include "shaders/post_processing/gen8/pl2_to_rgbx.g8b"
};
1221
1222
1223 static struct pp_module pp_modules_gen8[] = {
1224     {
1225         {
1226             "NULL module (for testing)",
1227             PP_NULL,
1228             pp_null_gen8,
1229             sizeof(pp_null_gen8),
1230             NULL,
1231         },
1232
1233         pp_null_initialize,
1234     },
1235
1236     {
1237         {
1238             "NV12_NV12",
1239             PP_NV12_LOAD_SAVE_N12,
1240             pp_nv12_load_save_nv12_gen8,
1241             sizeof(pp_nv12_load_save_nv12_gen8),
1242             NULL,
1243         },
1244
1245         gen8_pp_plx_avs_initialize,
1246     },
1247
1248     {
1249         {
1250             "NV12_PL3",
1251             PP_NV12_LOAD_SAVE_PL3,
1252             pp_nv12_load_save_pl3_gen8,
1253             sizeof(pp_nv12_load_save_pl3_gen8),
1254             NULL,
1255         },
1256         
1257         gen8_pp_plx_avs_initialize,
1258     },
1259
1260     {
1261         {
1262             "PL3_NV12",
1263             PP_PL3_LOAD_SAVE_N12,
1264             pp_pl3_load_save_nv12_gen8,
1265             sizeof(pp_pl3_load_save_nv12_gen8),
1266             NULL,
1267         },
1268
1269         gen8_pp_plx_avs_initialize,
1270     },
1271
1272     {
1273         {
1274             "PL3_PL3",
1275             PP_PL3_LOAD_SAVE_N12,
1276             pp_pl3_load_save_pl3_gen8,
1277             sizeof(pp_pl3_load_save_pl3_gen8),
1278             NULL,
1279         },
1280
1281         gen8_pp_plx_avs_initialize,
1282     },
1283
1284     {
1285         {
1286             "NV12 Scaling module",
1287             PP_NV12_SCALING,
1288             pp_nv12_scaling_gen8,
1289             sizeof(pp_nv12_scaling_gen8),
1290             NULL,
1291         },
1292
1293         gen8_pp_plx_avs_initialize,
1294     },
1295
1296     {
1297         {
1298             "NV12 AVS module",
1299             PP_NV12_AVS,
1300             pp_nv12_avs_gen8,
1301             sizeof(pp_nv12_avs_gen8),
1302             NULL,
1303         },
1304
1305         gen8_pp_plx_avs_initialize,
1306     },
1307
1308     {
1309         {
1310             "NV12 DNDI module",
1311             PP_NV12_DNDI,
1312             pp_nv12_dndi_gen8,
1313             sizeof(pp_nv12_dndi_gen8),
1314             NULL,
1315         },
1316
1317         gen7_pp_nv12_dndi_initialize,
1318     },
1319
1320     {
1321         {
1322             "NV12 DN module",
1323             PP_NV12_DN,
1324             pp_nv12_dn_gen8,
1325             sizeof(pp_nv12_dn_gen8),
1326             NULL,
1327         },
1328
1329         gen7_pp_nv12_dn_initialize,
1330     },
1331     {
1332         {
1333             "NV12_PA module",
1334             PP_NV12_LOAD_SAVE_PA,
1335             pp_nv12_load_save_pa_gen8,
1336             sizeof(pp_nv12_load_save_pa_gen8),
1337             NULL,
1338         },
1339     
1340         gen8_pp_plx_avs_initialize,
1341     },
1342
1343     {
1344         {
1345             "PL3_PA module",
1346             PP_PL3_LOAD_SAVE_PA,
1347             pp_pl3_load_save_pa_gen8,
1348             sizeof(pp_pl3_load_save_pa_gen8),
1349             NULL,
1350         },
1351     
1352         gen8_pp_plx_avs_initialize,
1353     },
1354
1355     {
1356         {
1357             "PA_NV12 module",
1358             PP_PA_LOAD_SAVE_NV12,
1359             pp_pa_load_save_nv12_gen8,
1360             sizeof(pp_pa_load_save_nv12_gen8),
1361             NULL,
1362         },
1363     
1364         gen8_pp_plx_avs_initialize,
1365     },
1366
1367     {
1368         {
1369             "PA_PL3 module",
1370             PP_PA_LOAD_SAVE_PL3,
1371             pp_pa_load_save_pl3_gen8,
1372             sizeof(pp_pa_load_save_pl3_gen8),
1373             NULL,
1374         },
1375     
1376         gen8_pp_plx_avs_initialize,
1377     },
1378     
1379     {
1380         {
1381             "PA_PA module",
1382             PP_PA_LOAD_SAVE_PA,
1383             pp_pa_load_save_pa_gen8,
1384             sizeof(pp_pa_load_save_pa_gen8),
1385             NULL,
1386         },
1387
1388         gen8_pp_plx_avs_initialize,
1389     },
1390
1391     {
1392         {
1393             "RGBX_NV12 module",
1394             PP_RGBX_LOAD_SAVE_NV12,
1395             pp_rgbx_load_save_nv12_gen8,
1396             sizeof(pp_rgbx_load_save_nv12_gen8),
1397             NULL,
1398         },
1399     
1400         gen8_pp_plx_avs_initialize,
1401     },
1402
1403     {
1404         {
1405             "NV12_RGBX module",
1406             PP_NV12_LOAD_SAVE_RGBX,
1407             pp_nv12_load_save_rgbx_gen8,
1408             sizeof(pp_nv12_load_save_rgbx_gen8),
1409             NULL,
1410         },
1411     
1412         gen8_pp_plx_avs_initialize,
1413     },
1414             
1415 };
1416
1417           
1418 static int
1419 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
1420 {
1421     int fourcc;
1422
1423     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
1424         struct object_image *obj_image = (struct object_image *)surface->base;
1425         fourcc = obj_image->image.format.fourcc;
1426     } else {
1427         struct object_surface *obj_surface = (struct object_surface *)surface->base;
1428         fourcc = obj_surface->fourcc;
1429     }
1430
1431     return fourcc;
1432 }
1433
1434 static void
1435 pp_get_surface_size(VADriverContextP ctx, const struct i965_surface *surface, int *width, int *height)
1436 {
1437     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
1438         struct object_image *obj_image = (struct object_image *)surface->base;
1439
1440         *width = obj_image->image.width;
1441         *height = obj_image->image.height;
1442     } else {
1443         struct object_surface *obj_surface = (struct object_surface *)surface->base;
1444
1445         *width = obj_surface->orig_width;
1446         *height = obj_surface->orig_height;
1447     }
1448 }
1449
1450 static void
1451 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
1452 {
1453     switch (tiling) {
1454     case I915_TILING_NONE:
1455         ss->ss3.tiled_surface = 0;
1456         ss->ss3.tile_walk = 0;
1457         break;
1458     case I915_TILING_X:
1459         ss->ss3.tiled_surface = 1;
1460         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
1461         break;
1462     case I915_TILING_Y:
1463         ss->ss3.tiled_surface = 1;
1464         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
1465         break;
1466     }
1467 }
1468
1469 static void
1470 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
1471 {
1472     switch (tiling) {
1473     case I915_TILING_NONE:
1474         ss->ss2.tiled_surface = 0;
1475         ss->ss2.tile_walk = 0;
1476         break;
1477     case I915_TILING_X:
1478         ss->ss2.tiled_surface = 1;
1479         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1480         break;
1481     case I915_TILING_Y:
1482         ss->ss2.tiled_surface = 1;
1483         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1484         break;
1485     }
1486 }
1487
1488 static void
1489 gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
1490 {
1491     switch (tiling) {
1492     case I915_TILING_NONE:
1493         ss->ss0.tiled_surface = 0;
1494         ss->ss0.tile_walk = 0;
1495         break;
1496     case I915_TILING_X:
1497         ss->ss0.tiled_surface = 1;
1498         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
1499         break;
1500     case I915_TILING_Y:
1501         ss->ss0.tiled_surface = 1;
1502         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
1503         break;
1504     }
1505 }
1506
1507 static void
1508 gen8_pp_set_surface_tiling(struct gen8_surface_state *ss, unsigned int tiling)
1509 {
1510     switch (tiling) {
1511     case I915_TILING_NONE:
1512         ss->ss0.tiled_surface = 0;
1513         ss->ss0.tile_walk = 0;
1514         break;
1515     case I915_TILING_X:
1516         ss->ss0.tiled_surface = 1;
1517         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
1518         break;
1519     case I915_TILING_Y:
1520         ss->ss0.tiled_surface = 1;
1521         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
1522         break;
1523     }
1524 }
1525
1526 static void
1527 gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
1528 {
1529     switch (tiling) {
1530     case I915_TILING_NONE:
1531         ss->ss2.tiled_surface = 0;
1532         ss->ss2.tile_walk = 0;
1533         break;
1534     case I915_TILING_X:
1535         ss->ss2.tiled_surface = 1;
1536         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1537         break;
1538     case I915_TILING_Y:
1539         ss->ss2.tiled_surface = 1;
1540         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1541         break;
1542     }
1543 }
1544
1545 static void
1546 gen8_pp_set_surface2_tiling(struct gen8_surface_state2 *ss, unsigned int tiling)
1547 {
1548     switch (tiling) {
1549     case I915_TILING_NONE:
1550         ss->ss2.tiled_surface = 0;
1551         ss->ss2.tile_walk = 0;
1552         break;
1553     case I915_TILING_X:
1554         ss->ss2.tiled_surface = 1;
1555         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1556         break;
1557     case I915_TILING_Y:
1558         ss->ss2.tiled_surface = 1;
1559         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1560         break;
1561     }
1562 }
1563
1564 static void
1565 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
1566 {
1567     struct i965_interface_descriptor *desc;
1568     dri_bo *bo;
1569     int pp_index = pp_context->current_pp;
1570
1571     bo = pp_context->idrt.bo;
1572     dri_bo_map(bo, 1);
1573     assert(bo->virtual);
1574     desc = bo->virtual;
1575     memset(desc, 0, sizeof(*desc));
1576     desc->desc0.grf_reg_blocks = 10;
1577     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
1578     desc->desc1.const_urb_entry_read_offset = 0;
1579     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
1580     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
1581     desc->desc2.sampler_count = 0;
1582     desc->desc3.binding_table_entry_count = 0;
1583     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
1584
1585     dri_bo_emit_reloc(bo,
1586                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1587                       desc->desc0.grf_reg_blocks,
1588                       offsetof(struct i965_interface_descriptor, desc0),
1589                       pp_context->pp_modules[pp_index].kernel.bo);
1590
1591     dri_bo_emit_reloc(bo,
1592                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1593                       desc->desc2.sampler_count << 2,
1594                       offsetof(struct i965_interface_descriptor, desc2),
1595                       pp_context->sampler_state_table.bo);
1596
1597     dri_bo_unmap(bo);
1598     pp_context->idrt.num_interface_descriptors++;
1599 }
1600
1601 static void
1602 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
1603 {
1604     struct i965_vfe_state *vfe_state;
1605     dri_bo *bo;
1606
1607     bo = pp_context->vfe_state.bo;
1608     dri_bo_map(bo, 1);
1609     assert(bo->virtual);
1610     vfe_state = bo->virtual;
1611     memset(vfe_state, 0, sizeof(*vfe_state));
1612     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
1613     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
1614     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
1615     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
1616     vfe_state->vfe1.children_present = 0;
1617     vfe_state->vfe2.interface_descriptor_base = 
1618         pp_context->idrt.bo->offset >> 4; /* reloc */
1619     dri_bo_emit_reloc(bo,
1620                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1621                       0,
1622                       offsetof(struct i965_vfe_state, vfe2),
1623                       pp_context->idrt.bo);
1624     dri_bo_unmap(bo);
1625 }
1626
1627 static void
1628 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
1629 {
1630     unsigned char *constant_buffer;
1631     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1632
1633     assert(sizeof(*pp_static_parameter) == 128);
1634     dri_bo_map(pp_context->curbe.bo, 1);
1635     assert(pp_context->curbe.bo->virtual);
1636     constant_buffer = pp_context->curbe.bo->virtual;
1637     memcpy(constant_buffer, pp_static_parameter, sizeof(*pp_static_parameter));
1638     dri_bo_unmap(pp_context->curbe.bo);
1639 }
1640
1641 static void
1642 ironlake_pp_states_setup(VADriverContextP ctx,
1643                          struct i965_post_processing_context *pp_context)
1644 {
1645     ironlake_pp_interface_descriptor_table(pp_context);
1646     ironlake_pp_vfe_state(pp_context);
1647     ironlake_pp_upload_constants(pp_context);
1648 }
1649
1650 static void
1651 ironlake_pp_pipeline_select(VADriverContextP ctx,
1652                             struct i965_post_processing_context *pp_context)
1653 {
1654     struct intel_batchbuffer *batch = pp_context->batch;
1655
1656     BEGIN_BATCH(batch, 1);
1657     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
1658     ADVANCE_BATCH(batch);
1659 }
1660
1661 static void
1662 ironlake_pp_urb_layout(VADriverContextP ctx,
1663                        struct i965_post_processing_context *pp_context)
1664 {
1665     struct intel_batchbuffer *batch = pp_context->batch;
1666     unsigned int vfe_fence, cs_fence;
1667
1668     vfe_fence = pp_context->urb.cs_start;
1669     cs_fence = pp_context->urb.size;
1670
1671     BEGIN_BATCH(batch, 3);
1672     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
1673     OUT_BATCH(batch, 0);
1674     OUT_BATCH(batch, 
1675               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
1676               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
1677     ADVANCE_BATCH(batch);
1678 }
1679
1680 static void
1681 ironlake_pp_state_base_address(VADriverContextP ctx,
1682                                struct i965_post_processing_context *pp_context)
1683 {
1684     struct intel_batchbuffer *batch = pp_context->batch;
1685
1686     BEGIN_BATCH(batch, 8);
1687     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
1688     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1689     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
1690     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1691     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1692     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1693     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1694     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1695     ADVANCE_BATCH(batch);
1696 }
1697
1698 static void
1699 ironlake_pp_state_pointers(VADriverContextP ctx,
1700                            struct i965_post_processing_context *pp_context)
1701 {
1702     struct intel_batchbuffer *batch = pp_context->batch;
1703
1704     BEGIN_BATCH(batch, 3);
1705     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
1706     OUT_BATCH(batch, 0);
1707     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1708     ADVANCE_BATCH(batch);
1709 }
1710
1711 static void 
1712 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
1713                           struct i965_post_processing_context *pp_context)
1714 {
1715     struct intel_batchbuffer *batch = pp_context->batch;
1716
1717     BEGIN_BATCH(batch, 2);
1718     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
1719     OUT_BATCH(batch,
1720               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
1721               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
1722     ADVANCE_BATCH(batch);
1723 }
1724
1725 static void
1726 ironlake_pp_constant_buffer(VADriverContextP ctx,
1727                             struct i965_post_processing_context *pp_context)
1728 {
1729     struct intel_batchbuffer *batch = pp_context->batch;
1730
1731     BEGIN_BATCH(batch, 2);
1732     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1733     OUT_RELOC(batch, pp_context->curbe.bo,
1734               I915_GEM_DOMAIN_INSTRUCTION, 0,
1735               pp_context->urb.size_cs_entry - 1);
1736     ADVANCE_BATCH(batch);    
1737 }
1738
1739 static void
1740 ironlake_pp_object_walker(VADriverContextP ctx,
1741                           struct i965_post_processing_context *pp_context)
1742 {
1743     struct intel_batchbuffer *batch = pp_context->batch;
1744     int x, x_steps, y, y_steps;
1745     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1746
1747     x_steps = pp_context->pp_x_steps(pp_context->private_context);
1748     y_steps = pp_context->pp_y_steps(pp_context->private_context);
1749
1750     for (y = 0; y < y_steps; y++) {
1751         for (x = 0; x < x_steps; x++) {
1752             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
1753                 BEGIN_BATCH(batch, 20);
1754                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
1755                 OUT_BATCH(batch, 0);
1756                 OUT_BATCH(batch, 0); /* no indirect data */
1757                 OUT_BATCH(batch, 0);
1758
1759                 /* inline data grf 5-6 */
1760                 assert(sizeof(*pp_inline_parameter) == 64);
1761                 intel_batchbuffer_data(batch, pp_inline_parameter, sizeof(*pp_inline_parameter));
1762
1763                 ADVANCE_BATCH(batch);
1764             }
1765         }
1766     }
1767 }
1768
1769 static void
1770 ironlake_pp_pipeline_setup(VADriverContextP ctx,
1771                            struct i965_post_processing_context *pp_context)
1772 {
1773     struct intel_batchbuffer *batch = pp_context->batch;
1774
1775     intel_batchbuffer_start_atomic(batch, 0x1000);
1776     intel_batchbuffer_emit_mi_flush(batch);
1777     ironlake_pp_pipeline_select(ctx, pp_context);
1778     ironlake_pp_state_base_address(ctx, pp_context);
1779     ironlake_pp_state_pointers(ctx, pp_context);
1780     ironlake_pp_urb_layout(ctx, pp_context);
1781     ironlake_pp_cs_urb_layout(ctx, pp_context);
1782     ironlake_pp_constant_buffer(ctx, pp_context);
1783     ironlake_pp_object_walker(ctx, pp_context);
1784     intel_batchbuffer_end_atomic(batch);
1785 }
1786
1787 // update u/v offset when the surface format are packed yuv
1788 static void i965_update_src_surface_static_parameter(
1789     VADriverContextP    ctx, 
1790     struct i965_post_processing_context *pp_context,
1791     const struct i965_surface *surface)
1792 {
1793     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1794     int fourcc = pp_get_surface_fourcc(ctx, surface);
1795
1796     switch (fourcc) {
1797     case VA_FOURCC('Y', 'U', 'Y', '2'):
1798         pp_static_parameter->grf1.source_packed_u_offset = 1;
1799         pp_static_parameter->grf1.source_packed_v_offset = 3;
1800         break;
1801     case VA_FOURCC('U', 'Y', 'V', 'Y'):
1802         pp_static_parameter->grf1.source_packed_y_offset = 1;
1803         pp_static_parameter->grf1.source_packed_v_offset = 2;
1804         break;
1805     case VA_FOURCC('B', 'G', 'R', 'X'):
1806     case VA_FOURCC('B', 'G', 'R', 'A'):
1807         pp_static_parameter->grf1.source_rgb_layout = 0;
1808         break;
1809     case VA_FOURCC('R', 'G', 'B', 'X'):
1810     case VA_FOURCC('R', 'G', 'B', 'A'):
1811         pp_static_parameter->grf1.source_rgb_layout = 1;
1812         break;
1813     default:
1814         break;
1815     }
1816     
1817 }
1818
1819 static void i965_update_dst_surface_static_parameter(
1820     VADriverContextP    ctx, 
1821     struct i965_post_processing_context *pp_context,
1822     const struct i965_surface *surface)
1823 {
1824     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1825     int fourcc = pp_get_surface_fourcc(ctx, surface);
1826
1827     switch (fourcc) {
1828     case VA_FOURCC('Y', 'U', 'Y', '2'):
1829         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_u_offset = 1;
1830         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 3;
1831         break;
1832     case VA_FOURCC('U', 'Y', 'V', 'Y'):
1833         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_y_offset = 1;
1834         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 2;
1835         break;
1836     case VA_FOURCC('B', 'G', 'R', 'X'):
1837     case VA_FOURCC('B', 'G', 'R', 'A'):
1838         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 0;
1839         break;
1840     case VA_FOURCC('R', 'G', 'B', 'X'):
1841     case VA_FOURCC('R', 'G', 'B', 'A'):
1842         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 1;
1843         break;
1844     default:
1845         break;
1846     }
1847     
1848 }
1849
1850 static void
1851 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1852                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1853                           int width, int height, int pitch, int format, 
1854                           int index, int is_target)
1855 {
1856     struct i965_surface_state *ss;
1857     dri_bo *ss_bo;
1858     unsigned int tiling;
1859     unsigned int swizzle;
1860
1861     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1862     ss_bo = pp_context->surface_state_binding_table.bo;
1863     assert(ss_bo);
1864
1865     dri_bo_map(ss_bo, True);
1866     assert(ss_bo->virtual);
1867     ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1868     memset(ss, 0, sizeof(*ss));
1869     ss->ss0.surface_type = I965_SURFACE_2D;
1870     ss->ss0.surface_format = format;
1871     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1872     ss->ss2.width = width - 1;
1873     ss->ss2.height = height - 1;
1874     ss->ss3.pitch = pitch - 1;
1875     pp_set_surface_tiling(ss, tiling);
1876     dri_bo_emit_reloc(ss_bo,
1877                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1878                       surf_bo_offset,
1879                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
1880                       surf_bo);
1881     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1882     dri_bo_unmap(ss_bo);
1883 }
1884
1885 static void
1886 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1887                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1888                            int width, int height, int wpitch,
1889                            int xoffset, int yoffset,
1890                            int format, int interleave_chroma,
1891                            int index)
1892 {
1893     struct i965_surface_state2 *ss2;
1894     dri_bo *ss2_bo;
1895     unsigned int tiling;
1896     unsigned int swizzle;
1897
1898     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1899     ss2_bo = pp_context->surface_state_binding_table.bo;
1900     assert(ss2_bo);
1901
1902     dri_bo_map(ss2_bo, True);
1903     assert(ss2_bo->virtual);
1904     ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1905     memset(ss2, 0, sizeof(*ss2));
1906     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1907     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1908     ss2->ss1.width = width - 1;
1909     ss2->ss1.height = height - 1;
1910     ss2->ss2.pitch = wpitch - 1;
1911     ss2->ss2.interleave_chroma = interleave_chroma;
1912     ss2->ss2.surface_format = format;
1913     ss2->ss3.x_offset_for_cb = xoffset;
1914     ss2->ss3.y_offset_for_cb = yoffset;
1915     pp_set_surface2_tiling(ss2, tiling);
1916     dri_bo_emit_reloc(ss2_bo,
1917                       I915_GEM_DOMAIN_RENDER, 0,
1918                       surf_bo_offset,
1919                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
1920                       surf_bo);
1921     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1922     dri_bo_unmap(ss2_bo);
1923 }
1924
1925 static void
1926 gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1927                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1928                           int width, int height, int pitch, int format, 
1929                           int index, int is_target)
1930 {
1931     struct i965_driver_data * const i965 = i965_driver_data(ctx);  
1932     struct gen7_surface_state *ss;
1933     dri_bo *ss_bo;
1934     unsigned int tiling;
1935     unsigned int swizzle;
1936
1937     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1938     ss_bo = pp_context->surface_state_binding_table.bo;
1939     assert(ss_bo);
1940
1941     dri_bo_map(ss_bo, True);
1942     assert(ss_bo->virtual);
1943     ss = (struct gen7_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1944     memset(ss, 0, sizeof(*ss));
1945     ss->ss0.surface_type = I965_SURFACE_2D;
1946     ss->ss0.surface_format = format;
1947     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1948     ss->ss2.width = width - 1;
1949     ss->ss2.height = height - 1;
1950     ss->ss3.pitch = pitch - 1;
1951     gen7_pp_set_surface_tiling(ss, tiling);
1952     if (IS_HASWELL(i965->intel.device_id))
1953         gen7_render_set_surface_scs(ss);
1954     dri_bo_emit_reloc(ss_bo,
1955                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1956                       surf_bo_offset,
1957                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
1958                       surf_bo);
1959     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1960     dri_bo_unmap(ss_bo);
1961 }
1962
1963 static void
1964 gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1965                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1966                            int width, int height, int wpitch,
1967                            int xoffset, int yoffset,
1968                            int format, int interleave_chroma,
1969                            int index)
1970 {
1971     struct gen7_surface_state2 *ss2;
1972     dri_bo *ss2_bo;
1973     unsigned int tiling;
1974     unsigned int swizzle;
1975
1976     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1977     ss2_bo = pp_context->surface_state_binding_table.bo;
1978     assert(ss2_bo);
1979
1980     dri_bo_map(ss2_bo, True);
1981     assert(ss2_bo->virtual);
1982     ss2 = (struct gen7_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1983     memset(ss2, 0, sizeof(*ss2));
1984     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1985     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1986     ss2->ss1.width = width - 1;
1987     ss2->ss1.height = height - 1;
1988     ss2->ss2.pitch = wpitch - 1;
1989     ss2->ss2.interleave_chroma = interleave_chroma;
1990     ss2->ss2.surface_format = format;
1991     ss2->ss3.x_offset_for_cb = xoffset;
1992     ss2->ss3.y_offset_for_cb = yoffset;
1993     gen7_pp_set_surface2_tiling(ss2, tiling);
1994     dri_bo_emit_reloc(ss2_bo,
1995                       I915_GEM_DOMAIN_RENDER, 0,
1996                       surf_bo_offset,
1997                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
1998                       surf_bo);
1999     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
2000     dri_bo_unmap(ss2_bo);
2001 }
2002
2003 static void
2004 gen8_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2005                           dri_bo *surf_bo, unsigned long surf_bo_offset,
2006                           int width, int height, int pitch, int format, 
2007                           int index, int is_target)
2008 {
2009     struct gen8_surface_state *ss;
2010     dri_bo *ss_bo;
2011     unsigned int tiling;
2012     unsigned int swizzle;
2013
2014     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
2015     ss_bo = pp_context->surface_state_binding_table.bo;
2016     assert(ss_bo);
2017
2018     dri_bo_map(ss_bo, True);
2019     assert(ss_bo->virtual);
2020     ss = (struct gen8_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
2021     memset(ss, 0, sizeof(*ss));
2022     ss->ss0.surface_type = I965_SURFACE_2D;
2023     ss->ss0.surface_format = format;
2024     ss->ss8.base_addr = surf_bo->offset + surf_bo_offset;
2025     ss->ss2.width = width - 1;
2026     ss->ss2.height = height - 1;
2027     ss->ss3.pitch = pitch - 1;
2028
2029     /* Always set 1(align 4 mode) per B-spec */
2030     ss->ss0.vertical_alignment = 1;
2031     ss->ss0.horizontal_alignment = 1;
2032
2033     gen8_pp_set_surface_tiling(ss, tiling);
2034     gen8_render_set_surface_scs(ss);
2035     dri_bo_emit_reloc(ss_bo,
2036                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
2037                       surf_bo_offset,
2038                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
2039                       surf_bo);
2040     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
2041     dri_bo_unmap(ss_bo);
2042 }
2043
2044
2045 static void
2046 gen8_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2047                            dri_bo *surf_bo, unsigned long surf_bo_offset,
2048                            int width, int height, int wpitch,
2049                            int xoffset, int yoffset,
2050                            int format, int interleave_chroma,
2051                            int index)
2052 {
2053     struct gen8_surface_state2 *ss2;
2054     dri_bo *ss2_bo;
2055     unsigned int tiling;
2056     unsigned int swizzle;
2057
2058     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
2059     ss2_bo = pp_context->surface_state_binding_table.bo;
2060     assert(ss2_bo);
2061
2062     dri_bo_map(ss2_bo, True);
2063     assert(ss2_bo->virtual);
2064     ss2 = (struct gen8_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
2065     memset(ss2, 0, sizeof(*ss2));
2066     ss2->ss6.base_addr = surf_bo->offset + surf_bo_offset;
2067     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
2068     ss2->ss1.width = width - 1;
2069     ss2->ss1.height = height - 1;
2070     ss2->ss2.pitch = wpitch - 1;
2071     ss2->ss2.interleave_chroma = interleave_chroma;
2072     ss2->ss2.surface_format = format;
2073     ss2->ss3.x_offset_for_cb = xoffset;
2074     ss2->ss3.y_offset_for_cb = yoffset;
2075     gen8_pp_set_surface2_tiling(ss2, tiling);
2076     dri_bo_emit_reloc(ss2_bo,
2077                       I915_GEM_DOMAIN_RENDER, 0,
2078                       surf_bo_offset,
2079                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state2, ss6),
2080                       surf_bo);
2081     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
2082     dri_bo_unmap(ss2_bo);
2083 }
2084
2085 static void 
2086 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2087                                 const struct i965_surface *surface, 
2088                                 int base_index, int is_target,
2089                                 int *width, int *height, int *pitch, int *offset)
2090 {
2091     struct object_surface *obj_surface;
2092     struct object_image *obj_image;
2093     dri_bo *bo;
2094     int fourcc = pp_get_surface_fourcc(ctx, surface);
2095     const int Y = 0;
2096     const int U = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 2 : 1;
2097     const int V = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 1 : 2;
2098     const int UV = 1;
2099     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
2100     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); 
2101     int full_packed_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || 
2102                               fourcc == VA_FOURCC('R', 'G', 'B', 'X') || 
2103                               fourcc == VA_FOURCC('B', 'G', 'R', 'A') || 
2104                               fourcc == VA_FOURCC('B', 'G', 'R', 'X'));
2105     int scale_factor_of_1st_plane_width_in_byte = 1;
2106                               
2107     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
2108         obj_surface = (struct object_surface *)surface->base;
2109         bo = obj_surface->bo;
2110         width[0] = obj_surface->orig_width;
2111         height[0] = obj_surface->orig_height;
2112         pitch[0] = obj_surface->width;
2113         offset[0] = 0;
2114
2115         if (full_packed_format) {
2116             scale_factor_of_1st_plane_width_in_byte = 4; 
2117         }
2118         else if (packed_yuv ) {
2119             scale_factor_of_1st_plane_width_in_byte =  2; 
2120         }
2121         else if (interleaved_uv) {
2122             width[1] = obj_surface->orig_width;
2123             height[1] = obj_surface->orig_height / 2;
2124             pitch[1] = obj_surface->width;
2125             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
2126         } else {
2127             width[1] = obj_surface->orig_width / 2;
2128             height[1] = obj_surface->orig_height / 2;
2129             pitch[1] = obj_surface->width / 2;
2130             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
2131             width[2] = obj_surface->orig_width / 2;
2132             height[2] = obj_surface->orig_height / 2;
2133             pitch[2] = obj_surface->width / 2;
2134             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
2135         }
2136     } else {
2137         obj_image = (struct object_image *)surface->base;
2138         bo = obj_image->bo;
2139         width[0] = obj_image->image.width;
2140         height[0] = obj_image->image.height;
2141         pitch[0] = obj_image->image.pitches[0];
2142         offset[0] = obj_image->image.offsets[0];
2143
2144         if (full_packed_format) {
2145             scale_factor_of_1st_plane_width_in_byte = 4;
2146         }
2147         else if (packed_yuv ) {
2148             scale_factor_of_1st_plane_width_in_byte = 2;
2149         }
2150         else if (interleaved_uv) {
2151             width[1] = obj_image->image.width;
2152             height[1] = obj_image->image.height / 2;
2153             pitch[1] = obj_image->image.pitches[1];
2154             offset[1] = obj_image->image.offsets[1];
2155         } else {
2156             width[1] = obj_image->image.width / 2;
2157             height[1] = obj_image->image.height / 2;
2158             pitch[1] = obj_image->image.pitches[1];
2159             offset[1] = obj_image->image.offsets[1];
2160             width[2] = obj_image->image.width / 2;
2161             height[2] = obj_image->image.height / 2;
2162             pitch[2] = obj_image->image.pitches[2];
2163             offset[2] = obj_image->image.offsets[2];
2164         }
2165     }
2166
2167     /* Y surface */
2168     i965_pp_set_surface_state(ctx, pp_context,
2169                               bo, offset[Y],
2170                               width[Y] *scale_factor_of_1st_plane_width_in_byte / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
2171                               base_index, is_target);
2172
2173     if (!packed_yuv && !full_packed_format) {
2174         if (interleaved_uv) {
2175             i965_pp_set_surface_state(ctx, pp_context,
2176                                       bo, offset[UV],
2177                                       width[UV] / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
2178                                       base_index + 1, is_target);
2179         } else {
2180             /* U surface */
2181             i965_pp_set_surface_state(ctx, pp_context,
2182                                       bo, offset[U],
2183                                       width[U] / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
2184                                       base_index + 1, is_target);
2185
2186             /* V surface */
2187             i965_pp_set_surface_state(ctx, pp_context,
2188                                       bo, offset[V],
2189                                       width[V] / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
2190                                       base_index + 2, is_target);
2191         }
2192     }
2193
2194 }
2195
2196 static void 
2197 gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2198                                      const struct i965_surface *surface, 
2199                                      int base_index, int is_target,
2200                                      int *width, int *height, int *pitch, int *offset)
2201 {
2202     struct object_surface *obj_surface;
2203     struct object_image *obj_image;
2204     dri_bo *bo;
2205     int fourcc = pp_get_surface_fourcc(ctx, surface);
2206     const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
2207                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1;
2208     const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
2209                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 1 : 2;
2210     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
2211     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y'));
2212     int rgbx_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || 
2213                               fourcc == VA_FOURCC('R', 'G', 'B', 'X') || 
2214                               fourcc == VA_FOURCC('B', 'G', 'R', 'A') || 
2215                               fourcc == VA_FOURCC('B', 'G', 'R', 'X'));
2216
2217     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
2218         obj_surface = (struct object_surface *)surface->base;
2219         bo = obj_surface->bo;
2220         width[0] = obj_surface->orig_width;
2221         height[0] = obj_surface->orig_height;
2222         pitch[0] = obj_surface->width;
2223         offset[0] = 0;
2224
2225         if (packed_yuv) {
2226             if (is_target)
2227                 width[0] = obj_surface->orig_width * 2; /* surface format is R8, so double the width */
2228             else
2229                 width[0] = obj_surface->orig_width;     /* surface foramt is YCBCR, width is specified in units of pixels */
2230         } else if (rgbx_format) {
2231             if (is_target)
2232                 width[0] = obj_surface->orig_width * 4; /* surface format is R8, so quad the width */
2233         }
2234
2235         width[1] = obj_surface->cb_cr_width;
2236         height[1] = obj_surface->cb_cr_height;
2237         pitch[1] = obj_surface->cb_cr_pitch;
2238         offset[1] = obj_surface->y_cb_offset * obj_surface->width;
2239
2240         width[2] = obj_surface->cb_cr_width;
2241         height[2] = obj_surface->cb_cr_height;
2242         pitch[2] = obj_surface->cb_cr_pitch;
2243         offset[2] = obj_surface->y_cr_offset * obj_surface->width;
2244     } else {
2245         obj_image = (struct object_image *)surface->base;
2246         bo = obj_image->bo;
2247         width[0] = obj_image->image.width;
2248         height[0] = obj_image->image.height;
2249         pitch[0] = obj_image->image.pitches[0];
2250         offset[0] = obj_image->image.offsets[0];
2251
2252         if (rgbx_format) {
2253             if (is_target)
2254                 width[0] = obj_image->image.width * 4; /* surface format is R8, so quad the width */
2255         } else if (packed_yuv) {
2256             if (is_target)
2257                 width[0] = obj_image->image.width * 2;  /* surface format is R8, so double the width */
2258             else
2259                 width[0] = obj_image->image.width;      /* surface foramt is YCBCR, width is specified in units of pixels */
2260         } else if (interleaved_uv) {
2261             width[1] = obj_image->image.width / 2;
2262             height[1] = obj_image->image.height / 2;
2263             pitch[1] = obj_image->image.pitches[1];
2264             offset[1] = obj_image->image.offsets[1];
2265         } else {
2266             width[1] = obj_image->image.width / 2;
2267             height[1] = obj_image->image.height / 2;
2268             pitch[1] = obj_image->image.pitches[U];
2269             offset[1] = obj_image->image.offsets[U];
2270             width[2] = obj_image->image.width / 2;
2271             height[2] = obj_image->image.height / 2;
2272             pitch[2] = obj_image->image.pitches[V];
2273             offset[2] = obj_image->image.offsets[V];
2274         }
2275     }
2276
2277     if (is_target) {
2278         gen7_pp_set_surface_state(ctx, pp_context,
2279                                   bo, 0,
2280                                   width[0] / 4, height[0], pitch[0],
2281                                   I965_SURFACEFORMAT_R8_UINT,
2282                                   base_index, 1);
2283         if (rgbx_format) {
2284                 struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2285                 /* the format is MSB: X-B-G-R */
2286                 pp_static_parameter->grf2.save_avs_rgb_swap = 0;
2287                 if ((fourcc == VA_FOURCC('B', 'G', 'R', 'A')) || 
2288                               (fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) {
2289                         /* It is stored as MSB: X-R-G-B */
2290                         pp_static_parameter->grf2.save_avs_rgb_swap = 1;
2291                 }
2292         }
2293         if (!packed_yuv && !rgbx_format) {
2294             if (interleaved_uv) {
2295                 gen7_pp_set_surface_state(ctx, pp_context,
2296                                           bo, offset[1],
2297                                           width[1] / 2, height[1], pitch[1],
2298                                           I965_SURFACEFORMAT_R8G8_SINT,
2299                                           base_index + 1, 1);
2300             } else {
2301                 gen7_pp_set_surface_state(ctx, pp_context,
2302                                           bo, offset[1],
2303                                           width[1] / 4, height[1], pitch[1],
2304                                           I965_SURFACEFORMAT_R8_SINT,
2305                                           base_index + 1, 1);
2306                 gen7_pp_set_surface_state(ctx, pp_context,
2307                                           bo, offset[2],
2308                                           width[2] / 4, height[2], pitch[2],
2309                                           I965_SURFACEFORMAT_R8_SINT,
2310                                           base_index + 2, 1);
2311             }
2312         }
2313     } else {
2314         int format0 = SURFACE_FORMAT_Y8_UNORM;
2315
2316         switch (fourcc) {
2317         case VA_FOURCC('Y', 'U', 'Y', '2'):
2318             format0 = SURFACE_FORMAT_YCRCB_NORMAL;
2319             break;
2320
2321         case VA_FOURCC('U', 'Y', 'V', 'Y'):
2322             format0 = SURFACE_FORMAT_YCRCB_SWAPY;
2323             break;
2324
2325         default:
2326             break;
2327         }
2328         if (rgbx_format) {
2329             struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2330             /* Only R8G8B8A8_UNORM is supported for BGRX or RGBX */
2331             format0 = SURFACE_FORMAT_R8G8B8A8_UNORM;
2332             pp_static_parameter->grf2.src_avs_rgb_swap = 0;
2333             if ((fourcc == VA_FOURCC('B', 'G', 'R', 'A')) || 
2334                               (fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) {
2335                 pp_static_parameter->grf2.src_avs_rgb_swap = 1;
2336             }
2337         }
2338         gen7_pp_set_surface2_state(ctx, pp_context,
2339                                    bo, offset[0],
2340                                    width[0], height[0], pitch[0],
2341                                    0, 0,
2342                                    format0, 0,
2343                                    base_index);
2344
2345         if (!packed_yuv && !rgbx_format) {
2346             if (interleaved_uv) {
2347                 gen7_pp_set_surface2_state(ctx, pp_context,
2348                                            bo, offset[1],
2349                                            width[1], height[1], pitch[1],
2350                                            0, 0,
2351                                            SURFACE_FORMAT_R8B8_UNORM, 0,
2352                                            base_index + 1);
2353             } else {
2354                 gen7_pp_set_surface2_state(ctx, pp_context,
2355                                            bo, offset[1],
2356                                            width[1], height[1], pitch[1],
2357                                            0, 0,
2358                                            SURFACE_FORMAT_R8_UNORM, 0,
2359                                            base_index + 1);
2360                 gen7_pp_set_surface2_state(ctx, pp_context,
2361                                            bo, offset[2],
2362                                            width[2], height[2], pitch[2],
2363                                            0, 0,
2364                                            SURFACE_FORMAT_R8_UNORM, 0,
2365                                            base_index + 2);
2366             }
2367         }
2368     }
2369 }
2370
2371 static void 
2372 gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2373                                      const struct i965_surface *surface, 
2374                                      int base_index, int is_target,
2375                                      int *width, int *height, int *pitch, int *offset)
2376 {
2377     struct object_surface *obj_surface;
2378     struct object_image *obj_image;
2379     dri_bo *bo;
2380     int fourcc = pp_get_surface_fourcc(ctx, surface);
2381     const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
2382                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1;
2383     const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
2384                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 1 : 2;
2385     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
2386     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y'));
2387     int rgbx_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || 
2388                               fourcc == VA_FOURCC('R', 'G', 'B', 'X') || 
2389                               fourcc == VA_FOURCC('B', 'G', 'R', 'A') || 
2390                               fourcc == VA_FOURCC('B', 'G', 'R', 'X'));
2391
2392     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
2393         obj_surface = (struct object_surface *)surface->base;
2394         bo = obj_surface->bo;
2395         width[0] = obj_surface->orig_width;
2396         height[0] = obj_surface->orig_height;
2397         pitch[0] = obj_surface->width;
2398         offset[0] = 0;
2399
2400         if (packed_yuv) {
2401             if (is_target)
2402                 width[0] = obj_surface->orig_width * 2; /* surface format is R8, so double the width */
2403             else
2404                 width[0] = obj_surface->orig_width;     /* surface foramt is YCBCR, width is specified in units of pixels */
2405
2406             pitch[0] = obj_surface->width * 2;
2407         } else if (rgbx_format) {
2408             if (is_target)
2409                 width[0] = obj_surface->orig_width * 4; /* surface format is R8, so quad the width */
2410             pitch[0] = obj_surface->width * 4;
2411         }
2412
2413         width[1] = obj_surface->cb_cr_width;
2414         height[1] = obj_surface->cb_cr_height;
2415         pitch[1] = obj_surface->cb_cr_pitch;
2416         offset[1] = obj_surface->y_cb_offset * obj_surface->width;
2417
2418         width[2] = obj_surface->cb_cr_width;
2419         height[2] = obj_surface->cb_cr_height;
2420         pitch[2] = obj_surface->cb_cr_pitch;
2421         offset[2] = obj_surface->y_cr_offset * obj_surface->width;
2422     } else {
2423         obj_image = (struct object_image *)surface->base;
2424         bo = obj_image->bo;
2425         width[0] = obj_image->image.width;
2426         height[0] = obj_image->image.height;
2427         pitch[0] = obj_image->image.pitches[0];
2428         offset[0] = obj_image->image.offsets[0];
2429
2430         if (rgbx_format) {
2431             if (is_target)
2432                 width[0] = obj_image->image.width * 4; /* surface format is R8, so quad the width */
2433         } else if (packed_yuv) {
2434             if (is_target)
2435                 width[0] = obj_image->image.width * 2;  /* surface format is R8, so double the width */
2436             else
2437                 width[0] = obj_image->image.width;      /* surface foramt is YCBCR, width is specified in units of pixels */
2438         } else if (interleaved_uv) {
2439             width[1] = obj_image->image.width / 2;
2440             height[1] = obj_image->image.height / 2;
2441             pitch[1] = obj_image->image.pitches[1];
2442             offset[1] = obj_image->image.offsets[1];
2443         } else {
2444             width[1] = obj_image->image.width / 2;
2445             height[1] = obj_image->image.height / 2;
2446             pitch[1] = obj_image->image.pitches[U];
2447             offset[1] = obj_image->image.offsets[U];
2448             width[2] = obj_image->image.width / 2;
2449             height[2] = obj_image->image.height / 2;
2450             pitch[2] = obj_image->image.pitches[V];
2451             offset[2] = obj_image->image.offsets[V];
2452         }
2453     }
2454
2455     if (is_target) {
2456         gen8_pp_set_surface_state(ctx, pp_context,
2457                                   bo, 0,
2458                                   width[0] / 4, height[0], pitch[0],
2459                                   I965_SURFACEFORMAT_R8_UINT,
2460                                   base_index, 1);
2461         if (rgbx_format) {
2462                 struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2463                 /* the format is MSB: X-B-G-R */
2464                 pp_static_parameter->grf2.save_avs_rgb_swap = 0;
2465                 if ((fourcc == VA_FOURCC('B', 'G', 'R', 'A')) || 
2466                               (fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) {
2467                         /* It is stored as MSB: X-R-G-B */
2468                         pp_static_parameter->grf2.save_avs_rgb_swap = 1;
2469                 }
2470         }
2471         if (!packed_yuv && !rgbx_format) {
2472             if (interleaved_uv) {
2473                 gen8_pp_set_surface_state(ctx, pp_context,
2474                                           bo, offset[1],
2475                                           width[1] / 2, height[1], pitch[1],
2476                                           I965_SURFACEFORMAT_R8G8_SINT,
2477                                           base_index + 1, 1);
2478             } else {
2479                 gen8_pp_set_surface_state(ctx, pp_context,
2480                                           bo, offset[1],
2481                                           width[1] / 4, height[1], pitch[1],
2482                                           I965_SURFACEFORMAT_R8_SINT,
2483                                           base_index + 1, 1);
2484                 gen8_pp_set_surface_state(ctx, pp_context,
2485                                           bo, offset[2],
2486                                           width[2] / 4, height[2], pitch[2],
2487                                           I965_SURFACEFORMAT_R8_SINT,
2488                                           base_index + 2, 1);
2489             }
2490         }
2491     } else {
2492         int format0 = SURFACE_FORMAT_Y8_UNORM;
2493
2494         switch (fourcc) {
2495         case VA_FOURCC('Y', 'U', 'Y', '2'):
2496             format0 = SURFACE_FORMAT_YCRCB_NORMAL;
2497             break;
2498
2499         case VA_FOURCC('U', 'Y', 'V', 'Y'):
2500             format0 = SURFACE_FORMAT_YCRCB_SWAPY;
2501             break;
2502
2503         default:
2504             break;
2505         }
2506         if (rgbx_format) {
2507             struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2508             /* Only R8G8B8A8_UNORM is supported for BGRX or RGBX */
2509             format0 = SURFACE_FORMAT_R8G8B8A8_UNORM;
2510             pp_static_parameter->grf2.src_avs_rgb_swap = 0;
2511             if ((fourcc == VA_FOURCC('B', 'G', 'R', 'A')) || 
2512                               (fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) {
2513                 pp_static_parameter->grf2.src_avs_rgb_swap = 1;
2514             }
2515         }
2516         gen8_pp_set_surface2_state(ctx, pp_context,
2517                                    bo, offset[0],
2518                                    width[0], height[0], pitch[0],
2519                                    0, 0,
2520                                    format0, 0,
2521                                    base_index);
2522
2523         if (!packed_yuv && !rgbx_format) {
2524             if (interleaved_uv) {
2525                 gen8_pp_set_surface2_state(ctx, pp_context,
2526                                            bo, offset[1],
2527                                            width[1], height[1], pitch[1],
2528                                            0, 0,
2529                                            SURFACE_FORMAT_R8B8_UNORM, 0,
2530                                            base_index + 1);
2531             } else {
2532                 gen8_pp_set_surface2_state(ctx, pp_context,
2533                                            bo, offset[1],
2534                                            width[1], height[1], pitch[1],
2535                                            0, 0,
2536                                            SURFACE_FORMAT_R8_UNORM, 0,
2537                                            base_index + 1);
2538                 gen8_pp_set_surface2_state(ctx, pp_context,
2539                                            bo, offset[2],
2540                                            width[2], height[2], pitch[2],
2541                                            0, 0,
2542                                            SURFACE_FORMAT_R8_UNORM, 0,
2543                                            base_index + 2);
2544             }
2545         }
2546     }
2547 }
2548
/* Horizontal step count for the null module: always 1, the context is ignored. */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2554
/* Vertical step count for the null module: always 1, the context is ignored. */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2560
/* Per-block hook of the null module: touches nothing and always returns 0. */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;
    (void)x;
    (void)y;

    return 0;
}
2566
2567 static VAStatus
2568 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2569                    const struct i965_surface *src_surface,
2570                    const VARectangle *src_rect,
2571                    struct i965_surface *dst_surface,
2572                    const VARectangle *dst_rect,
2573                    void *filter_param)
2574 {
2575     /* private function & data */
2576     pp_context->pp_x_steps = pp_null_x_steps;
2577     pp_context->pp_y_steps = pp_null_y_steps;
2578     pp_context->private_context = NULL;
2579     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
2580
2581     dst_surface->flags = src_surface->flags;
2582
2583     return VA_STATUS_SUCCESS;
2584 }
2585
/* Horizontal step count for the load/save module: always 1, the context is ignored. */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2591
2592 static int
2593 pp_load_save_y_steps(void *private_context)
2594 {
2595     struct pp_load_save_context *pp_load_save_context = private_context;
2596
2597     return pp_load_save_context->dest_h / 8;
2598 }
2599
2600 static int
2601 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2602 {
2603     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2604     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)pp_context->private_context;
2605
2606     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_load_save_context->dest_x;
2607     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_load_save_context->dest_y;
2608
2609     return 0;
2610 }
2611
2612 static void calculate_boundary_block_mask(struct i965_post_processing_context *pp_context, const VARectangle *dst_rect)
2613 {
2614     int i;
2615     /* x offset of dest surface must be dword aligned.
2616      * so we have to extend dst surface on left edge, and mask out pixels not interested
2617      */
2618     if (dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT) {
2619         pp_context->block_horizontal_mask_left = 0;
2620         for (i=dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; i<GPU_ASM_BLOCK_WIDTH; i++)
2621         {
2622             pp_context->block_horizontal_mask_left |= 1<<i;
2623         }
2624     }
2625     else {
2626         pp_context->block_horizontal_mask_left = 0xffff;
2627     }
2628     
2629     int dst_width_adjust = dst_rect->width + dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; 
2630     if (dst_width_adjust%GPU_ASM_BLOCK_WIDTH){
2631         pp_context->block_horizontal_mask_right = (1 << (dst_width_adjust%GPU_ASM_BLOCK_WIDTH)) - 1;
2632     }
2633     else {
2634         pp_context->block_horizontal_mask_right = 0xffff;
2635     }
2636     
2637     if (dst_rect->height%GPU_ASM_BLOCK_HEIGHT){
2638         pp_context->block_vertical_mask_bottom = (1 << (dst_rect->height%GPU_ASM_BLOCK_HEIGHT)) - 1;
2639     }
2640     else {
2641         pp_context->block_vertical_mask_bottom = 0xff;
2642     }
2643
2644 }
2645 static VAStatus
2646 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2647                                 const struct i965_surface *src_surface,
2648                                 const VARectangle *src_rect,
2649                                 struct i965_surface *dst_surface,
2650                                 const VARectangle *dst_rect,
2651                                 void *filter_param)
2652 {
2653     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->pp_load_save_context;
2654     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2655     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2656     int width[3], height[3], pitch[3], offset[3];
2657
2658     /* source surface */
2659     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
2660                                     width, height, pitch, offset);
2661
2662     /* destination surface */
2663     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
2664                                     width, height, pitch, offset);
2665
2666     /* private function & data */
2667     pp_context->pp_x_steps = pp_load_save_x_steps;
2668     pp_context->pp_y_steps = pp_load_save_y_steps;
2669     pp_context->private_context = &pp_context->pp_load_save_context;
2670     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
2671
2672     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;;
2673     pp_load_save_context->dest_x = dst_rect->x - dst_left_edge_extend;
2674     pp_load_save_context->dest_y = dst_rect->y;
2675     pp_load_save_context->dest_h = ALIGN(dst_rect->height, 8);
2676     pp_load_save_context->dest_w = ALIGN(dst_rect->width+dst_left_edge_extend, 16);
2677
2678     pp_inline_parameter->grf5.block_count_x = pp_load_save_context->dest_w / 16;   /* 1 x N */
2679     pp_inline_parameter->grf5.number_blocks = pp_load_save_context->dest_w / 16;
2680
2681     pp_static_parameter->grf3.horizontal_origin_offset = src_rect->x;
2682     pp_static_parameter->grf3.vertical_origin_offset = src_rect->y;
2683
2684     // update u/v offset for packed yuv
2685     i965_update_src_surface_static_parameter (ctx, pp_context, src_surface);
2686     i965_update_dst_surface_static_parameter (ctx, pp_context, dst_surface);
2687
2688     dst_surface->flags = src_surface->flags;
2689
2690     return VA_STATUS_SUCCESS;
2691 }
2692
/*
 * Number of horizontal steps for the scaling kernel.
 * Always 1; the private context is not consulted.
 */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2698
2699 static int
2700 pp_scaling_y_steps(void *private_context)
2701 {
2702     struct pp_scaling_context *pp_scaling_context = private_context;
2703
2704     return pp_scaling_context->dest_h / 8;
2705 }
2706
2707 static int
2708 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2709 {
2710     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)pp_context->private_context;
2711     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2712     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2713     float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2714     float src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2715
2716     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
2717     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
2718     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
2719     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
2720     
2721     return 0;
2722 }
2723
2724 static VAStatus
2725 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2726                            const struct i965_surface *src_surface,
2727                            const VARectangle *src_rect,
2728                            struct i965_surface *dst_surface,
2729                            const VARectangle *dst_rect,
2730                            void *filter_param)
2731 {
2732     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->pp_scaling_context;
2733     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2734     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2735     struct object_surface *obj_surface;
2736     struct i965_sampler_state *sampler_state;
2737     int in_w, in_h, in_wpitch, in_hpitch;
2738     int out_w, out_h, out_wpitch, out_hpitch;
2739
2740     /* source surface */
2741     obj_surface = (struct object_surface *)src_surface->base;
2742     in_w = obj_surface->orig_width;
2743     in_h = obj_surface->orig_height;
2744     in_wpitch = obj_surface->width;
2745     in_hpitch = obj_surface->height;
2746
2747     /* source Y surface index 1 */
2748     i965_pp_set_surface_state(ctx, pp_context,
2749                               obj_surface->bo, 0,
2750                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2751                               1, 0);
2752
2753     /* source UV surface index 2 */
2754     i965_pp_set_surface_state(ctx, pp_context,
2755                               obj_surface->bo, in_wpitch * in_hpitch,
2756                               in_w / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2757                               2, 0);
2758
2759     /* destination surface */
2760     obj_surface = (struct object_surface *)dst_surface->base;
2761     out_w = obj_surface->orig_width;
2762     out_h = obj_surface->orig_height;
2763     out_wpitch = obj_surface->width;
2764     out_hpitch = obj_surface->height;
2765
2766     /* destination Y surface index 7 */
2767     i965_pp_set_surface_state(ctx, pp_context,
2768                               obj_surface->bo, 0,
2769                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2770                               7, 1);
2771
2772     /* destination UV surface index 8 */
2773     i965_pp_set_surface_state(ctx, pp_context,
2774                               obj_surface->bo, out_wpitch * out_hpitch,
2775                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2776                               8, 1);
2777
2778     /* sampler state */
2779     dri_bo_map(pp_context->sampler_state_table.bo, True);
2780     assert(pp_context->sampler_state_table.bo->virtual);
2781     sampler_state = pp_context->sampler_state_table.bo->virtual;
2782
2783     /* SIMD16 Y index 1 */
2784     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
2785     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2786     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2787     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2788     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2789
2790     /* SIMD16 UV index 2 */
2791     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
2792     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2793     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2794     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2795     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2796
2797     dri_bo_unmap(pp_context->sampler_state_table.bo);
2798
2799     /* private function & data */
2800     pp_context->pp_x_steps = pp_scaling_x_steps;
2801     pp_context->pp_y_steps = pp_scaling_y_steps;
2802     pp_context->private_context = &pp_context->pp_scaling_context;
2803     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
2804
2805     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
2806     float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
2807     pp_scaling_context->dest_x = dst_rect->x - dst_left_edge_extend;
2808     pp_scaling_context->dest_y = dst_rect->y;
2809     pp_scaling_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2810     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 8);
2811     pp_scaling_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
2812     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
2813
2814     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2815
2816     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
2817     pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
2818     pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
2819
2820     dst_surface->flags = src_surface->flags;
2821
2822     return VA_STATUS_SUCCESS;
2823 }
2824
2825 static int
2826 pp_avs_x_steps(void *private_context)
2827 {
2828     struct pp_avs_context *pp_avs_context = private_context;
2829
2830     return pp_avs_context->dest_w / 16;
2831 }
2832
/*
 * Number of vertical steps for the AVS kernel.
 * Always 1; the private context is not consulted.
 */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2838
2839 static int
2840 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2841 {
2842     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context;
2843     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2844     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2845     float src_x_steping, src_y_steping, video_step_delta;
2846     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
2847
2848     if (pp_static_parameter->grf4.r4_2.avs.nlas == 0) {
2849         src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2850         pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_avs_context->src_normalized_x;
2851     } else if (tmp_w >= pp_avs_context->dest_w) {
2852         pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
2853         pp_inline_parameter->grf6.video_step_delta = 0;
2854         
2855         if (x == 0) {
2856             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
2857                 pp_avs_context->src_normalized_x;
2858         } else {
2859             src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2860             video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2861             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2862                 16 * 15 * video_step_delta / 2;
2863         }
2864     } else {
2865         int n0, n1, n2, nls_left, nls_right;
2866         int factor_a = 5, factor_b = 4;
2867         float f;
2868
2869         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
2870         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
2871         n2 = tmp_w / (16 * factor_a);
2872         nls_left = n0 + n2;
2873         nls_right = n1 + n2;
2874         f = (float) n2 * 16 / tmp_w;
2875         
2876         if (n0 < 5) {
2877             pp_inline_parameter->grf6.video_step_delta = 0.0;
2878
2879             if (x == 0) {
2880                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
2881                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
2882             } else {
2883                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2884                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2885                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2886                     16 * 15 * video_step_delta / 2;
2887             }
2888         } else {
2889             if (x < nls_left) {
2890                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
2891                 float a = f / (nls_left * 16 * factor_b);
2892                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
2893                 
2894                 pp_inline_parameter->grf6.video_step_delta = b;
2895
2896                 if (x == 0) {
2897                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
2898                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a;
2899                 } else {
2900                     src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2901                     video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2902                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2903                         16 * 15 * video_step_delta / 2;
2904                     pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
2905                 }
2906             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
2907                 /* scale the center linearly */
2908                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2909                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2910                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2911                     16 * 15 * video_step_delta / 2;
2912                 pp_inline_parameter->grf6.video_step_delta = 0.0;
2913                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
2914             } else {
2915                 float a = f / (nls_right * 16 * factor_b);
2916                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
2917
2918                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2919                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2920                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2921                     16 * 15 * video_step_delta / 2;
2922                 pp_inline_parameter->grf6.video_step_delta = -b;
2923
2924                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
2925                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
2926                 else
2927                     pp_inline_parameter->grf5.normalized_video_x_scaling_step -= b * 16;
2928             }
2929         }
2930     }
2931
2932     src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2933     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
2934     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2935     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
2936
2937     return 0;
2938 }
2939
2940 static VAStatus
2941 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2942                        const struct i965_surface *src_surface,
2943                        const VARectangle *src_rect,
2944                        struct i965_surface *dst_surface,
2945                        const VARectangle *dst_rect,
2946                        void *filter_param,
2947                        int nlas)
2948 {
2949     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context;
2950     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2951     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2952     struct object_surface *obj_surface;
2953     struct i965_sampler_8x8 *sampler_8x8;
2954     struct i965_sampler_8x8_state *sampler_8x8_state;
2955     int index;
2956     int in_w, in_h, in_wpitch, in_hpitch;
2957     int out_w, out_h, out_wpitch, out_hpitch;
2958     int i;
2959
2960     /* surface */
2961     obj_surface = (struct object_surface *)src_surface->base;
2962     in_w = obj_surface->orig_width;
2963     in_h = obj_surface->orig_height;
2964     in_wpitch = obj_surface->width;
2965     in_hpitch = obj_surface->height;
2966
2967     /* source Y surface index 1 */
2968     i965_pp_set_surface2_state(ctx, pp_context,
2969                                obj_surface->bo, 0,
2970                                in_w, in_h, in_wpitch,
2971                                0, 0,
2972                                SURFACE_FORMAT_Y8_UNORM, 0,
2973                                1);
2974
2975     /* source UV surface index 2 */
2976     i965_pp_set_surface2_state(ctx, pp_context,
2977                                obj_surface->bo, in_wpitch * in_hpitch,
2978                                in_w / 2, in_h / 2, in_wpitch,
2979                                0, 0,
2980                                SURFACE_FORMAT_R8B8_UNORM, 0,
2981                                2);
2982
2983     /* destination surface */
2984     obj_surface = (struct object_surface *)dst_surface->base;
2985     out_w = obj_surface->orig_width;
2986     out_h = obj_surface->orig_height;
2987     out_wpitch = obj_surface->width;
2988     out_hpitch = obj_surface->height;
2989     assert(out_w <= out_wpitch && out_h <= out_hpitch);
2990
2991     /* destination Y surface index 7 */
2992     i965_pp_set_surface_state(ctx, pp_context,
2993                               obj_surface->bo, 0,
2994                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2995                               7, 1);
2996
2997     /* destination UV surface index 8 */
2998     i965_pp_set_surface_state(ctx, pp_context,
2999                               obj_surface->bo, out_wpitch * out_hpitch,
3000                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
3001                               8, 1);
3002
3003     /* sampler 8x8 state */
3004     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
3005     assert(pp_context->sampler_state_table.bo_8x8->virtual);
3006     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
3007     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
3008     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
3009
3010     for (i = 0; i < 17; i++) {
3011         /* for Y channel, currently ignore */
3012         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x00;
3013         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x00;
3014         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x08;
3015         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x18;
3016         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x18;
3017         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x08;
3018         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x00;
3019         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x00;
3020         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x00;
3021         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x00;
3022         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x10;
3023         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x10;
3024         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x10;
3025         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x10;
3026         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x00;
3027         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x00;
3028         /* for U/V channel, 0.25 */
3029         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
3030         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
3031         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
3032         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
3033         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
3034         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
3035         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
3036         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
3037         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
3038         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
3039         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
3040         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
3041         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
3042         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
3043         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
3044         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
3045     }
3046
3047     sampler_8x8_state->dw136.default_sharpness_level = 0;
3048     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
3049     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
3050     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
3051     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
3052
3053     /* sampler 8x8 */
3054     dri_bo_map(pp_context->sampler_state_table.bo, True);
3055     assert(pp_context->sampler_state_table.bo->virtual);
3056     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
3057     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
3058
3059     /* sample_8x8 Y index 1 */
3060     index = 1;
3061     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3062     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
3063     sampler_8x8[index].dw0.ief_bypass = 1;
3064     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
3065     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
3066     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3067     sampler_8x8[index].dw2.global_noise_estimation = 22;
3068     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3069     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3070     sampler_8x8[index].dw3.strong_edge_weight = 7;
3071     sampler_8x8[index].dw3.regular_weight = 2;
3072     sampler_8x8[index].dw3.non_edge_weight = 0;
3073     sampler_8x8[index].dw3.gain_factor = 40;
3074     sampler_8x8[index].dw4.steepness_boost = 0;
3075     sampler_8x8[index].dw4.steepness_threshold = 0;
3076     sampler_8x8[index].dw4.mr_boost = 0;
3077     sampler_8x8[index].dw4.mr_threshold = 5;
3078     sampler_8x8[index].dw5.pwl1_point_1 = 4;
3079     sampler_8x8[index].dw5.pwl1_point_2 = 12;
3080     sampler_8x8[index].dw5.pwl1_point_3 = 16;
3081     sampler_8x8[index].dw5.pwl1_point_4 = 26;
3082     sampler_8x8[index].dw6.pwl1_point_5 = 40;
3083     sampler_8x8[index].dw6.pwl1_point_6 = 160;
3084     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
3085     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
3086     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
3087     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
3088     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
3089     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
3090     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
3091     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
3092     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
3093     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
3094     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
3095     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
3096     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
3097     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
3098     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
3099     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
3100     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
3101     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
3102     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
3103     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
3104     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
3105     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
3106     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
3107     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
3108     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
3109     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
3110     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
3111     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
3112     sampler_8x8[index].dw13.limiter_boost = 0;
3113     sampler_8x8[index].dw13.minimum_limiter = 10;
3114     sampler_8x8[index].dw13.maximum_limiter = 11;
3115     sampler_8x8[index].dw14.clip_limiter = 130;
3116     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3117                       I915_GEM_DOMAIN_RENDER, 
3118                       0,
3119                       0,
3120                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3121                       pp_context->sampler_state_table.bo_8x8);
3122
3123     /* sample_8x8 UV index 2 */
3124     index = 2;
3125     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3126     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
3127     sampler_8x8[index].dw0.ief_bypass = 1;
3128     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
3129     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
3130     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3131     sampler_8x8[index].dw2.global_noise_estimation = 22;
3132     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3133     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3134     sampler_8x8[index].dw3.strong_edge_weight = 7;
3135     sampler_8x8[index].dw3.regular_weight = 2;
3136     sampler_8x8[index].dw3.non_edge_weight = 0;
3137     sampler_8x8[index].dw3.gain_factor = 40;
3138     sampler_8x8[index].dw4.steepness_boost = 0;
3139     sampler_8x8[index].dw4.steepness_threshold = 0;
3140     sampler_8x8[index].dw4.mr_boost = 0;
3141     sampler_8x8[index].dw4.mr_threshold = 5;
3142     sampler_8x8[index].dw5.pwl1_point_1 = 4;
3143     sampler_8x8[index].dw5.pwl1_point_2 = 12;
3144     sampler_8x8[index].dw5.pwl1_point_3 = 16;
3145     sampler_8x8[index].dw5.pwl1_point_4 = 26;
3146     sampler_8x8[index].dw6.pwl1_point_5 = 40;
3147     sampler_8x8[index].dw6.pwl1_point_6 = 160;
3148     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
3149     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
3150     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
3151     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
3152     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
3153     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
3154     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
3155     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
3156     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
3157     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
3158     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
3159     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
3160     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
3161     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
3162     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
3163     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
3164     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
3165     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
3166     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
3167     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
3168     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
3169     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
3170     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
3171     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
3172     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
3173     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
3174     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
3175     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
3176     sampler_8x8[index].dw13.limiter_boost = 0;
3177     sampler_8x8[index].dw13.minimum_limiter = 10;
3178     sampler_8x8[index].dw13.maximum_limiter = 11;
3179     sampler_8x8[index].dw14.clip_limiter = 130;
3180     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3181                       I915_GEM_DOMAIN_RENDER, 
3182                       0,
3183                       0,
3184                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3185                       pp_context->sampler_state_table.bo_8x8);
3186
3187     dri_bo_unmap(pp_context->sampler_state_table.bo);
3188
3189     /* private function & data */
3190     pp_context->pp_x_steps = pp_avs_x_steps;
3191     pp_context->pp_y_steps = pp_avs_y_steps;
3192     pp_context->private_context = &pp_context->pp_avs_context;
3193     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
3194
3195     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
3196     float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
3197     pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
3198     pp_avs_context->dest_y = dst_rect->y;
3199     pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
3200     pp_avs_context->dest_h = ALIGN(dst_rect->height, 8);
3201     pp_avs_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
3202     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
3203     pp_avs_context->src_w = src_rect->width + src_left_edge_extend;
3204     pp_avs_context->src_h = src_rect->height;
3205
3206     pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
3207     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
3208
3209     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
3210     pp_inline_parameter->grf5.block_count_x = 1;        /* M x 1 */
3211     pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
3212     pp_inline_parameter->grf6.video_step_delta = 0.0;
3213
3214     dst_surface->flags = src_surface->flags;
3215
3216     return VA_STATUS_SUCCESS;
3217 }
3218
3219 static VAStatus
3220 pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3221                             const struct i965_surface *src_surface,
3222                             const VARectangle *src_rect,
3223                             struct i965_surface *dst_surface,
3224                             const VARectangle *dst_rect,
3225                             void *filter_param)
3226 {
3227     return pp_nv12_avs_initialize(ctx, pp_context,
3228                                   src_surface,
3229                                   src_rect,
3230                                   dst_surface,
3231                                   dst_rect,
3232                                   filter_param,
3233                                   1);
3234 }
3235
3236 static VAStatus
3237 gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3238                              const struct i965_surface *src_surface,
3239                              const VARectangle *src_rect,
3240                              struct i965_surface *dst_surface,
3241                              const VARectangle *dst_rect,
3242                              void *filter_param)
3243 {
3244     return pp_nv12_avs_initialize(ctx, pp_context,
3245                                   src_surface,
3246                                   src_rect,
3247                                   dst_surface,
3248                                   dst_rect,
3249                                   filter_param,
3250                                   0);    
3251 }
3252
3253 static int
3254 gen7_pp_avs_x_steps(void *private_context)
3255 {
3256     struct pp_avs_context *pp_avs_context = private_context;
3257
3258     return pp_avs_context->dest_w / 16;
3259 }
3260
3261 static int
3262 gen7_pp_avs_y_steps(void *private_context)
3263 {
3264     struct pp_avs_context *pp_avs_context = private_context;
3265
3266     return pp_avs_context->dest_h / 16;
3267 }
3268
3269 static int
3270 gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3271 {
3272     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context;
3273     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3274
3275     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
3276     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
3277     pp_inline_parameter->grf7.constant_0 = 0xffffffff;
3278     pp_inline_parameter->grf7.sampler_load_main_video_x_scaling_step = pp_avs_context->horiz_range / pp_avs_context->src_w;
3279
3280     return 0;
3281 }
3282
3283 static void gen7_update_src_surface_uv_offset(VADriverContextP    ctx, 
3284                                               struct i965_post_processing_context *pp_context,
3285                                               const struct i965_surface *surface)
3286 {
3287     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3288     int fourcc = pp_get_surface_fourcc(ctx, surface);
3289     
3290     if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) {
3291         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3292         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3293         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3294     } else if (fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
3295         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 1;
3296         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 0;
3297         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 2;
3298     }
3299 }
3300
3301 static VAStatus
3302 gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3303                            const struct i965_surface *src_surface,
3304                            const VARectangle *src_rect,
3305                            struct i965_surface *dst_surface,
3306                            const VARectangle *dst_rect,
3307                            void *filter_param)
3308 {
3309     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context;
3310     struct i965_driver_data *i965 = i965_driver_data(ctx);
3311     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3312     struct gen7_sampler_8x8 *sampler_8x8;
3313     struct i965_sampler_8x8_state *sampler_8x8_state;
3314     int index, i;
3315     int width[3], height[3], pitch[3], offset[3];
3316     int src_width, src_height;
3317
3318     /* source surface */
3319     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
3320                                          width, height, pitch, offset);
3321     src_width = width[0];
3322     src_height = height[0];
3323
3324     /* destination surface */
3325     gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
3326                                          width, height, pitch, offset);
3327
3328     /* sampler 8x8 state */
3329     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
3330     assert(pp_context->sampler_state_table.bo_8x8->virtual);
3331     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
3332     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
3333     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
3334
3335     for (i = 0; i < 17; i++) {
3336         float coff;
3337         coff = i;
3338         coff = coff / 16;
3339         /* for Y channel, currently ignore */
3340         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0;
3341         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0;
3342         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0;
3343         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = intel_format_convert(1 - coff, 1, 6,0);
3344         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = intel_format_convert(coff, 1, 6, 0);
3345         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0;
3346         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0;
3347         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0;
3348         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0;
3349         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0;
3350         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0;
3351         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
3352         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = intel_format_convert(coff, 1, 6, 0);
3353         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0;
3354         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0;
3355         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0;
3356         /* for U/V channel, 0.25 */
3357         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
3358         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
3359         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x0;
3360         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
3361         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = intel_format_convert(coff, 1, 6, 0);
3362         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0;
3363         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
3364         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
3365         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
3366         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
3367         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x0;
3368         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
3369         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = intel_format_convert(coff, 1, 6, 0);
3370         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x0;
3371         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
3372         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
3373     }
3374
3375     sampler_8x8_state->dw136.default_sharpness_level = 0;
3376     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
3377     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
3378     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
3379     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
3380
3381     /* sampler 8x8 */
3382     dri_bo_map(pp_context->sampler_state_table.bo, True);
3383     assert(pp_context->sampler_state_table.bo->virtual);
3384     assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
3385     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
3386
3387     /* sample_8x8 Y index 4 */
3388     index = 4;
3389     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3390     sampler_8x8[index].dw0.global_noise_estimation = 255;
3391     sampler_8x8[index].dw0.ief_bypass = 1;
3392
3393     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3394
3395     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3396     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3397     sampler_8x8[index].dw2.r5x_coefficient = 9;
3398     sampler_8x8[index].dw2.r5cx_coefficient = 8;
3399     sampler_8x8[index].dw2.r5c_coefficient = 3;
3400
3401     sampler_8x8[index].dw3.r3x_coefficient = 27;
3402     sampler_8x8[index].dw3.r3c_coefficient = 5;
3403     sampler_8x8[index].dw3.gain_factor = 40;
3404     sampler_8x8[index].dw3.non_edge_weight = 1;
3405     sampler_8x8[index].dw3.regular_weight = 2;
3406     sampler_8x8[index].dw3.strong_edge_weight = 7;
3407     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3408
3409     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3410                       I915_GEM_DOMAIN_RENDER, 
3411                       0,
3412                       0,
3413                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3414                       pp_context->sampler_state_table.bo_8x8);
3415
3416     /* sample_8x8 UV index 8 */
3417     index = 8;
3418     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3419     sampler_8x8[index].dw0.disable_8x8_filter = 0;
3420     sampler_8x8[index].dw0.global_noise_estimation = 255;
3421     sampler_8x8[index].dw0.ief_bypass = 1;
3422     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3423     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3424     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3425     sampler_8x8[index].dw2.r5x_coefficient = 9;
3426     sampler_8x8[index].dw2.r5cx_coefficient = 8;
3427     sampler_8x8[index].dw2.r5c_coefficient = 3;
3428     sampler_8x8[index].dw3.r3x_coefficient = 27;
3429     sampler_8x8[index].dw3.r3c_coefficient = 5;
3430     sampler_8x8[index].dw3.gain_factor = 40;
3431     sampler_8x8[index].dw3.non_edge_weight = 1;
3432     sampler_8x8[index].dw3.regular_weight = 2;
3433     sampler_8x8[index].dw3.strong_edge_weight = 7;
3434     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3435
3436     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3437                       I915_GEM_DOMAIN_RENDER, 
3438                       0,
3439                       0,
3440                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3441                       pp_context->sampler_state_table.bo_8x8);
3442
3443     /* sampler_8x8 V, index 12 */
3444     index = 12;
3445     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3446     sampler_8x8[index].dw0.disable_8x8_filter = 0;
3447     sampler_8x8[index].dw0.global_noise_estimation = 255;
3448     sampler_8x8[index].dw0.ief_bypass = 1;
3449     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3450     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3451     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3452     sampler_8x8[index].dw2.r5x_coefficient = 9;
3453     sampler_8x8[index].dw2.r5cx_coefficient = 8;
3454     sampler_8x8[index].dw2.r5c_coefficient = 3;
3455     sampler_8x8[index].dw3.r3x_coefficient = 27;
3456     sampler_8x8[index].dw3.r3c_coefficient = 5;
3457     sampler_8x8[index].dw3.gain_factor = 40;
3458     sampler_8x8[index].dw3.non_edge_weight = 1;
3459     sampler_8x8[index].dw3.regular_weight = 2;
3460     sampler_8x8[index].dw3.strong_edge_weight = 7;
3461     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3462
3463     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3464                       I915_GEM_DOMAIN_RENDER, 
3465                       0,
3466                       0,
3467                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3468                       pp_context->sampler_state_table.bo_8x8);
3469
3470     dri_bo_unmap(pp_context->sampler_state_table.bo);
3471
3472     /* private function & data */
3473     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
3474     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
3475     pp_context->private_context = &pp_context->pp_avs_context;
3476     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
3477
3478     pp_avs_context->dest_x = dst_rect->x;
3479     pp_avs_context->dest_y = dst_rect->y;
3480     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
3481     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
3482     pp_avs_context->src_w = src_rect->width;
3483     pp_avs_context->src_h = src_rect->height;
3484     pp_avs_context->horiz_range = (float)src_rect->width / src_width;
3485
3486     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
3487     dw = MAX(dw, dst_rect->width);
3488
3489     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3490     pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */
3491     if (IS_HASWELL(i965->intel.device_id))
3492         pp_static_parameter->grf2.avs_wa_enable = 0; /* HSW don't use the WA */
3493         
3494     pp_static_parameter->grf2.avs_wa_width = dw;
3495     pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * dw);
3496     pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * dw);
3497
3498     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
3499     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height;
3500     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height -
3501         (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step;
3502     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width -
3503         (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw;
3504
3505     gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
3506
3507     dst_surface->flags = src_surface->flags;
3508
3509     return VA_STATUS_SUCCESS;
3510 }
3511
3512 static VAStatus
3513 gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3514                            const struct i965_surface *src_surface,
3515                            const VARectangle *src_rect,
3516                            struct i965_surface *dst_surface,
3517                            const VARectangle *dst_rect,
3518                            void *filter_param)
3519 {
3520 /* TODO: Add the sampler_8x8 state */
3521     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
3522     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3523     struct gen8_sampler_8x8_avs *sampler_8x8;
3524     struct i965_sampler_8x8_coefficient *sampler_8x8_state;
3525     int i;
3526     int width[3], height[3], pitch[3], offset[3];
3527     int src_width, src_height;
3528
3529     memset(pp_static_parameter, 0, sizeof(struct gen7_pp_static_parameter));
3530
3531     /* source surface */
3532     gen8_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
3533                                          width, height, pitch, offset);
3534     src_height = height[0];
3535     src_width  = width[0];
3536
3537     /* destination surface */
3538     gen8_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
3539                                          width, height, pitch, offset);
3540
3541     /* sampler 8x8 state */
3542     dri_bo_map(pp_context->sampler_state_table.bo, True);
3543     assert(pp_context->sampler_state_table.bo->virtual);
3544
3545     /* Currently only one gen8 sampler_8x8 is initialized */
3546     sampler_8x8 = (struct gen8_sampler_8x8_avs *)
3547         pp_context->sampler_state_table.bo->virtual;
3548     memset(sampler_8x8, 0, sizeof(*sampler_8x8));
3549
3550     sampler_8x8->dw0.gain_factor = 44;
3551     sampler_8x8->dw0.weak_edge_threshold = 1;
3552     sampler_8x8->dw0.strong_edge_threshold = 8;
3553     /* Use the value like that on Ivy instead of default
3554      * sampler_8x8->dw0.r3x_coefficient = 5;
3555      */
3556     sampler_8x8->dw0.r3x_coefficient = 27;
3557     sampler_8x8->dw0.r3c_coefficient = 5;
3558
3559     sampler_8x8->dw2.global_noise_estimation = 255;
3560     sampler_8x8->dw2.non_edge_weight = 1;
3561     sampler_8x8->dw2.regular_weight = 2;
3562     sampler_8x8->dw2.strong_edge_weight = 7;
3563     /* Use the value like that on Ivy instead of default
3564      * sampler_8x8->dw2.r5x_coefficient = 7;
3565      * sampler_8x8->dw2.r5cx_coefficient = 7;
3566      * sampler_8x8->dw2.r5c_coefficient = 7;
3567      */
3568     sampler_8x8->dw2.r5x_coefficient = 9;
3569     sampler_8x8->dw2.r5cx_coefficient = 8;
3570     sampler_8x8->dw2.r5c_coefficient = 3;
3571
3572     sampler_8x8->dw3.sin_alpha = 101; /* sin_alpha = 0 */
3573     sampler_8x8->dw3.cos_alpha = 79; /* cos_alpha = 0 */
3574     sampler_8x8->dw3.sat_max = 0x1f;
3575     sampler_8x8->dw3.hue_max = 14;
3576     /* The 8tap filter will determine whether the adaptive Filter is
3577      * applied for all channels(dw153).
3578      * If the 8tap filter is disabled, the adaptive filter should be disabled.
3579      * Only when 8tap filter is enabled, it can be enabled or not
3580      */
3581     sampler_8x8->dw3.enable_8tap_filter = 3;
3582     sampler_8x8->dw3.ief4_smooth_enable = 0;
3583
3584     sampler_8x8->dw4.s3u = 0;
3585     sampler_8x8->dw4.diamond_margin = 4;
3586     sampler_8x8->dw4.vy_std_enable = 0;
3587     sampler_8x8->dw4.umid = 110;
3588     sampler_8x8->dw4.vmid = 154;
3589
3590     sampler_8x8->dw5.diamond_dv = 0;
3591     sampler_8x8->dw5.diamond_th = 35;
3592     sampler_8x8->dw5.diamond_alpha = 100; /* diamond_alpha = 0 */
3593     sampler_8x8->dw5.hs_margin = 3;
3594     sampler_8x8->dw5.diamond_du = 2;
3595
3596     sampler_8x8->dw6.y_point1 = 46;
3597     sampler_8x8->dw6.y_point2 = 47;
3598     sampler_8x8->dw6.y_point3 = 254;
3599     sampler_8x8->dw6.y_point4 = 255;
3600
3601     sampler_8x8->dw7.inv_margin_vyl = 3300; /* inv_margin_vyl = 0 */
3602
3603     sampler_8x8->dw8.inv_margin_vyu = 1600; /* inv_margin_vyu = 0 */
3604     sampler_8x8->dw8.p0l = 46;
3605     sampler_8x8->dw8.p1l = 216;
3606
3607     sampler_8x8->dw9.p2l = 236;
3608     sampler_8x8->dw9.p3l = 236;
3609     sampler_8x8->dw9.b0l = 133;
3610     sampler_8x8->dw9.b1l = 130;
3611
3612     sampler_8x8->dw10.b2l = 130;
3613     sampler_8x8->dw10.b3l = 130;
3614     /* s0l = -5 / 256. s2.8 */
3615     sampler_8x8->dw10.s0l = 1029;    /* s0l = 0 */
3616     sampler_8x8->dw10.y_slope2 = 31; /* y_slop2 = 0 */
3617
3618     sampler_8x8->dw11.s1l = 0;
3619     sampler_8x8->dw11.s2l = 0;
3620
3621     sampler_8x8->dw12.s3l = 0;
3622     sampler_8x8->dw12.p0u = 46;
3623     sampler_8x8->dw12.p1u = 66;
3624     sampler_8x8->dw12.y_slope1 = 31; /* y_slope1 = 0 */
3625
3626     sampler_8x8->dw13.p2u = 130;
3627     sampler_8x8->dw13.p3u = 236;
3628     sampler_8x8->dw13.b0u = 143;
3629     sampler_8x8->dw13.b1u = 163;
3630
3631     sampler_8x8->dw14.b2u = 200;
3632     sampler_8x8->dw14.b3u = 140;
3633     sampler_8x8->dw14.s0u = 256;  /* s0u = 0 */
3634
3635     sampler_8x8->dw15.s1u = 113; /* s1u = 0 */
3636     sampler_8x8->dw15.s2u = 1203; /* s2u = 0 */
3637
3638     sampler_8x8_state = sampler_8x8->coefficients;
3639
3640      for (i = 0; i < 17; i++) {
3641         float coff;
3642         coff = i;
3643         coff = coff / 16;
3644
3645         memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
3646         /* for Y channel, currently ignore */
3647         sampler_8x8_state->dw0.table_0x_filter_c0 = 0x0;
3648         sampler_8x8_state->dw0.table_0x_filter_c1 = 0x0;
3649         sampler_8x8_state->dw0.table_0x_filter_c2 = 0x0;
3650         sampler_8x8_state->dw0.table_0x_filter_c3 =
3651             intel_format_convert(1 - coff, 1, 6, 0);
3652         sampler_8x8_state->dw1.table_0x_filter_c4 =
3653             intel_format_convert(coff, 1, 6, 0);
3654         sampler_8x8_state->dw1.table_0x_filter_c5 = 0x0;
3655         sampler_8x8_state->dw1.table_0x_filter_c6 = 0x0;
3656         sampler_8x8_state->dw1.table_0x_filter_c7 = 0x0;
3657         sampler_8x8_state->dw2.table_0y_filter_c0 = 0x0;
3658         sampler_8x8_state->dw2.table_0y_filter_c1 = 0x0;
3659         sampler_8x8_state->dw2.table_0y_filter_c2 = 0x0;
3660         sampler_8x8_state->dw2.table_0y_filter_c3 =
3661             intel_format_convert(1 - coff, 1, 6, 0);
3662         sampler_8x8_state->dw3.table_0y_filter_c4 =
3663             intel_format_convert(coff, 1, 6, 0);
3664         sampler_8x8_state->dw3.table_0y_filter_c5 = 0x0;
3665         sampler_8x8_state->dw3.table_0y_filter_c6 = 0x0;
3666         sampler_8x8_state->dw3.table_0y_filter_c7 = 0x0;
3667         /* for U/V channel, 0.25 */
3668         sampler_8x8_state->dw4.table_1x_filter_c0 = 0x0;
3669         sampler_8x8_state->dw4.table_1x_filter_c1 = 0x0;
3670         sampler_8x8_state->dw4.table_1x_filter_c2 = 0x0;
3671         sampler_8x8_state->dw4.table_1x_filter_c3 =
3672             intel_format_convert(1 - coff, 1, 6, 0);
3673         sampler_8x8_state->dw5.table_1x_filter_c4 =
3674             intel_format_convert(coff, 1, 6, 0);
3675         sampler_8x8_state->dw5.table_1x_filter_c5 = 0x00;
3676         sampler_8x8_state->dw5.table_1x_filter_c6 = 0x0;
3677         sampler_8x8_state->dw5.table_1x_filter_c7 = 0x0;
3678         sampler_8x8_state->dw6.table_1y_filter_c0 = 0x0;
3679         sampler_8x8_state->dw6.table_1y_filter_c1 = 0x0;
3680         sampler_8x8_state->dw6.table_1y_filter_c2 = 0x0;
3681         sampler_8x8_state->dw6.table_1y_filter_c3 =
3682             intel_format_convert(1 - coff, 1, 6, 0);
3683         sampler_8x8_state->dw7.table_1y_filter_c4 =
3684             intel_format_convert(coff, 1, 6,0);
3685         sampler_8x8_state->dw7.table_1y_filter_c5 = 0x0;
3686         sampler_8x8_state->dw7.table_1y_filter_c6 = 0x0;
3687         sampler_8x8_state->dw7.table_1y_filter_c7 = 0x0;
3688         sampler_8x8_state++;
3689     }
3690
3691     sampler_8x8->dw152.default_sharpness_level = 0;
3692     sampler_8x8->dw153.adaptive_filter_for_all_channel = 1;
3693     sampler_8x8->dw153.bypass_y_adaptive_filtering = 1;
3694     sampler_8x8->dw153.bypass_x_adaptive_filtering = 1;
3695
3696     dri_bo_unmap(pp_context->sampler_state_table.bo);
3697
3698
3699     /* private function & data */
3700     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
3701     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
3702     pp_context->private_context = &pp_context->pp_avs_context;
3703     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
3704
3705     pp_avs_context->dest_x = dst_rect->x;
3706     pp_avs_context->dest_y = dst_rect->y;
3707     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
3708     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
3709     pp_avs_context->src_w = src_rect->width;
3710     pp_avs_context->src_h = src_rect->height;
3711     pp_avs_context->horiz_range = (float)src_rect->width / src_width;
3712
3713     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
3714     dw = MAX(dw, dst_rect->width);
3715
3716     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3717     pp_static_parameter->grf2.avs_wa_enable = 0; /* It is not required on GEN8+ */
3718     pp_static_parameter->grf2.avs_wa_width = src_width;
3719     pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * src_width);
3720     pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * src_width);
3721
3722     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
3723     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height;
3724     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height -
3725         (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step;
3726     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width -
3727         (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw;
3728
3729     gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
3730
3731     dst_surface->flags = src_surface->flags;
3732
3733     return VA_STATUS_SUCCESS;
3734 }
3735
3736 static VAStatus
3737 gen7_pp_rgbx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3738                            const struct i965_surface *src_surface,
3739                            const VARectangle *src_rect,
3740                            struct i965_surface *dst_surface,
3741                            const VARectangle *dst_rect,
3742                            void *filter_param)
3743 {
3744     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context;
3745     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3746     struct gen7_sampler_8x8 *sampler_8x8;
3747     struct i965_sampler_8x8_state *sampler_8x8_state;
3748     int index, i;
3749     int width[3], height[3], pitch[3], offset[3];
3750     int src_width, src_height;
3751
3752     /* source surface */
3753     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
3754                                          width, height, pitch, offset);
3755     src_width = width[0];
3756     src_height = height[0];
3757
3758     /* destination surface */
3759     gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
3760                                          width, height, pitch, offset);
3761
3762     /* sampler 8x8 state */
3763     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
3764     assert(pp_context->sampler_state_table.bo_8x8->virtual);
3765     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
3766     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
3767     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
3768
3769     /* The sampler_state setting of RGBX surface will be different with
3770      * that for NV12/I420 surface. 
3771      */
3772     for (i = 0; i < 17; i++) {
3773         float coff;
3774         coff = i;
3775         coff = coff / 16;
3776         /* for Y channel, currently ignore */
3777         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0;
3778         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0;
3779         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0;
3780         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
3781         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = intel_format_convert(coff, 1, 6, 0);
3782         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0;
3783         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0;
3784         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0;
3785         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0;
3786         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0;
3787         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0;
3788         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
3789         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = intel_format_convert(coff, 1, 6, 0);
3790         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0;
3791         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0;
3792         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0;
3793         /* for U/V channel, 0.25 */
3794         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
3795         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
3796         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x00;
3797         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
3798         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = intel_format_convert(coff, 1, 6, 0);
3799         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x00;
3800         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
3801         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
3802         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
3803         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
3804         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x00;
3805         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
3806         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = intel_format_convert(coff, 1, 6, 0);
3807         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x00;
3808         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
3809         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
3810     }
3811
3812     sampler_8x8_state->dw136.default_sharpness_level = 0;
3813     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 0;
3814     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
3815     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
3816     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
3817
3818     /* sampler 8x8 */
3819     dri_bo_map(pp_context->sampler_state_table.bo, True);
3820     assert(pp_context->sampler_state_table.bo->virtual);
3821     assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
3822     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
3823
3824     /* sample_8x8 Y index 4 */
3825     index = 4;
3826     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3827     sampler_8x8[index].dw0.global_noise_estimation = 255;
3828     sampler_8x8[index].dw0.ief_bypass = 1;
3829
3830     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3831
3832     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3833     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3834     sampler_8x8[index].dw2.r5x_coefficient = 9;
3835     sampler_8x8[index].dw2.r5cx_coefficient = 8;
3836     sampler_8x8[index].dw2.r5c_coefficient = 3;
3837
3838     sampler_8x8[index].dw3.r3x_coefficient = 27;
3839     sampler_8x8[index].dw3.r3c_coefficient = 5;
3840     sampler_8x8[index].dw3.gain_factor = 40;
3841     sampler_8x8[index].dw3.non_edge_weight = 1;
3842     sampler_8x8[index].dw3.regular_weight = 2;
3843     sampler_8x8[index].dw3.strong_edge_weight = 7;
3844     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3845
3846     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3847                       I915_GEM_DOMAIN_RENDER, 
3848                       0,
3849                       0,
3850                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3851                       pp_context->sampler_state_table.bo_8x8);
3852
3853     /* sample_8x8 UV index 8 */
3854     index = 8;
3855     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3856     sampler_8x8[index].dw0.disable_8x8_filter = 0;
3857     sampler_8x8[index].dw0.global_noise_estimation = 255;
3858     sampler_8x8[index].dw0.ief_bypass = 1;
3859     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3860     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3861     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3862     sampler_8x8[index].dw2.r5x_coefficient = 9;
3863     sampler_8x8[index].dw2.r5cx_coefficient = 8;
3864     sampler_8x8[index].dw2.r5c_coefficient = 3;
3865     sampler_8x8[index].dw3.r3x_coefficient = 27;
3866     sampler_8x8[index].dw3.r3c_coefficient = 5;
3867     sampler_8x8[index].dw3.gain_factor = 40;
3868     sampler_8x8[index].dw3.non_edge_weight = 1;
3869     sampler_8x8[index].dw3.regular_weight = 2;
3870     sampler_8x8[index].dw3.strong_edge_weight = 7;
3871     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3872
3873     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3874                       I915_GEM_DOMAIN_RENDER, 
3875                       0,
3876                       0,
3877                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3878                       pp_context->sampler_state_table.bo_8x8);
3879
3880     /* sampler_8x8 V, index 12 */
3881     index = 12;
3882     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3883     sampler_8x8[index].dw0.disable_8x8_filter = 0;
3884     sampler_8x8[index].dw0.global_noise_estimation = 255;
3885     sampler_8x8[index].dw0.ief_bypass = 1;
3886     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3887     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3888     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3889     sampler_8x8[index].dw2.r5x_coefficient = 9;
3890     sampler_8x8[index].dw2.r5cx_coefficient = 8;
3891     sampler_8x8[index].dw2.r5c_coefficient = 3;
3892     sampler_8x8[index].dw3.r3x_coefficient = 27;
3893     sampler_8x8[index].dw3.r3c_coefficient = 5;
3894     sampler_8x8[index].dw3.gain_factor = 40;
3895     sampler_8x8[index].dw3.non_edge_weight = 1;
3896     sampler_8x8[index].dw3.regular_weight = 2;
3897     sampler_8x8[index].dw3.strong_edge_weight = 7;
3898     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3899
3900     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3901                       I915_GEM_DOMAIN_RENDER, 
3902                       0,
3903                       0,
3904                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3905                       pp_context->sampler_state_table.bo_8x8);
3906
3907     dri_bo_unmap(pp_context->sampler_state_table.bo);
3908
3909     /* private function & data */
3910     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
3911     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
3912     pp_context->private_context = &pp_context->pp_avs_context;
3913     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
3914
3915     pp_avs_context->dest_x = dst_rect->x;
3916     pp_avs_context->dest_y = dst_rect->y;
3917     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
3918     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
3919     pp_avs_context->src_w = src_rect->width;
3920     pp_avs_context->src_h = src_rect->height;
3921     pp_avs_context->horiz_range = (float)src_rect->width / src_width;
3922
3923     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
3924     dw = MAX(dw, dst_rect->width);
3925
3926     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3927     pp_static_parameter->grf2.avs_wa_enable = 0; /* It is unnecessary to use WA for RGBX surface */
3928     pp_static_parameter->grf2.avs_wa_width = dw;
3929     pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * dw);
3930     pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * dw);
3931
3932     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
3933     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height;
3934     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height -
3935                                                                    (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step;
3936     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width -
3937                                                                      (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw;
3938     gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
3939
3940     dst_surface->flags = src_surface->flags;
3941
3942     return VA_STATUS_SUCCESS;
3943 }
3944
/*
 * Horizontal step count for the DNDI path.
 *
 * Installed as pp_context->pp_x_steps by pp_nv12_dndi_initialize().
 * The private context is not consulted: the answer is always 1.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
3950
3951 static int
3952 pp_dndi_y_steps(void *private_context)
3953 {
3954     struct pp_dndi_context *pp_dndi_context = private_context;
3955
3956     return pp_dndi_context->dest_h / 4;
3957 }
3958
3959 static int
3960 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3961 {
3962     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3963
3964     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3965     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
3966
3967     return 0;
3968 }
3969
3970 static VAStatus
3971 pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3972                         const struct i965_surface *src_surface,
3973                         const VARectangle *src_rect,
3974                         struct i965_surface *dst_surface,
3975                         const VARectangle *dst_rect,
3976                         void *filter_param)
3977 {
3978     struct i965_driver_data *i965 = i965_driver_data(ctx);
3979     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->pp_dndi_context;
3980     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3981     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3982     struct object_surface *obj_surface;
3983     struct i965_sampler_dndi *sampler_dndi;
3984     int index;
3985     int w, h;
3986     int orig_w, orig_h;
3987     int dndi_top_first = 1;
3988     VAProcFilterParameterBufferDeinterlacing *di_filter_param = (VAProcFilterParameterBufferDeinterlacing *)filter_param;
3989
3990     if (di_filter_param->flags & VA_DEINTERLACING_BOTTOM_FIELD)
3991         dndi_top_first = 0;
3992     else
3993         dndi_top_first = 1;
3994
3995     /* surface */
3996     obj_surface = (struct object_surface *)src_surface->base;
3997     orig_w = obj_surface->orig_width;
3998     orig_h = obj_surface->orig_height;
3999     w = obj_surface->width;
4000     h = obj_surface->height;
4001
4002     if (pp_dndi_context->stmm_bo == NULL) {
4003         pp_dndi_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr,
4004                                                 "STMM surface",
4005                                                 w * h,
4006                                                 4096);
4007         assert(pp_dndi_context->stmm_bo);
4008     }
4009
4010     /* source UV surface index 2 */
4011     i965_pp_set_surface_state(ctx, pp_context,
4012                               obj_surface->bo, w * h,
4013                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
4014                               2, 0);
4015
4016     /* source YUV surface index 4 */
4017     i965_pp_set_surface2_state(ctx, pp_context,
4018                                obj_surface->bo, 0,
4019                                orig_w, orig_h, w,
4020                                0, h,
4021                                SURFACE_FORMAT_PLANAR_420_8, 1,
4022                                4);
4023
4024     /* source STMM surface index 20 */
4025     i965_pp_set_surface_state(ctx, pp_context,
4026                               pp_dndi_context->stmm_bo, 0,
4027                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4028                               20, 1);
4029
4030     /* destination surface */
4031     obj_surface = (struct object_surface *)dst_surface->base;
4032     orig_w = obj_surface->orig_width;
4033     orig_h = obj_surface->orig_height;
4034     w = obj_surface->width;
4035     h = obj_surface->height;
4036
4037     /* destination Y surface index 7 */
4038     i965_pp_set_surface_state(ctx, pp_context,
4039                               obj_surface->bo, 0,
4040                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4041                               7, 1);
4042
4043     /* destination UV surface index 8 */
4044     i965_pp_set_surface_state(ctx, pp_context,
4045                               obj_surface->bo, w * h,
4046                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
4047                               8, 1);
4048     /* sampler dndi */
4049     dri_bo_map(pp_context->sampler_state_table.bo, True);
4050     assert(pp_context->sampler_state_table.bo->virtual);
4051     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
4052     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
4053
4054     /* sample dndi index 1 */
4055     index = 0;
4056     sampler_dndi[index].dw0.denoise_asd_threshold = 38;
4057     sampler_dndi[index].dw0.denoise_history_delta = 7;          // 0-15, default is 8
4058     sampler_dndi[index].dw0.denoise_maximum_history = 192;      // 128-240
4059     sampler_dndi[index].dw0.denoise_stad_threshold = 140;
4060
4061     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 38;
4062     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 1;
4063     sampler_dndi[index].dw1.stmm_c2 = 1;
4064     sampler_dndi[index].dw1.low_temporal_difference_threshold = 0;
4065     sampler_dndi[index].dw1.temporal_difference_threshold = 0;
4066
4067     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 20;   // 0-31
4068     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 1;    // 0-15
4069     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
4070     sampler_dndi[index].dw2.good_neighbor_threshold = 12;                // 0-63
4071
4072     sampler_dndi[index].dw3.maximum_stmm = 150;
4073     sampler_dndi[index].dw3.multipler_for_vecm = 30;
4074     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 125;
4075     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
4076     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
4077
4078     sampler_dndi[index].dw4.sdi_delta = 5;
4079     sampler_dndi[index].dw4.sdi_threshold = 100;
4080     sampler_dndi[index].dw4.stmm_output_shift = 5;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
4081     sampler_dndi[index].dw4.stmm_shift_up = 1;
4082     sampler_dndi[index].dw4.stmm_shift_down = 0;
4083     sampler_dndi[index].dw4.minimum_stmm = 118;
4084
4085     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 175;
4086     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 37;
4087     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 100;
4088     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 50;
4089
4090     sampler_dndi[index].dw6.dn_enable = 1;
4091     sampler_dndi[index].dw6.di_enable = 1;
4092     sampler_dndi[index].dw6.di_partial = 0;
4093     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
4094     sampler_dndi[index].dw6.dndi_stream_id = 0;
4095     sampler_dndi[index].dw6.dndi_first_frame = 1;
4096     sampler_dndi[index].dw6.progressive_dn = 0;
4097     sampler_dndi[index].dw6.fmd_tear_threshold = 2;
4098     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100;
4099     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 16;
4100
4101     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
4102     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
4103     sampler_dndi[index].dw7.vdi_walker_enable = 0;
4104     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
4105
4106     dri_bo_unmap(pp_context->sampler_state_table.bo);
4107
4108     /* private function & data */
4109     pp_context->pp_x_steps = pp_dndi_x_steps;
4110     pp_context->pp_y_steps = pp_dndi_y_steps;
4111     pp_context->private_context = &pp_context->pp_dndi_context;
4112     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
4113
4114     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
4115     pp_static_parameter->grf1.r1_6.di.top_field_first = dndi_top_first;
4116     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 0;
4117     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 0;
4118
4119     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
4120     pp_inline_parameter->grf5.number_blocks = w / 16;
4121     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
4122     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
4123
4124     pp_dndi_context->dest_w = w;
4125     pp_dndi_context->dest_h = h;
4126
4127     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
4128
4129     return VA_STATUS_SUCCESS;
4130 }
4131
/*
 * Horizontal step count for the denoise (DN) path.
 *
 * Installed as pp_context->pp_x_steps by pp_nv12_dn_initialize().
 * The private context is not consulted: the answer is always 1.
 */
static int
pp_dn_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
4137
4138 static int
4139 pp_dn_y_steps(void *private_context)
4140 {
4141     struct pp_dn_context *pp_dn_context = private_context;
4142
4143     return pp_dn_context->dest_h / 8;
4144 }
4145
4146 static int
4147 pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
4148 {
4149     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
4150
4151     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
4152     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
4153
4154     return 0;
4155 }
4156
4157 static VAStatus
4158 pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
4159                       const struct i965_surface *src_surface,
4160                       const VARectangle *src_rect,
4161                       struct i965_surface *dst_surface,
4162                       const VARectangle *dst_rect,
4163                       void *filter_param)
4164 {
4165     struct i965_driver_data *i965 = i965_driver_data(ctx);
4166     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->pp_dn_context;
4167     struct object_surface *obj_surface;
4168     struct i965_sampler_dndi *sampler_dndi;
4169     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
4170     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
4171     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
4172     int index;
4173     int w, h;
4174     int orig_w, orig_h;
4175     int dn_strength = 15;
4176     int dndi_top_first = 1;
4177     int dn_progressive = 0;
4178
4179     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
4180         dndi_top_first = 1;
4181         dn_progressive = 1;
4182     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
4183         dndi_top_first = 1;
4184         dn_progressive = 0;
4185     } else {
4186         dndi_top_first = 0;
4187         dn_progressive = 0;
4188     }
4189
4190     if (dn_filter_param) {
4191         float value = dn_filter_param->value;
4192         
4193         if (value > 1.0)
4194             value = 1.0;
4195         
4196         if (value < 0.0)
4197             value = 0.0;
4198
4199         dn_strength = (int)(value * 31.0F);
4200     }
4201
4202     /* surface */
4203     obj_surface = (struct object_surface *)src_surface->base;
4204     orig_w = obj_surface->orig_width;
4205     orig_h = obj_surface->orig_height;
4206     w = obj_surface->width;
4207     h = obj_surface->height;
4208
4209     if (pp_dn_context->stmm_bo == NULL) {
4210         pp_dn_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr,
4211                                               "STMM surface",
4212                                               w * h,
4213                                               4096);
4214         assert(pp_dn_context->stmm_bo);
4215     }
4216
4217     /* source UV surface index 2 */
4218     i965_pp_set_surface_state(ctx, pp_context,
4219                               obj_surface->bo, w * h,
4220                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
4221                               2, 0);
4222
4223     /* source YUV surface index 4 */
4224     i965_pp_set_surface2_state(ctx, pp_context,
4225                                obj_surface->bo, 0,
4226                                orig_w, orig_h, w,
4227                                0, h,
4228                                SURFACE_FORMAT_PLANAR_420_8, 1,
4229                                4);
4230
4231     /* source STMM surface index 20 */
4232     i965_pp_set_surface_state(ctx, pp_context,
4233                               pp_dn_context->stmm_bo, 0,
4234                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4235                               20, 1);
4236
4237     /* destination surface */
4238     obj_surface = (struct object_surface *)dst_surface->base;
4239     orig_w = obj_surface->orig_width;
4240     orig_h = obj_surface->orig_height;
4241     w = obj_surface->width;
4242     h = obj_surface->height;
4243
4244     /* destination Y surface index 7 */
4245     i965_pp_set_surface_state(ctx, pp_context,
4246                               obj_surface->bo, 0,
4247                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4248                               7, 1);
4249
4250     /* destination UV surface index 8 */
4251     i965_pp_set_surface_state(ctx, pp_context,
4252                               obj_surface->bo, w * h,
4253                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
4254                               8, 1);
4255     /* sampler dn */
4256     dri_bo_map(pp_context->sampler_state_table.bo, True);
4257     assert(pp_context->sampler_state_table.bo->virtual);
4258     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
4259     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
4260
4261     /* sample dndi index 1 */
4262     index = 0;
4263     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
4264     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
4265     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
4266     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
4267
4268     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
4269     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
4270     sampler_dndi[index].dw1.stmm_c2 = 0;
4271     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
4272     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
4273
4274     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
4275     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
4276     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
4277     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
4278
4279     sampler_dndi[index].dw3.maximum_stmm = 128;
4280     sampler_dndi[index].dw3.multipler_for_vecm = 2;
4281     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
4282     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
4283     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
4284
4285     sampler_dndi[index].dw4.sdi_delta = 8;
4286     sampler_dndi[index].dw4.sdi_threshold = 128;
4287     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
4288     sampler_dndi[index].dw4.stmm_shift_up = 0;
4289     sampler_dndi[index].dw4.stmm_shift_down = 0;
4290     sampler_dndi[index].dw4.minimum_stmm = 0;
4291
4292     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
4293     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
4294     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
4295     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
4296
4297     sampler_dndi[index].dw6.dn_enable = 1;
4298     sampler_dndi[index].dw6.di_enable = 0;
4299     sampler_dndi[index].dw6.di_partial = 0;
4300     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
4301     sampler_dndi[index].dw6.dndi_stream_id = 1;
4302     sampler_dndi[index].dw6.dndi_first_frame = 1;
4303     sampler_dndi[index].dw6.progressive_dn = dn_progressive;
4304     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
4305     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
4306     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
4307
4308     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
4309     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
4310     sampler_dndi[index].dw7.vdi_walker_enable = 0;
4311     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
4312
4313     dri_bo_unmap(pp_context->sampler_state_table.bo);
4314
4315     /* private function & data */
4316     pp_context->pp_x_steps = pp_dn_x_steps;
4317     pp_context->pp_y_steps = pp_dn_y_steps;
4318     pp_context->private_context = &pp_context->pp_dn_context;
4319     pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
4320
4321     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
4322     pp_static_parameter->grf1.r1_6.di.top_field_first = 0;
4323     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 64;
4324     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 192;
4325
4326     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
4327     pp_inline_parameter->grf5.number_blocks = w / 16;
4328     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
4329     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
4330
4331     pp_dn_context->dest_w = w;
4332     pp_dn_context->dest_h = h;
4333
4334     dst_surface->flags = src_surface->flags;
4335     
4336     return VA_STATUS_SUCCESS;
4337 }
4338
4339 static int
4340 gen7_pp_dndi_x_steps(void *private_context)
4341 {
4342     struct pp_dndi_context *pp_dndi_context = private_context;
4343
4344     return pp_dndi_context->dest_w / 16;
4345 }
4346
4347 static int
4348 gen7_pp_dndi_y_steps(void *private_context)
4349 {
4350     struct pp_dndi_context *pp_dndi_context = private_context;
4351
4352     return pp_dndi_context->dest_h / 4;
4353 }
4354
4355 static int
4356 gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
4357 {
4358     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
4359
4360     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16;
4361     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 4;
4362
4363     return 0;
4364 }
4365
4366
4367 extern VAStatus
4368 vpp_surface_convert(VADriverContextP ctx,
4369                     struct object_surface *src_obj_surf,
4370                     struct object_surface *dst_obj_surf);
4371
4372 static VAStatus
4373 gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
4374                              const struct i965_surface *src_surface,
4375                              const VARectangle *src_rect,
4376                              struct i965_surface *dst_surface,
4377                              const VARectangle *dst_rect,
4378                              void *filter_param)
4379 {
4380     struct i965_driver_data *i965 = i965_driver_data(ctx);
4381     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->pp_dndi_context;
4382     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
4383     struct object_surface *previous_in_obj_surface, *current_in_obj_surface, *previous_out_obj_surface, *current_out_obj_surface;
4384     struct gen7_sampler_dndi *sampler_dndi;
4385     int index;
4386     int w, h;
4387     int orig_w, orig_h;
4388     int dndi_top_first = 1;
4389     VAProcFilterParameterBufferDeinterlacing *di_filter_param = (VAProcFilterParameterBufferDeinterlacing *)filter_param;
4390     int is_first_frame = (pp_dndi_context->frame_order == -1);
4391
4392     if (di_filter_param->flags & VA_DEINTERLACING_BOTTOM_FIELD)
4393         dndi_top_first = 0;
4394     else
4395         dndi_top_first = 1;
4396
4397     /* surface */
4398     current_in_obj_surface = (struct object_surface *)src_surface->base;
4399
4400     if (di_filter_param->algorithm == VAProcDeinterlacingBob) {
4401         previous_in_obj_surface = current_in_obj_surface;
4402         is_first_frame = 1;
4403     } else if (di_filter_param->algorithm == VAProcDeinterlacingMotionAdaptive) {
4404         if (pp_dndi_context->frame_order == 0) {
4405             VAProcPipelineParameterBuffer *pipeline_param = pp_context->pipeline_param;
4406             if (!pipeline_param ||
4407                 !pipeline_param->num_forward_references ||
4408                 pipeline_param->forward_references[0] == VA_INVALID_ID) {
4409                 WARN_ONCE("A forward temporal reference is needed for Motion adaptive deinterlacing !!!\n");
4410
4411                 return VA_STATUS_ERROR_INVALID_PARAMETER;
4412             } else {
4413                 previous_in_obj_surface = SURFACE(pipeline_param->forward_references[0]);
4414                 assert(previous_in_obj_surface && previous_in_obj_surface->bo);
4415
4416                 is_first_frame = 0;
4417             }
4418         } else if (pp_dndi_context->frame_order == 1) {
4419             vpp_surface_convert(ctx,
4420                                 pp_dndi_context->current_out_obj_surface,
4421                                 (struct object_surface *)dst_surface->base);
4422             pp_dndi_context->frame_order = (pp_dndi_context->frame_order + 1) % 2;
4423             is_first_frame = 0;
4424
4425             return VA_STATUS_SUCCESS_1;
4426         } else {
4427             previous_in_obj_surface = current_in_obj_surface;
4428             is_first_frame = 1;
4429         }
4430     } else {
4431         return VA_STATUS_ERROR_UNIMPLEMENTED;
4432     }
4433
4434     /* source (temporal reference) YUV surface index 4 */
4435     orig_w = previous_in_obj_surface->orig_width;
4436     orig_h = previous_in_obj_surface->orig_height;
4437     w = previous_in_obj_surface->width;
4438     h = previous_in_obj_surface->height;
4439     gen7_pp_set_surface2_state(ctx, pp_context,
4440                                previous_in_obj_surface->bo, 0,
4441                                orig_w, orig_h, w,
4442                                0, h,
4443                                SURFACE_FORMAT_PLANAR_420_8, 1,
4444                                4);
4445
4446     /* source surface */
4447     orig_w = current_in_obj_surface->orig_width;
4448     orig_h = current_in_obj_surface->orig_height;
4449     w = current_in_obj_surface->width;
4450     h = current_in_obj_surface->height;
4451
4452     /* source UV surface index 1 */
4453     gen7_pp_set_surface_state(ctx, pp_context,
4454                               current_in_obj_surface->bo, w * h,
4455                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
4456                               1, 0);
4457
4458     /* source YUV surface index 3 */
4459     gen7_pp_set_surface2_state(ctx, pp_context,
4460                                current_in_obj_surface->bo, 0,
4461                                orig_w, orig_h, w,
4462                                0, h,
4463                                SURFACE_FORMAT_PLANAR_420_8, 1,
4464                                3);
4465
4466     /* STMM / History Statistics input surface, index 5 */
4467     if (pp_dndi_context->stmm_bo == NULL) {
4468         pp_dndi_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr,
4469                                                 "STMM surface",
4470                                                 w * h,
4471                                                 4096);
4472         assert(pp_dndi_context->stmm_bo);
4473     }
4474
4475     gen7_pp_set_surface_state(ctx, pp_context,
4476                               pp_dndi_context->stmm_bo, 0,
4477                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4478                               5, 1);
4479
4480     /* destination surface */
4481     previous_out_obj_surface = (struct object_surface *)dst_surface->base;
4482     orig_w = previous_out_obj_surface->orig_width;
4483     orig_h = previous_out_obj_surface->orig_height;
4484     w = previous_out_obj_surface->width;
4485     h = previous_out_obj_surface->height;
4486
4487     if (is_first_frame) {
4488         current_out_obj_surface = previous_out_obj_surface;
4489     } else {
4490         VAStatus va_status;
4491
4492         if (pp_dndi_context->current_out_surface == VA_INVALID_SURFACE) {
4493             unsigned int tiling = 0, swizzle = 0;
4494             dri_bo_get_tiling(previous_out_obj_surface->bo, &tiling, &swizzle);
4495
4496             va_status = i965_CreateSurfaces(ctx,
4497                                             orig_w,
4498                                             orig_h,
4499                                             VA_RT_FORMAT_YUV420,
4500                                             1,
4501                                             &pp_dndi_context->current_out_surface);
4502             assert(va_status == VA_STATUS_SUCCESS);
4503             pp_dndi_context->current_out_obj_surface = SURFACE(pp_dndi_context->current_out_surface);
4504             assert(pp_dndi_context->current_out_obj_surface);
4505             i965_check_alloc_surface_bo(ctx,
4506                                         pp_dndi_context->current_out_obj_surface,
4507                                         tiling != I915_TILING_NONE,
4508                                         VA_FOURCC('N','V','1','2'),
4509                                         SUBSAMPLE_YUV420);
4510         }
4511
4512         current_out_obj_surface = pp_dndi_context->current_out_obj_surface;
4513     }
4514
4515     /* destination(Previous frame) Y surface index 27 */
4516     gen7_pp_set_surface_state(ctx, pp_context,
4517                               previous_out_obj_surface->bo, 0,
4518                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4519                               27, 1);
4520
4521     /* destination(Previous frame) UV surface index 28 */
4522     gen7_pp_set_surface_state(ctx, pp_context,
4523                               previous_out_obj_surface->bo, w * h,
4524                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
4525                               28, 1);
4526
4527     /* destination(Current frame) Y surface index 30 */
4528     gen7_pp_set_surface_state(ctx, pp_context,
4529                               current_out_obj_surface->bo, 0,
4530                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4531                               30, 1);
4532
4533     /* destination(Current frame) UV surface index 31 */
4534     orig_w = current_out_obj_surface->orig_width;
4535     orig_h = current_out_obj_surface->orig_height;
4536     w = current_out_obj_surface->width;
4537     h = current_out_obj_surface->height;
4538
4539     gen7_pp_set_surface_state(ctx, pp_context,
4540                               current_out_obj_surface->bo, w * h,
4541                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
4542                               31, 1);
4543
4544     /* STMM output surface, index 33 */
4545     gen7_pp_set_surface_state(ctx, pp_context,
4546                               pp_dndi_context->stmm_bo, 0,
4547                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4548                               33, 1);
4549
4550
4551     /* sampler dndi */
4552     dri_bo_map(pp_context->sampler_state_table.bo, True);
4553     assert(pp_context->sampler_state_table.bo->virtual);
4554     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
4555     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
4556
4557     /* sample dndi index 0 */
4558     index = 0;
4559     sampler_dndi[index].dw0.denoise_asd_threshold = 38;
4560     sampler_dndi[index].dw0.dnmh_delt = 7;
4561     sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
4562     sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
4563     sampler_dndi[index].dw0.denoise_maximum_history = 192;      // 128-240
4564     sampler_dndi[index].dw0.denoise_stad_threshold = 140;
4565
4566     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 38;
4567     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 1;
4568     sampler_dndi[index].dw1.stmm_c2 = 2;
4569     sampler_dndi[index].dw1.low_temporal_difference_threshold = 0;
4570     sampler_dndi[index].dw1.temporal_difference_threshold = 0;
4571
4572     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 20;   // 0-31
4573     sampler_dndi[index].dw2.bne_edge_th = 1;
4574     sampler_dndi[index].dw2.smooth_mv_th = 0;
4575     sampler_dndi[index].dw2.sad_tight_th = 5;
4576     sampler_dndi[index].dw2.cat_slope_minus1 = 9;
4577     sampler_dndi[index].dw2.good_neighbor_th = 12;
4578
4579     sampler_dndi[index].dw3.maximum_stmm = 150;
4580     sampler_dndi[index].dw3.multipler_for_vecm = 30;
4581     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 125;
4582     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
4583     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
4584
4585     sampler_dndi[index].dw4.sdi_delta = 5;
4586     sampler_dndi[index].dw4.sdi_threshold = 100;
4587     sampler_dndi[index].dw4.stmm_output_shift = 5;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
4588     sampler_dndi[index].dw4.stmm_shift_up = 1;
4589     sampler_dndi[index].dw4.stmm_shift_down = 0;
4590     sampler_dndi[index].dw4.minimum_stmm = 118;
4591
4592     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 175;
4593     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 37;
4594     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 100;
4595     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 50;
4596     sampler_dndi[index].dw6.dn_enable = 0;
4597     sampler_dndi[index].dw6.di_enable = 1;
4598     sampler_dndi[index].dw6.di_partial = 0;
4599     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
4600     sampler_dndi[index].dw6.dndi_stream_id = 1;
4601     sampler_dndi[index].dw6.dndi_first_frame = is_first_frame;
4602     sampler_dndi[index].dw6.progressive_dn = 0;
4603     sampler_dndi[index].dw6.mcdi_enable = 0;
4604     sampler_dndi[index].dw6.fmd_tear_threshold = 2;
4605     sampler_dndi[index].dw6.cat_th1 = 0;
4606     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100;
4607     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 16;
4608
4609     sampler_dndi[index].dw7.sad_tha = 5;
4610     sampler_dndi[index].dw7.sad_thb = 10;
4611     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
4612     sampler_dndi[index].dw7.mc_pixel_consistency_th = 25;
4613     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
4614     sampler_dndi[index].dw7.vdi_walker_enable = 0;
4615     sampler_dndi[index].dw7.neighborpixel_th = 10;
4616     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
4617
4618     dri_bo_unmap(pp_context->sampler_state_table.bo);
4619
4620     /* private function & data */
4621     pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
4622     pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
4623     pp_context->private_context = &pp_context->pp_dndi_context;
4624     pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
4625
4626     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
4627     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
4628     pp_static_parameter->grf1.di_top_field_first = 0;
4629     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
4630
4631     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
4632     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
4633     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
4634
4635     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
4636     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
4637
4638     pp_dndi_context->dest_w = w;
4639     pp_dndi_context->dest_h = h;
4640
4641     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
4642
4643     pp_dndi_context->frame_order = (pp_dndi_context->frame_order + 1) % 2;
4644
4645     return VA_STATUS_SUCCESS;
4646 }
4647
4648 static int
4649 gen7_pp_dn_x_steps(void *private_context)
4650 {
4651     struct pp_dn_context *pp_dn_context = private_context;
4652
4653     return pp_dn_context->dest_w / 16;
4654 }
4655
4656 static int
4657 gen7_pp_dn_y_steps(void *private_context)
4658 {
4659     struct pp_dn_context *pp_dn_context = private_context;
4660
4661     return pp_dn_context->dest_h / 4;
4662 }
4663
4664 static int
4665 gen7_pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
4666 {
4667     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
4668
4669     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
4670     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
4671
4672     return 0;
4673 }
4674
4675 static VAStatus
4676 gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
4677                            const struct i965_surface *src_surface,
4678                            const VARectangle *src_rect,
4679                            struct i965_surface *dst_surface,
4680                            const VARectangle *dst_rect,
4681                            void *filter_param)
4682 {
4683     struct i965_driver_data *i965 = i965_driver_data(ctx);
4684     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->pp_dn_context;
4685     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
4686     struct object_surface *obj_surface;
4687     struct gen7_sampler_dndi *sampler_dn;
4688     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
4689     int index;
4690     int w, h;
4691     int orig_w, orig_h;
4692     int dn_strength = 15;
4693     int dndi_top_first = 1;
4694     int dn_progressive = 0;
4695
4696     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
4697         dndi_top_first = 1;
4698         dn_progressive = 1;
4699     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
4700         dndi_top_first = 1;
4701         dn_progressive = 0;
4702     } else {
4703         dndi_top_first = 0;
4704         dn_progressive = 0;
4705     }
4706
4707     if (dn_filter_param) {
4708         float value = dn_filter_param->value;
4709         
4710         if (value > 1.0)
4711             value = 1.0;
4712         
4713         if (value < 0.0)
4714             value = 0.0;
4715
4716         dn_strength = (int)(value * 31.0F);
4717     }
4718
4719     /* surface */
4720     obj_surface = (struct object_surface *)src_surface->base;
4721     orig_w = obj_surface->orig_width;
4722     orig_h = obj_surface->orig_height;
4723     w = obj_surface->width;
4724     h = obj_surface->height;
4725
4726     if (pp_dn_context->stmm_bo == NULL) {
4727         pp_dn_context->stmm_bo= dri_bo_alloc(i965->intel.bufmgr,
4728                                              "STMM surface",
4729                                              w * h,
4730                                              4096);
4731         assert(pp_dn_context->stmm_bo);
4732     }
4733
4734     /* source UV surface index 1 */
4735     gen7_pp_set_surface_state(ctx, pp_context,
4736                               obj_surface->bo, w * h,
4737                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
4738                               1, 0);
4739
4740     /* source YUV surface index 3 */
4741     gen7_pp_set_surface2_state(ctx, pp_context,
4742                                obj_surface->bo, 0,
4743                                orig_w, orig_h, w,
4744                                0, h,
4745                                SURFACE_FORMAT_PLANAR_420_8, 1,
4746                                3);
4747
4748     /* source (temporal reference) YUV surface index 4 */
4749     gen7_pp_set_surface2_state(ctx, pp_context,
4750                                obj_surface->bo, 0,
4751                                orig_w, orig_h, w,
4752                                0, h,
4753                                SURFACE_FORMAT_PLANAR_420_8, 1,
4754                                4);
4755
4756     /* STMM / History Statistics input surface, index 5 */
4757     gen7_pp_set_surface_state(ctx, pp_context,
4758                               pp_dn_context->stmm_bo, 0,
4759                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4760                               33, 1);
4761
4762     /* destination surface */
4763     obj_surface = (struct object_surface *)dst_surface->base;
4764     orig_w = obj_surface->orig_width;
4765     orig_h = obj_surface->orig_height;
4766     w = obj_surface->width;
4767     h = obj_surface->height;
4768
4769     /* destination Y surface index 24 */
4770     gen7_pp_set_surface_state(ctx, pp_context,
4771                               obj_surface->bo, 0,
4772                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4773                               24, 1);
4774
4775     /* destination UV surface index 25 */
4776     gen7_pp_set_surface_state(ctx, pp_context,
4777                               obj_surface->bo, w * h,
4778                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
4779                               25, 1);
4780
4781     /* sampler dn */
4782     dri_bo_map(pp_context->sampler_state_table.bo, True);
4783     assert(pp_context->sampler_state_table.bo->virtual);
4784     assert(sizeof(*sampler_dn) == sizeof(int) * 8);
4785     sampler_dn = pp_context->sampler_state_table.bo->virtual;
4786
4787     /* sample dn index 1 */
4788     index = 0;
4789     sampler_dn[index].dw0.denoise_asd_threshold = 0;
4790     sampler_dn[index].dw0.dnmh_delt = 8;
4791     sampler_dn[index].dw0.vdi_walker_y_stride = 0;
4792     sampler_dn[index].dw0.vdi_walker_frame_sharing_enable = 0;
4793     sampler_dn[index].dw0.denoise_maximum_history = 128;      // 128-240
4794     sampler_dn[index].dw0.denoise_stad_threshold = 0;
4795
4796     sampler_dn[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
4797     sampler_dn[index].dw1.denoise_moving_pixel_threshold = 0;
4798     sampler_dn[index].dw1.stmm_c2 = 0;
4799     sampler_dn[index].dw1.low_temporal_difference_threshold = 8;
4800     sampler_dn[index].dw1.temporal_difference_threshold = 16;
4801
4802     sampler_dn[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
4803     sampler_dn[index].dw2.bne_edge_th = 1;
4804     sampler_dn[index].dw2.smooth_mv_th = 0;
4805     sampler_dn[index].dw2.sad_tight_th = 5;
4806     sampler_dn[index].dw2.cat_slope_minus1 = 9;
4807     sampler_dn[index].dw2.good_neighbor_th = 4;
4808
4809     sampler_dn[index].dw3.maximum_stmm = 128;
4810     sampler_dn[index].dw3.multipler_for_vecm = 2;
4811     sampler_dn[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
4812     sampler_dn[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
4813     sampler_dn[index].dw3.stmm_blending_constant_select = 0;
4814
4815     sampler_dn[index].dw4.sdi_delta = 8;
4816     sampler_dn[index].dw4.sdi_threshold = 128;
4817     sampler_dn[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
4818     sampler_dn[index].dw4.stmm_shift_up = 0;
4819     sampler_dn[index].dw4.stmm_shift_down = 0;
4820     sampler_dn[index].dw4.minimum_stmm = 0;
4821
4822     sampler_dn[index].dw5.fmd_temporal_difference_threshold = 0;
4823     sampler_dn[index].dw5.sdi_fallback_mode_2_constant = 0;
4824     sampler_dn[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
4825     sampler_dn[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
4826
4827     sampler_dn[index].dw6.dn_enable = 1;
4828     sampler_dn[index].dw6.di_enable = 0;
4829     sampler_dn[index].dw6.di_partial = 0;
4830     sampler_dn[index].dw6.dndi_top_first = dndi_top_first;
4831     sampler_dn[index].dw6.dndi_stream_id = 1;
4832     sampler_dn[index].dw6.dndi_first_frame = 1;
4833     sampler_dn[index].dw6.progressive_dn = dn_progressive;
4834     sampler_dn[index].dw6.mcdi_enable = 0;
4835     sampler_dn[index].dw6.fmd_tear_threshold = 32;
4836     sampler_dn[index].dw6.cat_th1 = 0;
4837     sampler_dn[index].dw6.fmd2_vertical_difference_threshold = 32;
4838     sampler_dn[index].dw6.fmd1_vertical_difference_threshold = 32;
4839
4840     sampler_dn[index].dw7.sad_tha = 5;
4841     sampler_dn[index].dw7.sad_thb = 10;
4842     sampler_dn[index].dw7.fmd_for_1st_field_of_current_frame = 2;
4843     sampler_dn[index].dw7.mc_pixel_consistency_th = 25;
4844     sampler_dn[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
4845     sampler_dn[index].dw7.vdi_walker_enable = 0;
4846     sampler_dn[index].dw7.neighborpixel_th = 10;
4847     sampler_dn[index].dw7.column_width_minus1 = w / 16;
4848
4849     dri_bo_unmap(pp_context->sampler_state_table.bo);
4850
4851     /* private function & data */
4852     pp_context->pp_x_steps = gen7_pp_dn_x_steps;
4853     pp_context->pp_y_steps = gen7_pp_dn_y_steps;
4854     pp_context->private_context = &pp_context->pp_dn_context;
4855     pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;
4856
4857     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
4858     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
4859     pp_static_parameter->grf1.di_top_field_first = 0;
4860     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
4861
4862     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
4863     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
4864     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
4865
4866     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
4867     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
4868
4869     pp_dn_context->dest_w = w;
4870     pp_dn_context->dest_h = h;
4871
4872     dst_surface->flags = src_surface->flags;
4873
4874     return VA_STATUS_SUCCESS;
4875 }
4876
4877 static VAStatus
4878 ironlake_pp_initialize(
4879     VADriverContextP ctx,
4880     struct i965_post_processing_context *pp_context,
4881     const struct i965_surface *src_surface,
4882     const VARectangle *src_rect,
4883     struct i965_surface *dst_surface,
4884     const VARectangle *dst_rect,
4885     int pp_index,
4886     void *filter_param
4887 )
4888 {
4889     VAStatus va_status;
4890     struct i965_driver_data *i965 = i965_driver_data(ctx);
4891     struct pp_module *pp_module;
4892     dri_bo *bo;
4893     int static_param_size, inline_param_size;
4894
4895     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4896     bo = dri_bo_alloc(i965->intel.bufmgr,
4897                       "surface state & binding table",
4898                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
4899                       4096);
4900     assert(bo);
4901     pp_context->surface_state_binding_table.bo = bo;
4902
4903     dri_bo_unreference(pp_context->curbe.bo);
4904     bo = dri_bo_alloc(i965->intel.bufmgr,
4905                       "constant buffer",
4906                       4096, 
4907                       4096);
4908     assert(bo);
4909     pp_context->curbe.bo = bo;
4910
4911     dri_bo_unreference(pp_context->idrt.bo);
4912     bo = dri_bo_alloc(i965->intel.bufmgr, 
4913                       "interface discriptor", 
4914                       sizeof(struct i965_interface_descriptor), 
4915                       4096);
4916     assert(bo);
4917     pp_context->idrt.bo = bo;
4918     pp_context->idrt.num_interface_descriptors = 0;
4919
4920     dri_bo_unreference(pp_context->sampler_state_table.bo);
4921     bo = dri_bo_alloc(i965->intel.bufmgr, 
4922                       "sampler state table", 
4923                       4096,
4924                       4096);
4925     assert(bo);
4926     dri_bo_map(bo, True);
4927     memset(bo->virtual, 0, bo->size);
4928     dri_bo_unmap(bo);
4929     pp_context->sampler_state_table.bo = bo;
4930
4931     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4932     bo = dri_bo_alloc(i965->intel.bufmgr, 
4933                       "sampler 8x8 state ",
4934                       4096,
4935                       4096);
4936     assert(bo);
4937     pp_context->sampler_state_table.bo_8x8 = bo;
4938
4939     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4940     bo = dri_bo_alloc(i965->intel.bufmgr, 
4941                       "sampler 8x8 state ",
4942                       4096,
4943                       4096);
4944     assert(bo);
4945     pp_context->sampler_state_table.bo_8x8_uv = bo;
4946
4947     dri_bo_unreference(pp_context->vfe_state.bo);
4948     bo = dri_bo_alloc(i965->intel.bufmgr, 
4949                       "vfe state", 
4950                       sizeof(struct i965_vfe_state), 
4951                       4096);
4952     assert(bo);
4953     pp_context->vfe_state.bo = bo;
4954
4955     static_param_size = sizeof(struct pp_static_parameter);
4956     inline_param_size = sizeof(struct pp_inline_parameter);
4957
4958     memset(pp_context->pp_static_parameter, 0, static_param_size);
4959     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
4960     
4961     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
4962     pp_context->current_pp = pp_index;
4963     pp_module = &pp_context->pp_modules[pp_index];
4964     
4965     if (pp_module->initialize)
4966         va_status = pp_module->initialize(ctx, pp_context,
4967                                           src_surface,
4968                                           src_rect,
4969                                           dst_surface,
4970                                           dst_rect,
4971                                           filter_param);
4972     else
4973         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4974
4975     return va_status;
4976 }
4977
4978 static VAStatus
4979 ironlake_post_processing(
4980     VADriverContextP   ctx,
4981     struct i965_post_processing_context *pp_context,
4982     const struct i965_surface *src_surface,
4983     const VARectangle *src_rect,
4984     struct i965_surface *dst_surface,
4985     const VARectangle *dst_rect,
4986     int                pp_index,
4987     void *filter_param
4988 )
4989 {
4990     VAStatus va_status;
4991
4992     va_status = ironlake_pp_initialize(ctx, pp_context,
4993                                        src_surface,
4994                                        src_rect,
4995                                        dst_surface,
4996                                        dst_rect,
4997                                        pp_index,
4998                                        filter_param);
4999
5000     if (va_status == VA_STATUS_SUCCESS) {
5001         ironlake_pp_states_setup(ctx, pp_context);
5002         ironlake_pp_pipeline_setup(ctx, pp_context);
5003     }
5004
5005     return va_status;
5006 }
5007
5008 static VAStatus
5009 gen6_pp_initialize(
5010     VADriverContextP ctx,
5011     struct i965_post_processing_context *pp_context,
5012     const struct i965_surface *src_surface,
5013     const VARectangle *src_rect,
5014     struct i965_surface *dst_surface,
5015     const VARectangle *dst_rect,
5016     int pp_index,
5017     void *filter_param
5018 )
5019 {
5020     VAStatus va_status;
5021     struct i965_driver_data *i965 = i965_driver_data(ctx);
5022     struct pp_module *pp_module;
5023     dri_bo *bo;
5024     int static_param_size, inline_param_size;
5025
5026     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
5027     bo = dri_bo_alloc(i965->intel.bufmgr,
5028                       "surface state & binding table",
5029                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
5030                       4096);
5031     assert(bo);
5032     pp_context->surface_state_binding_table.bo = bo;
5033
5034     dri_bo_unreference(pp_context->curbe.bo);
5035     bo = dri_bo_alloc(i965->intel.bufmgr,
5036                       "constant buffer",
5037                       4096, 
5038                       4096);
5039     assert(bo);
5040     pp_context->curbe.bo = bo;
5041
5042     dri_bo_unreference(pp_context->idrt.bo);
5043     bo = dri_bo_alloc(i965->intel.bufmgr, 
5044                       "interface discriptor", 
5045                       sizeof(struct gen6_interface_descriptor_data), 
5046                       4096);
5047     assert(bo);
5048     pp_context->idrt.bo = bo;
5049     pp_context->idrt.num_interface_descriptors = 0;
5050
5051     dri_bo_unreference(pp_context->sampler_state_table.bo);
5052     bo = dri_bo_alloc(i965->intel.bufmgr, 
5053                       "sampler state table", 
5054                       4096,
5055                       4096);
5056     assert(bo);
5057     dri_bo_map(bo, True);
5058     memset(bo->virtual, 0, bo->size);
5059     dri_bo_unmap(bo);
5060     pp_context->sampler_state_table.bo = bo;
5061
5062     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
5063     bo = dri_bo_alloc(i965->intel.bufmgr, 
5064                       "sampler 8x8 state ",
5065                       4096,
5066                       4096);
5067     assert(bo);
5068     pp_context->sampler_state_table.bo_8x8 = bo;
5069
5070     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
5071     bo = dri_bo_alloc(i965->intel.bufmgr, 
5072                       "sampler 8x8 state ",
5073                       4096,
5074                       4096);
5075     assert(bo);
5076     pp_context->sampler_state_table.bo_8x8_uv = bo;
5077
5078     dri_bo_unreference(pp_context->vfe_state.bo);
5079     bo = dri_bo_alloc(i965->intel.bufmgr, 
5080                       "vfe state", 
5081                       sizeof(struct i965_vfe_state), 
5082                       4096);
5083     assert(bo);
5084     pp_context->vfe_state.bo = bo;
5085     
5086     if (IS_GEN7(i965->intel.device_id)) {
5087         static_param_size = sizeof(struct gen7_pp_static_parameter);
5088         inline_param_size = sizeof(struct gen7_pp_inline_parameter);
5089     } else {
5090         static_param_size = sizeof(struct pp_static_parameter);
5091         inline_param_size = sizeof(struct pp_inline_parameter);
5092     }
5093
5094     memset(pp_context->pp_static_parameter, 0, static_param_size);
5095     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
5096
5097     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
5098     pp_context->current_pp = pp_index;
5099     pp_module = &pp_context->pp_modules[pp_index];
5100     
5101     if (pp_module->initialize)
5102         va_status = pp_module->initialize(ctx, pp_context,
5103                                           src_surface,
5104                                           src_rect,
5105                                           dst_surface,
5106                                           dst_rect,
5107                                           filter_param);
5108     else
5109         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
5110
5111     calculate_boundary_block_mask(pp_context, dst_rect);
5112
5113     return va_status;
5114 }
5115
5116
5117 static VAStatus
5118 gen8_pp_initialize(
5119     VADriverContextP   ctx,
5120     struct i965_post_processing_context *pp_context,
5121     const struct i965_surface *src_surface,
5122     const VARectangle *src_rect,
5123     struct i965_surface *dst_surface,
5124     const VARectangle *dst_rect,
5125     int                pp_index,
5126     void * filter_param
5127 )
5128 {
5129     VAStatus va_status;
5130     struct i965_driver_data *i965 = i965_driver_data(ctx);
5131     struct pp_module *pp_module;
5132     dri_bo *bo;
5133     int static_param_size, inline_param_size;
5134
5135     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
5136     bo = dri_bo_alloc(i965->intel.bufmgr,
5137                       "surface state & binding table",
5138                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
5139                       4096);
5140     assert(bo);
5141     pp_context->surface_state_binding_table.bo = bo;
5142
5143     dri_bo_unreference(pp_context->curbe.bo);
5144     bo = dri_bo_alloc(i965->intel.bufmgr,
5145                       "constant buffer",
5146                       4096, 
5147                       4096);
5148     assert(bo);
5149     pp_context->curbe.bo = bo;
5150
5151     dri_bo_unreference(pp_context->idrt.bo);
5152     bo = dri_bo_alloc(i965->intel.bufmgr, 
5153                       "interface discriptor", 
5154                       sizeof(struct gen8_interface_descriptor_data), 
5155                       4096);
5156     assert(bo);
5157     pp_context->idrt.bo = bo;
5158     pp_context->idrt.num_interface_descriptors = 0;
5159
5160     dri_bo_unreference(pp_context->sampler_state_table.bo);
5161     bo = dri_bo_alloc(i965->intel.bufmgr, 
5162                       "sampler 8x8 state ",
5163                       4096 * 2,
5164                       4096);
5165     assert(bo);
5166     pp_context->sampler_state_table.bo = bo;
5167
5168
5169     dri_bo_unreference(pp_context->vfe_state.bo);
5170     bo = dri_bo_alloc(i965->intel.bufmgr, 
5171                       "vfe state", 
5172                       sizeof(struct i965_vfe_state), 
5173                       4096);
5174     assert(bo);
5175     pp_context->vfe_state.bo = bo;
5176     
5177     static_param_size = sizeof(struct gen7_pp_static_parameter);
5178     inline_param_size = sizeof(struct gen7_pp_inline_parameter);
5179
5180     memset(pp_context->pp_static_parameter, 0, static_param_size);
5181     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
5182
5183     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
5184     pp_context->current_pp = pp_index;
5185     pp_module = &pp_context->pp_modules[pp_index];
5186     
5187     if (pp_module->initialize)
5188         va_status = pp_module->initialize(ctx, pp_context,
5189                                           src_surface,
5190                                           src_rect,
5191                                           dst_surface,
5192                                           dst_rect,
5193                                           filter_param);
5194     else
5195         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
5196  
5197     calculate_boundary_block_mask(pp_context, dst_rect);
5198
5199     return va_status;
5200 }
5201
5202 static void
5203 gen6_pp_interface_descriptor_table(VADriverContextP   ctx,
5204                                    struct i965_post_processing_context *pp_context)
5205 {
5206     struct i965_driver_data *i965 = i965_driver_data(ctx);
5207     struct gen6_interface_descriptor_data *desc;
5208     dri_bo *bo;
5209     int pp_index = pp_context->current_pp;
5210
5211     bo = pp_context->idrt.bo;
5212     dri_bo_map(bo, True);
5213     assert(bo->virtual);
5214     desc = bo->virtual;
5215     memset(desc, 0, sizeof(*desc));
5216     desc->desc0.kernel_start_pointer = 
5217         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
5218     desc->desc1.single_program_flow = 1;
5219     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
5220     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
5221     desc->desc2.sampler_state_pointer = 
5222         pp_context->sampler_state_table.bo->offset >> 5;
5223     desc->desc3.binding_table_entry_count = 0;
5224     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
5225     desc->desc4.constant_urb_entry_read_offset = 0;
5226
5227     if (IS_GEN7(i965->intel.device_id))
5228         desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */
5229     else
5230         desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
5231
5232     dri_bo_emit_reloc(bo,
5233                       I915_GEM_DOMAIN_INSTRUCTION, 0,
5234                       0,
5235                       offsetof(struct gen6_interface_descriptor_data, desc0),
5236                       pp_context->pp_modules[pp_index].kernel.bo);
5237
5238     dri_bo_emit_reloc(bo,
5239                       I915_GEM_DOMAIN_INSTRUCTION, 0,
5240                       desc->desc2.sampler_count << 2,
5241                       offsetof(struct gen6_interface_descriptor_data, desc2),
5242                       pp_context->sampler_state_table.bo);
5243
5244     dri_bo_unmap(bo);
5245     pp_context->idrt.num_interface_descriptors++;
5246 }
5247
5248 static void
5249 gen8_pp_interface_descriptor_table(VADriverContextP   ctx,
5250                                    struct i965_post_processing_context *pp_context)
5251 {
5252     struct gen8_interface_descriptor_data *desc;
5253     dri_bo *bo;
5254     int pp_index = pp_context->current_pp;
5255
5256     bo = pp_context->idrt.bo;
5257     dri_bo_map(bo, True);
5258     assert(bo->virtual);
5259     desc = bo->virtual;
5260     memset(desc, 0, sizeof(*desc));
5261     desc->desc0.kernel_start_pointer = 
5262         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
5263     desc->desc2.single_program_flow = 1;
5264     desc->desc2.floating_point_mode = FLOATING_POINT_IEEE_754;
5265     desc->desc3.sampler_count = 1;      /* 1 - 4 samplers used */
5266     desc->desc3.sampler_state_pointer = 
5267         pp_context->sampler_state_table.bo->offset >> 5;
5268     desc->desc4.binding_table_entry_count = 0;
5269     desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
5270     desc->desc5.constant_urb_entry_read_offset = 0;
5271     
5272     desc->desc5.constant_urb_entry_read_length = 6; /* grf 1-6 */
5273
5274     dri_bo_emit_reloc(bo,
5275                       I915_GEM_DOMAIN_INSTRUCTION, 0,
5276                       0,
5277                       offsetof(struct gen8_interface_descriptor_data, desc0),
5278                       pp_context->pp_modules[pp_index].kernel.bo);
5279
5280     dri_bo_emit_reloc(bo,
5281                       I915_GEM_DOMAIN_INSTRUCTION, 0,
5282                       desc->desc3.sampler_count << 2,
5283                       offsetof(struct gen8_interface_descriptor_data, desc3),
5284                       pp_context->sampler_state_table.bo);
5285
5286     dri_bo_unmap(bo);
5287     pp_context->idrt.num_interface_descriptors++;
5288 }
5289
5290 static void
5291 gen6_pp_upload_constants(VADriverContextP ctx,
5292                          struct i965_post_processing_context *pp_context)
5293 {
5294     struct i965_driver_data *i965 = i965_driver_data(ctx);
5295     unsigned char *constant_buffer;
5296     int param_size;
5297
5298     assert(sizeof(struct pp_static_parameter) == 128);
5299     assert(sizeof(struct gen7_pp_static_parameter) == 192);
5300
5301     if (IS_GEN7(i965->intel.device_id) ||
5302         IS_GEN8(i965->intel.device_id))
5303         param_size = sizeof(struct gen7_pp_static_parameter);
5304     else
5305         param_size = sizeof(struct pp_static_parameter);
5306
5307     dri_bo_map(pp_context->curbe.bo, 1);
5308     assert(pp_context->curbe.bo->virtual);
5309     constant_buffer = pp_context->curbe.bo->virtual;
5310     memcpy(constant_buffer, pp_context->pp_static_parameter, param_size);
5311     dri_bo_unmap(pp_context->curbe.bo);
5312 }
5313
5314 static void
5315 gen6_pp_states_setup(VADriverContextP ctx,
5316                      struct i965_post_processing_context *pp_context)
5317 {
5318     gen6_pp_interface_descriptor_table(ctx, pp_context);
5319     gen6_pp_upload_constants(ctx, pp_context);
5320 }
5321
5322 static void
5323 gen8_pp_states_setup(VADriverContextP ctx,
5324                      struct i965_post_processing_context *pp_context)
5325 {
5326     gen8_pp_interface_descriptor_table(ctx, pp_context);
5327     gen6_pp_upload_constants(ctx, pp_context);
5328 }
5329
5330 static void
5331 gen6_pp_pipeline_select(VADriverContextP ctx,
5332                         struct i965_post_processing_context *pp_context)
5333 {
5334     struct intel_batchbuffer *batch = pp_context->batch;
5335
5336     BEGIN_BATCH(batch, 1);
5337     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
5338     ADVANCE_BATCH(batch);
5339 }
5340
5341 static void
5342 gen6_pp_state_base_address(VADriverContextP ctx,
5343                            struct i965_post_processing_context *pp_context)
5344 {
5345     struct intel_batchbuffer *batch = pp_context->batch;
5346
5347     BEGIN_BATCH(batch, 10);
5348     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
5349     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
5350     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
5351     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
5352     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
5353     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
5354     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
5355     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
5356     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
5357     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
5358     ADVANCE_BATCH(batch);
5359 }
5360
5361 static void
5362 gen8_pp_state_base_address(VADriverContextP ctx,
5363                            struct i965_post_processing_context *pp_context)
5364 {
5365     struct intel_batchbuffer *batch = pp_context->batch;
5366
5367     BEGIN_BATCH(batch, 16);
5368     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2));
5369         /* DW1 Generate state address */
5370     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
5371         OUT_BATCH(batch, 0);
5372         OUT_BATCH(batch, 0);
5373         /* DW4. Surface state address */
5374     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
5375         OUT_BATCH(batch, 0);
5376         /* DW6. Dynamic state address */
5377     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
5378         OUT_BATCH(batch, 0);
5379
5380         /* DW8. Indirect object address */
5381     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
5382         OUT_BATCH(batch, 0);
5383
5384         /* DW10. Instruction base address */
5385     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
5386         OUT_BATCH(batch, 0);
5387
5388     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY);
5389     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY);
5390     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY);
5391     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY);
5392     ADVANCE_BATCH(batch);
5393 }
5394
5395 static void
5396 gen6_pp_vfe_state(VADriverContextP ctx,
5397                   struct i965_post_processing_context *pp_context)
5398 {
5399     struct intel_batchbuffer *batch = pp_context->batch;
5400
5401     BEGIN_BATCH(batch, 8);
5402     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
5403     OUT_BATCH(batch, 0);
5404     OUT_BATCH(batch,
5405               (pp_context->vfe_gpu_state.max_num_threads - 1) << 16 |
5406               pp_context->vfe_gpu_state.num_urb_entries << 8);
5407     OUT_BATCH(batch, 0);
5408     OUT_BATCH(batch,
5409               (pp_context->vfe_gpu_state.urb_entry_size) << 16 |  
5410                 /* URB Entry Allocation Size, in 256 bits unit */
5411               (pp_context->vfe_gpu_state.curbe_allocation_size));
5412                 /* CURBE Allocation Size, in 256 bits unit */
5413     OUT_BATCH(batch, 0);
5414     OUT_BATCH(batch, 0);
5415     OUT_BATCH(batch, 0);
5416     ADVANCE_BATCH(batch);
5417 }
5418
5419 static void
5420 gen8_pp_vfe_state(VADriverContextP ctx,
5421                   struct i965_post_processing_context *pp_context)
5422 {
5423     struct intel_batchbuffer *batch = pp_context->batch;
5424
5425     BEGIN_BATCH(batch, 9);
5426     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (9 - 2));
5427     OUT_BATCH(batch, 0);
5428     OUT_BATCH(batch, 0);
5429     OUT_BATCH(batch,
5430               (pp_context->vfe_gpu_state.max_num_threads - 1) << 16 |
5431               pp_context->vfe_gpu_state.num_urb_entries << 8);
5432     OUT_BATCH(batch, 0);
5433     OUT_BATCH(batch,
5434               (pp_context->vfe_gpu_state.urb_entry_size) << 16 |  
5435                 /* URB Entry Allocation Size, in 256 bits unit */
5436               (pp_context->vfe_gpu_state.curbe_allocation_size));
5437                 /* CURBE Allocation Size, in 256 bits unit */
5438     OUT_BATCH(batch, 0);
5439     OUT_BATCH(batch, 0);
5440     OUT_BATCH(batch, 0);
5441     ADVANCE_BATCH(batch);
5442 }
5443
5444 static void
5445 gen6_pp_curbe_load(VADriverContextP ctx,
5446                    struct i965_post_processing_context *pp_context)
5447 {
5448     struct intel_batchbuffer *batch = pp_context->batch;
5449     struct i965_driver_data *i965 = i965_driver_data(ctx);
5450     int param_size;
5451
5452     if (IS_GEN7(i965->intel.device_id) ||
5453         IS_GEN8(i965->intel.device_id))
5454         param_size = sizeof(struct gen7_pp_static_parameter);
5455     else
5456         param_size = sizeof(struct pp_static_parameter);
5457
5458     BEGIN_BATCH(batch, 4);
5459     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
5460     OUT_BATCH(batch, 0);
5461     OUT_BATCH(batch,
5462               param_size);
5463     OUT_RELOC(batch, 
5464               pp_context->curbe.bo,
5465               I915_GEM_DOMAIN_INSTRUCTION, 0,
5466               0);
5467     ADVANCE_BATCH(batch);
5468 }
5469
5470 static void
5471 gen6_interface_descriptor_load(VADriverContextP ctx,
5472                                struct i965_post_processing_context *pp_context)
5473 {
5474     struct intel_batchbuffer *batch = pp_context->batch;
5475
5476     BEGIN_BATCH(batch, 4);
5477     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
5478     OUT_BATCH(batch, 0);
5479     OUT_BATCH(batch,
5480               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
5481     OUT_RELOC(batch, 
5482               pp_context->idrt.bo,
5483               I915_GEM_DOMAIN_INSTRUCTION, 0,
5484               0);
5485     ADVANCE_BATCH(batch);
5486 }
5487
5488 static void
5489 gen8_interface_descriptor_load(VADriverContextP ctx,
5490                                struct i965_post_processing_context *pp_context)
5491 {
5492     struct intel_batchbuffer *batch = pp_context->batch;
5493
5494     BEGIN_BATCH(batch, 4);
5495     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
5496     OUT_BATCH(batch, 0);
5497     OUT_BATCH(batch,
5498               pp_context->idrt.num_interface_descriptors * sizeof(struct gen8_interface_descriptor_data));
5499     OUT_RELOC(batch, 
5500               pp_context->idrt.bo,
5501               I915_GEM_DOMAIN_INSTRUCTION, 0,
5502               0);
5503     ADVANCE_BATCH(batch);
5504 }
5505
5506 static void update_block_mask_parameter(struct i965_post_processing_context *pp_context, int x, int y, int x_steps, int y_steps) 
5507 {
5508     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
5509
5510     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
5511     pp_inline_parameter->grf6.block_vertical_mask_bottom = pp_context->block_vertical_mask_bottom;
5512     // for the first block, it always on the left edge. the second block will reload horizontal_mask from grf6.block_horizontal_mask_middle
5513     pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_left;
5514     pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
5515     pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_right;
5516
5517     /* 1 x N */
5518     if (x_steps == 1) {
5519         if (y == y_steps-1) {
5520             pp_inline_parameter->grf5.block_vertical_mask = pp_context->block_vertical_mask_bottom;
5521         }
5522         else {
5523             pp_inline_parameter->grf6.block_vertical_mask_bottom = 0xff;
5524         }
5525     }
5526
5527     /* M x 1 */
5528     if (y_steps == 1) {
5529         if (x == 0) { // all blocks in this group are on the left edge
5530             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_left;
5531             pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_left; 
5532         }
5533         else if (x == x_steps-1) {
5534             pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_right;
5535             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_right;
5536         }
5537         else {
5538             pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
5539             pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
5540             pp_inline_parameter->grf6.block_horizontal_mask_right = 0xffff;
5541         }
5542     }
5543
5544 }
5545
5546 static void
5547 gen6_pp_object_walker(VADriverContextP ctx,
5548                       struct i965_post_processing_context *pp_context)
5549 {
5550     struct i965_driver_data *i965 = i965_driver_data(ctx);
5551     struct intel_batchbuffer *batch = pp_context->batch;
5552     int x, x_steps, y, y_steps;
5553     int param_size, command_length_in_dws;
5554     dri_bo *command_buffer;
5555     unsigned int *command_ptr;
5556
5557     if (IS_GEN7(i965->intel.device_id) ||
5558         IS_GEN8(i965->intel.device_id))
5559         param_size = sizeof(struct gen7_pp_inline_parameter);
5560     else
5561         param_size = sizeof(struct pp_inline_parameter);
5562
5563     x_steps = pp_context->pp_x_steps(pp_context->private_context);
5564     y_steps = pp_context->pp_y_steps(pp_context->private_context);
5565     command_length_in_dws = 6 + (param_size >> 2);
5566     command_buffer = dri_bo_alloc(i965->intel.bufmgr,
5567                                   "command objects buffer",
5568                                   command_length_in_dws * 4 * x_steps * y_steps + 8,
5569                                   4096);
5570
5571     dri_bo_map(command_buffer, 1);
5572     command_ptr = command_buffer->virtual;
5573
5574     for (y = 0; y < y_steps; y++) {
5575         for (x = 0; x < x_steps; x++) {
5576             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
5577                 // some common block parameter update goes here, apply to all pp functions
5578                 if (IS_GEN6(i965->intel.device_id))
5579                     update_block_mask_parameter (pp_context, x, y, x_steps, y_steps);
5580                 
5581                 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
5582                 *command_ptr++ = 0;
5583                 *command_ptr++ = 0;
5584                 *command_ptr++ = 0;
5585                 *command_ptr++ = 0;
5586                 *command_ptr++ = 0;
5587                 memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
5588                 command_ptr += (param_size >> 2);
5589             }
5590         }
5591     }
5592
5593     if (command_length_in_dws * x_steps * y_steps % 2 == 0)
5594         *command_ptr++ = 0;
5595
5596     *command_ptr = MI_BATCH_BUFFER_END;
5597
5598     dri_bo_unmap(command_buffer);
5599
5600     if (IS_GEN8(i965->intel.device_id)) {
5601         BEGIN_BATCH(batch, 3);
5602         OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
5603         OUT_RELOC(batch, command_buffer, 
5604               I915_GEM_DOMAIN_COMMAND, 0, 
5605               0);
5606         OUT_BATCH(batch, 0);
5607         ADVANCE_BATCH(batch);
5608     } else {
5609         BEGIN_BATCH(batch, 2);
5610         OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
5611         OUT_RELOC(batch, command_buffer, 
5612               I915_GEM_DOMAIN_COMMAND, 0, 
5613               0);
5614         ADVANCE_BATCH(batch);
5615     }
5616     
5617     dri_bo_unreference(command_buffer);
5618
5619     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
5620      * will cause control to pass back to ring buffer 
5621      */
5622     intel_batchbuffer_end_atomic(batch);
5623     intel_batchbuffer_flush(batch);
5624     intel_batchbuffer_start_atomic(batch, 0x1000);
5625 }
5626
5627 static void
5628 gen6_pp_pipeline_setup(VADriverContextP ctx,
5629                        struct i965_post_processing_context *pp_context)
5630 {
5631     struct intel_batchbuffer *batch = pp_context->batch;
5632
5633     intel_batchbuffer_start_atomic(batch, 0x1000);
5634     intel_batchbuffer_emit_mi_flush(batch);
5635     gen6_pp_pipeline_select(ctx, pp_context);
5636     gen6_pp_state_base_address(ctx, pp_context);
5637     gen6_pp_vfe_state(ctx, pp_context);
5638     gen6_pp_curbe_load(ctx, pp_context);
5639     gen6_interface_descriptor_load(ctx, pp_context);
5640     gen6_pp_object_walker(ctx, pp_context);
5641     intel_batchbuffer_end_atomic(batch);
5642 }
5643
5644 static void
5645 gen8_pp_pipeline_setup(VADriverContextP ctx,
5646                        struct i965_post_processing_context *pp_context)
5647 {
5648     struct intel_batchbuffer *batch = pp_context->batch;
5649
5650     intel_batchbuffer_start_atomic(batch, 0x1000);
5651     intel_batchbuffer_emit_mi_flush(batch);
5652     gen6_pp_pipeline_select(ctx, pp_context);
5653     gen8_pp_state_base_address(ctx, pp_context);
5654     gen8_pp_vfe_state(ctx, pp_context);
5655     gen6_pp_curbe_load(ctx, pp_context);
5656     gen8_interface_descriptor_load(ctx, pp_context);
5657     gen8_pp_vfe_state(ctx, pp_context);
5658     gen6_pp_object_walker(ctx, pp_context);
5659     intel_batchbuffer_end_atomic(batch);
5660 }
5661
5662 static VAStatus
5663 gen6_post_processing(
5664     VADriverContextP ctx,
5665     struct i965_post_processing_context *pp_context,
5666     const struct i965_surface *src_surface,
5667     const VARectangle *src_rect,
5668     struct i965_surface *dst_surface,
5669     const VARectangle *dst_rect,
5670     int pp_index,
5671     void *filter_param
5672 )
5673 {
5674     VAStatus va_status;
5675     
5676     va_status = gen6_pp_initialize(ctx, pp_context,
5677                                    src_surface,
5678                                    src_rect,
5679                                    dst_surface,
5680                                    dst_rect,
5681                                    pp_index,
5682                                    filter_param);
5683
5684     if (va_status == VA_STATUS_SUCCESS) {
5685         gen6_pp_states_setup(ctx, pp_context);
5686         gen6_pp_pipeline_setup(ctx, pp_context);
5687     }
5688
5689     if (va_status == VA_STATUS_SUCCESS_1)
5690         va_status = VA_STATUS_SUCCESS;
5691
5692     return va_status;
5693 }
5694
5695 static VAStatus
5696 gen8_post_processing(
5697     VADriverContextP   ctx,
5698     struct i965_post_processing_context *pp_context,
5699     const struct i965_surface *src_surface,
5700     const VARectangle *src_rect,
5701     struct i965_surface *dst_surface,
5702     const VARectangle *dst_rect,
5703     int                pp_index,
5704     void * filter_param
5705 )
5706 {
5707     VAStatus va_status;
5708     
5709     va_status = gen8_pp_initialize(ctx, pp_context,
5710                                    src_surface,
5711                                    src_rect,
5712                                    dst_surface,
5713                                    dst_rect,
5714                                    pp_index,
5715                                    filter_param);
5716
5717     if (va_status == VA_STATUS_SUCCESS) {
5718         gen8_pp_states_setup(ctx, pp_context);
5719         gen8_pp_pipeline_setup(ctx, pp_context);
5720     }
5721
5722     return va_status;
5723 }
5724
5725 static VAStatus
5726 i965_post_processing_internal(
5727     VADriverContextP   ctx,
5728     struct i965_post_processing_context *pp_context,
5729     const struct i965_surface *src_surface,
5730     const VARectangle *src_rect,
5731     struct i965_surface *dst_surface,
5732     const VARectangle *dst_rect,
5733     int                pp_index,
5734     void *filter_param
5735 )
5736 {
5737     VAStatus va_status;
5738     struct i965_driver_data *i965 = i965_driver_data(ctx);
5739
5740     if (IS_GEN8(i965->intel.device_id))
5741         va_status = gen8_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
5742     else if (IS_GEN6(i965->intel.device_id) ||
5743         IS_GEN7(i965->intel.device_id))
5744         va_status = gen6_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
5745     else
5746         va_status = ironlake_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
5747     
5748     return va_status;
5749 }
5750
/*
 * Split a packed 0xAARRGGBB colour into Y, U, V and alpha bytes.
 *
 * The conversion uses integer arithmetic with coefficients scaled by
 * 1000; the division truncates toward zero before the +16 (Y) or +128
 * (U, V) offset is added.  Alpha is the top byte, passed through as is.
 * Outputs are written in the order y, v, u, a.
 */
static void
rgb_to_yuv(unsigned int argb,
           unsigned char *y,
           unsigned char *u,
           unsigned char *v,
           unsigned char *a)
{
    const int red = (argb >> 16) & 0xff;
    const int green = (argb >> 8) & 0xff;
    const int blue = argb & 0xff;

    /* weighted sums, still scaled by 1000 */
    const int luma_x1000 = 257 * red + 504 * green + 98 * blue;
    const int cr_x1000 = 439 * red - 368 * green - 71 * blue;
    const int cb_x1000 = -148 * red - 291 * green + 439 * blue;

    *y = luma_x1000 / 1000 + 16;
    *v = cr_x1000 / 1000 + 128;
    *u = cb_x1000 / 1000 + 128;
    *a = (argb >> 24) & 0xff;
}
5767
5768 static void 
5769 i965_vpp_clear_surface(VADriverContextP ctx,
5770                        struct i965_post_processing_context *pp_context,
5771                        struct object_surface *obj_surface,
5772                        unsigned int color)
5773 {
5774     struct i965_driver_data *i965 = i965_driver_data(ctx);
5775     struct intel_batchbuffer *batch = pp_context->batch;
5776     unsigned int blt_cmd, br13;
5777     unsigned int tiling = 0, swizzle = 0;
5778     int pitch;
5779     unsigned char y, u, v, a = 0;
5780     int region_width, region_height;
5781
5782     /* Currently only support NV12 surface */
5783     if (!obj_surface || obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
5784         return;
5785
5786     rgb_to_yuv(color, &y, &u, &v, &a);
5787
5788     if (a == 0)
5789         return;
5790
5791     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
5792     blt_cmd = XY_COLOR_BLT_CMD;
5793     pitch = obj_surface->width;
5794
5795     if (tiling != I915_TILING_NONE) {
5796         assert(tiling == I915_TILING_Y);
5797         // blt_cmd |= XY_COLOR_BLT_DST_TILED;
5798         // pitch >>= 2;
5799     }
5800
5801     br13 = 0xf0 << 16;
5802     br13 |= BR13_8;
5803     br13 |= pitch;
5804
5805     if (IS_GEN6(i965->intel.device_id) ||
5806         IS_GEN7(i965->intel.device_id) ||
5807         IS_GEN8(i965->intel.device_id)) {
5808         intel_batchbuffer_start_atomic_blt(batch, 48);
5809         BEGIN_BLT_BATCH(batch, 12);
5810     } else {
5811         intel_batchbuffer_start_atomic(batch, 48);
5812         BEGIN_BATCH(batch, 12);
5813     }
5814
5815     region_width = obj_surface->width;
5816     region_height = obj_surface->height;
5817
5818     OUT_BATCH(batch, blt_cmd);
5819     OUT_BATCH(batch, br13);
5820     OUT_BATCH(batch,
5821               0 << 16 |
5822               0);
5823     OUT_BATCH(batch,
5824               region_height << 16 |
5825               region_width);
5826     OUT_RELOC(batch, obj_surface->bo, 
5827               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
5828               0);
5829     OUT_BATCH(batch, y);
5830
5831     br13 = 0xf0 << 16;
5832     br13 |= BR13_565;
5833     br13 |= pitch;
5834
5835     region_width = obj_surface->width / 2;
5836     region_height = obj_surface->height / 2;
5837
5838     if (tiling == I915_TILING_Y) {
5839         region_height = ALIGN(obj_surface->height / 2, 32);
5840     }
5841
5842     OUT_BATCH(batch, blt_cmd);
5843     OUT_BATCH(batch, br13);
5844     OUT_BATCH(batch,
5845               0 << 16 |
5846               0);
5847     OUT_BATCH(batch,
5848               region_height << 16 |
5849               region_width);
5850     OUT_RELOC(batch, obj_surface->bo, 
5851               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
5852               obj_surface->width * obj_surface->y_cb_offset);
5853     OUT_BATCH(batch, v << 8 | u);
5854
5855     ADVANCE_BATCH(batch);
5856     intel_batchbuffer_end_atomic(batch);
5857 }
5858
5859 VAStatus
5860 i965_scaling_processing(
5861     VADriverContextP   ctx,
5862     struct object_surface *src_surface_obj,
5863     const VARectangle *src_rect,
5864     struct object_surface *dst_surface_obj,
5865     const VARectangle *dst_rect,
5866     unsigned int       flags)
5867 {
5868     VAStatus va_status = VA_STATUS_SUCCESS;
5869     struct i965_driver_data *i965 = i965_driver_data(ctx);
5870  
5871     assert(src_surface_obj->fourcc == VA_FOURCC('N', 'V', '1', '2'));
5872     assert(dst_surface_obj->fourcc == VA_FOURCC('N', 'V', '1', '2'));
5873
5874     if (HAS_PP(i965) && (flags & I965_PP_FLAG_AVS)) {
5875         struct i965_surface src_surface;
5876         struct i965_surface dst_surface;
5877
5878          _i965LockMutex(&i965->pp_mutex);
5879
5880          src_surface.base = (struct object_base *)src_surface_obj;
5881          src_surface.type = I965_SURFACE_TYPE_SURFACE;
5882          src_surface.flags = I965_SURFACE_FLAG_FRAME;
5883          dst_surface.base = (struct object_base *)dst_surface_obj;
5884          dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5885          dst_surface.flags = I965_SURFACE_FLAG_FRAME;
5886
5887          va_status = i965_post_processing_internal(ctx, i965->pp_context,
5888                                                    &src_surface,
5889                                                    src_rect,
5890                                                    &dst_surface,
5891                                                    dst_rect,
5892                                                    PP_NV12_AVS,
5893                                                    NULL);
5894
5895          _i965UnlockMutex(&i965->pp_mutex);
5896     }
5897
5898     return va_status;
5899 }
5900
5901 VASurfaceID
5902 i965_post_processing(
5903     VADriverContextP   ctx,
5904     struct object_surface *obj_surface,
5905     const VARectangle *src_rect,
5906     const VARectangle *dst_rect,
5907     unsigned int       flags,
5908     int               *has_done_scaling  
5909 )
5910 {
5911     struct i965_driver_data *i965 = i965_driver_data(ctx);
5912     VASurfaceID out_surface_id = VA_INVALID_ID;
5913     VASurfaceID tmp_id = VA_INVALID_ID;
5914     
5915     *has_done_scaling = 0;
5916
5917     if (HAS_PP(i965)) {
5918         VAStatus status;
5919         struct i965_surface src_surface;
5920         struct i965_surface dst_surface;
5921
5922         /* Currently only support post processing for NV12 surface */
5923         if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
5924             return out_surface_id;
5925
5926         _i965LockMutex(&i965->pp_mutex);
5927
5928         if (flags & I965_PP_FLAG_MCDI) {
5929             src_surface.base = (struct object_base *)obj_surface;
5930             src_surface.type = I965_SURFACE_TYPE_SURFACE;
5931             src_surface.flags = (flags & I965_PP_FLAG_TOP_FIELD) ? 
5932                 I965_SURFACE_FLAG_TOP_FIELD_FIRST : I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST;
5933
5934             status = i965_CreateSurfaces(ctx,
5935                                          obj_surface->orig_width,
5936                                          obj_surface->orig_height,
5937                                          VA_RT_FORMAT_YUV420,
5938                                          1,
5939                                          &out_surface_id);
5940             assert(status == VA_STATUS_SUCCESS);
5941             obj_surface = SURFACE(out_surface_id);
5942             assert(obj_surface);
5943             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
5944             i965_vpp_clear_surface(ctx, i965->pp_context, obj_surface, 0); 
5945
5946             dst_surface.base = (struct object_base *)obj_surface;
5947             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5948             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
5949
5950             i965_post_processing_internal(ctx, i965->pp_context,
5951                                           &src_surface,
5952                                           src_rect,
5953                                           &dst_surface,
5954                                           dst_rect,
5955                                           PP_NV12_DNDI,
5956                                           NULL);
5957         }
5958
5959         if (flags & I965_PP_FLAG_AVS) {
5960             struct i965_render_state *render_state = &i965->render_state;
5961             struct intel_region *dest_region = render_state->draw_region;
5962
5963             if (out_surface_id != VA_INVALID_ID)
5964                 tmp_id = out_surface_id;
5965
5966             src_surface.base = (struct object_base *)obj_surface;
5967             src_surface.type = I965_SURFACE_TYPE_SURFACE;
5968             src_surface.flags = I965_SURFACE_FLAG_FRAME;
5969
5970             status = i965_CreateSurfaces(ctx,
5971                                          dest_region->width,
5972                                          dest_region->height,
5973                                          VA_RT_FORMAT_YUV420,
5974                                          1,
5975                                          &out_surface_id);
5976             assert(status == VA_STATUS_SUCCESS);
5977             obj_surface = SURFACE(out_surface_id);
5978             assert(obj_surface);
5979             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
5980             i965_vpp_clear_surface(ctx, i965->pp_context, obj_surface, 0); 
5981
5982             dst_surface.base = (struct object_base *)obj_surface;
5983             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5984             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
5985
5986             i965_post_processing_internal(ctx, i965->pp_context,
5987                                           &src_surface,
5988                                           src_rect,
5989                                           &dst_surface,
5990                                           dst_rect,
5991                                           PP_NV12_AVS,
5992                                           NULL);
5993
5994             if (tmp_id != VA_INVALID_ID)
5995                 i965_DestroySurfaces(ctx, &tmp_id, 1);
5996                 
5997             *has_done_scaling = 1;
5998         }
5999
6000         _i965UnlockMutex(&i965->pp_mutex);
6001     }
6002
6003     return out_surface_id;
6004 }       
6005
6006 static VAStatus
6007 i965_image_pl2_processing(VADriverContextP ctx,
6008                           const struct i965_surface *src_surface,
6009                           const VARectangle *src_rect,
6010                           struct i965_surface *dst_surface,
6011                           const VARectangle *dst_rect);
6012
6013 static VAStatus
6014 i965_image_plx_nv12_plx_processing(VADriverContextP ctx,
6015                                    VAStatus (*i965_image_plx_nv12_processing)(
6016                                        VADriverContextP,
6017                                        const struct i965_surface *,
6018                                        const VARectangle *,
6019                                        struct i965_surface *,
6020                                        const VARectangle *),
6021                                    const struct i965_surface *src_surface,
6022                                    const VARectangle *src_rect,
6023                                    struct i965_surface *dst_surface,
6024                                    const VARectangle *dst_rect)
6025 {
6026     struct i965_driver_data *i965 = i965_driver_data(ctx);
6027     VAStatus status;
6028     VASurfaceID tmp_surface_id = VA_INVALID_SURFACE;
6029     struct object_surface *obj_surface = NULL;
6030     struct i965_surface tmp_surface;
6031     int width, height;
6032
6033     pp_get_surface_size(ctx, dst_surface, &width, &height);
6034     status = i965_CreateSurfaces(ctx,
6035                                  width,
6036                                  height,
6037                                  VA_RT_FORMAT_YUV420,
6038                                  1,
6039                                  &tmp_surface_id);
6040     assert(status == VA_STATUS_SUCCESS);
6041     obj_surface = SURFACE(tmp_surface_id);
6042     assert(obj_surface);
6043     i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
6044
6045     tmp_surface.base = (struct object_base *)obj_surface;
6046     tmp_surface.type = I965_SURFACE_TYPE_SURFACE;
6047     tmp_surface.flags = I965_SURFACE_FLAG_FRAME;
6048
6049     status = i965_image_plx_nv12_processing(ctx,
6050                                             src_surface,
6051                                             src_rect,
6052                                             &tmp_surface,
6053                                             dst_rect);
6054
6055     if (status == VA_STATUS_SUCCESS)
6056         status = i965_image_pl2_processing(ctx,
6057                                            &tmp_surface,
6058                                            dst_rect,
6059                                            dst_surface,
6060                                            dst_rect);
6061
6062     i965_DestroySurfaces(ctx,
6063                          &tmp_surface_id,
6064                          1);
6065
6066     return status;
6067 }
6068
6069
6070 static VAStatus
6071 i965_image_pl1_rgbx_processing(VADriverContextP ctx,
6072                                const struct i965_surface *src_surface,
6073                                const VARectangle *src_rect,
6074                                struct i965_surface *dst_surface,
6075                                const VARectangle *dst_rect)
6076 {
6077     struct i965_driver_data *i965 = i965_driver_data(ctx);
6078     struct i965_post_processing_context *pp_context = i965->pp_context;
6079     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
6080     VAStatus vaStatus;
6081
6082     switch (fourcc) {
6083     case VA_FOURCC('N', 'V', '1', '2'):
6084         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
6085                                                  src_surface,
6086                                                  src_rect,
6087                                                  dst_surface,
6088                                                  dst_rect,
6089                                                  PP_RGBX_LOAD_SAVE_NV12,
6090                                                  NULL);
6091         intel_batchbuffer_flush(pp_context->batch);
6092         break;
6093
6094     default:
6095         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
6096                                                       i965_image_pl1_rgbx_processing,
6097                                                       src_surface,
6098                                                       src_rect,
6099                                                       dst_surface,
6100                                                       dst_rect);
6101         break;
6102     }
6103
6104     return vaStatus;
6105 }
6106
6107 static VAStatus
6108 i965_image_pl3_processing(VADriverContextP ctx,
6109                           const struct i965_surface *src_surface,
6110                           const VARectangle *src_rect,
6111                           struct i965_surface *dst_surface,
6112                           const VARectangle *dst_rect)
6113 {
6114     struct i965_driver_data *i965 = i965_driver_data(ctx);
6115     struct i965_post_processing_context *pp_context = i965->pp_context;
6116     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
6117     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
6118
6119     switch (fourcc) {
6120     case VA_FOURCC('N', 'V', '1', '2'):
6121         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
6122                                                  src_surface,
6123                                                  src_rect,
6124                                                  dst_surface,
6125                                                  dst_rect,
6126                                                  PP_PL3_LOAD_SAVE_N12,
6127                                                  NULL);
6128         intel_batchbuffer_flush(pp_context->batch);
6129         break;
6130
6131     case VA_FOURCC('I', 'M', 'C', '1'):
6132     case VA_FOURCC('I', 'M', 'C', '3'):
6133     case VA_FOURCC('Y', 'V', '1', '2'):
6134     case VA_FOURCC('I', '4', '2', '0'):
6135         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
6136                                                  src_surface,
6137                                                  src_rect,
6138                                                  dst_surface,
6139                                                  dst_rect,
6140                                                  PP_PL3_LOAD_SAVE_PL3,
6141                                                  NULL);
6142         intel_batchbuffer_flush(pp_context->batch);
6143         break;
6144
6145     case VA_FOURCC('Y', 'U', 'Y', '2'):
6146     case VA_FOURCC('U', 'Y', 'V', 'Y'):
6147         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
6148                                                  src_surface,
6149                                                  src_rect,
6150                                                  dst_surface,
6151                                                  dst_rect,
6152                                                  PP_PL3_LOAD_SAVE_PA,
6153                                                  NULL);
6154         intel_batchbuffer_flush(pp_context->batch);
6155         break;
6156
6157     default:
6158         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
6159                                                       i965_image_pl3_processing,
6160                                                       src_surface,
6161                                                       src_rect,
6162                                                       dst_surface,
6163                                                       dst_rect);
6164         break;
6165     }
6166
6167     return vaStatus;
6168 }
6169
6170 static VAStatus
6171 i965_image_pl2_processing(VADriverContextP ctx,
6172                           const struct i965_surface *src_surface,
6173                           const VARectangle *src_rect,
6174                           struct i965_surface *dst_surface,
6175                           const VARectangle *dst_rect)
6176 {
6177     struct i965_driver_data *i965 = i965_driver_data(ctx);
6178     struct i965_post_processing_context *pp_context = i965->pp_context;
6179     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
6180     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
6181
6182     switch (fourcc) {
6183     case VA_FOURCC('N', 'V', '1', '2'):
6184         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
6185                                                  src_surface,
6186                                                  src_rect,
6187                                                  dst_surface,
6188                                                  dst_rect,
6189                                                  PP_NV12_LOAD_SAVE_N12,
6190                                                  NULL);
6191         break;
6192
6193     case VA_FOURCC('I', 'M', 'C', '1'):
6194     case VA_FOURCC('I', 'M', 'C', '3'):
6195     case VA_FOURCC('Y', 'V', '1', '2'):
6196     case VA_FOURCC('I', '4', '2', '0'):
6197         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
6198                                                  src_surface,
6199                                                  src_rect,
6200                                                  dst_surface,
6201                                                  dst_rect,
6202                                                  PP_NV12_LOAD_SAVE_PL3,
6203                                                  NULL);
6204         break;
6205
6206     case VA_FOURCC('Y', 'U', 'Y', '2'):
6207     case VA_FOURCC('U', 'Y', 'V', 'Y'):
6208         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
6209                                                  src_surface,
6210                                                  src_rect,
6211                                                  dst_surface,
6212                                                  dst_rect,
6213                                                  PP_NV12_LOAD_SAVE_PA,
6214                                                  NULL);
6215         break;
6216
6217     case VA_FOURCC('B', 'G', 'R', 'X'):
6218     case VA_FOURCC('B', 'G', 'R', 'A'):
6219     case VA_FOURCC('R', 'G', 'B', 'X'):
6220     case VA_FOURCC('R', 'G', 'B', 'A'):
6221         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
6222                                                  src_surface,
6223                                                  src_rect,
6224                                                  dst_surface,
6225                                                  dst_rect,
6226                                                  PP_NV12_LOAD_SAVE_RGBX,
6227                                                  NULL);
6228         break;
6229
6230     default:
6231         return VA_STATUS_ERROR_UNIMPLEMENTED;
6232     }
6233
6234     intel_batchbuffer_flush(pp_context->batch);
6235
6236     return vaStatus;
6237 }
6238
6239 static VAStatus
6240 i965_image_pl1_processing(VADriverContextP ctx,
6241                           const struct i965_surface *src_surface,
6242                           const VARectangle *src_rect,
6243                           struct i965_surface *dst_surface,
6244                           const VARectangle *dst_rect)
6245 {
6246     struct i965_driver_data *i965 = i965_driver_data(ctx);
6247     struct i965_post_processing_context *pp_context = i965->pp_context;
6248     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
6249     VAStatus vaStatus;
6250
6251     switch (fourcc) {
6252     case VA_FOURCC('N', 'V', '1', '2'):
6253         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
6254                                                  src_surface,
6255                                                  src_rect,
6256                                                  dst_surface,
6257                                                  dst_rect,
6258                                                  PP_PA_LOAD_SAVE_NV12,
6259                                                  NULL);
6260         intel_batchbuffer_flush(pp_context->batch);
6261         break;
6262
6263     case VA_FOURCC('Y', 'V', '1', '2'):
6264         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
6265                                                  src_surface,
6266                                                  src_rect,
6267                                                  dst_surface,
6268                                                  dst_rect,
6269                                                  PP_PA_LOAD_SAVE_PL3,
6270                                                  NULL);
6271         intel_batchbuffer_flush(pp_context->batch);
6272         break;
6273
6274     case VA_FOURCC('Y', 'U', 'Y', '2'):
6275     case VA_FOURCC('U', 'Y', 'V', 'Y'):
6276         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
6277                                                  src_surface,
6278                                                  src_rect,
6279                                                  dst_surface,
6280                                                  dst_rect,
6281                                                  PP_PA_LOAD_SAVE_PA,
6282                                                  NULL);
6283         intel_batchbuffer_flush(pp_context->batch);
6284         break;
6285
6286     default:
6287         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
6288                                                       i965_image_pl1_processing,
6289                                                       src_surface,
6290                                                       src_rect,
6291                                                       dst_surface,
6292                                                       dst_rect);
6293         break;
6294     }
6295
6296     return vaStatus;
6297 }
6298
6299 VAStatus
6300 i965_image_processing(VADriverContextP ctx,
6301                       const struct i965_surface *src_surface,
6302                       const VARectangle *src_rect,
6303                       struct i965_surface *dst_surface,
6304                       const VARectangle *dst_rect)
6305 {
6306     struct i965_driver_data *i965 = i965_driver_data(ctx);
6307     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
6308
6309     if (HAS_PP(i965)) {
6310         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
6311
6312         _i965LockMutex(&i965->pp_mutex);
6313
6314         switch (fourcc) {
6315         case VA_FOURCC('Y', 'V', '1', '2'):
6316         case VA_FOURCC('I', '4', '2', '0'):
6317         case VA_FOURCC('I', 'M', 'C', '1'):
6318         case VA_FOURCC('I', 'M', 'C', '3'):
6319         case VA_FOURCC('4', '2', '2', 'H'):
6320         case VA_FOURCC('4', '2', '2', 'V'):
6321         case VA_FOURCC('4', '1', '1', 'P'):
6322         case VA_FOURCC('4', '4', '4', 'P'):
6323             status = i965_image_pl3_processing(ctx,
6324                                                src_surface,
6325                                                src_rect,
6326                                                dst_surface,
6327                                                dst_rect);
6328             break;
6329
6330         case  VA_FOURCC('N', 'V', '1', '2'):
6331             status = i965_image_pl2_processing(ctx,
6332                                                src_surface,
6333                                                src_rect,
6334                                                dst_surface,
6335                                                dst_rect);
6336             break;
6337         case VA_FOURCC('Y', 'U', 'Y', '2'):
6338         case VA_FOURCC('U', 'Y', 'V', 'Y'):
6339             status = i965_image_pl1_processing(ctx,
6340                                                src_surface,
6341                                                src_rect,
6342                                                dst_surface,
6343                                                dst_rect);
6344             break;
6345         case VA_FOURCC('B', 'G', 'R', 'A'):
6346         case VA_FOURCC('B', 'G', 'R', 'X'):
6347         case VA_FOURCC('R', 'G', 'B', 'A'):
6348         case VA_FOURCC('R', 'G', 'B', 'X'):
6349             status = i965_image_pl1_rgbx_processing(ctx,
6350                                                src_surface,
6351                                                src_rect,
6352                                                dst_surface,
6353                                                dst_rect);
6354             break;
6355         default:
6356             status = VA_STATUS_ERROR_UNIMPLEMENTED;
6357             break;
6358         }
6359         
6360         _i965UnlockMutex(&i965->pp_mutex);
6361     }
6362
6363     return status;
6364 }       
6365
6366 static void
6367 i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
6368 {
6369     int i;
6370
6371     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
6372     pp_context->surface_state_binding_table.bo = NULL;
6373
6374     dri_bo_unreference(pp_context->curbe.bo);
6375     pp_context->curbe.bo = NULL;
6376
6377     dri_bo_unreference(pp_context->sampler_state_table.bo);
6378     pp_context->sampler_state_table.bo = NULL;
6379
6380     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
6381     pp_context->sampler_state_table.bo_8x8 = NULL;
6382
6383     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
6384     pp_context->sampler_state_table.bo_8x8_uv = NULL;
6385
6386     dri_bo_unreference(pp_context->idrt.bo);
6387     pp_context->idrt.bo = NULL;
6388     pp_context->idrt.num_interface_descriptors = 0;
6389
6390     dri_bo_unreference(pp_context->vfe_state.bo);
6391     pp_context->vfe_state.bo = NULL;
6392
6393     dri_bo_unreference(pp_context->pp_dndi_context.stmm_bo);
6394     pp_context->pp_dndi_context.stmm_bo = NULL;
6395
6396     dri_bo_unreference(pp_context->pp_dn_context.stmm_bo);
6397     pp_context->pp_dn_context.stmm_bo = NULL;
6398
6399     for (i = 0; i < NUM_PP_MODULES; i++) {
6400         struct pp_module *pp_module = &pp_context->pp_modules[i];
6401
6402         dri_bo_unreference(pp_module->kernel.bo);
6403         pp_module->kernel.bo = NULL;
6404     }
6405
6406     free(pp_context->pp_static_parameter);
6407     free(pp_context->pp_inline_parameter);
6408     pp_context->pp_static_parameter = NULL;
6409     pp_context->pp_inline_parameter = NULL;
6410 }
6411
6412 void
6413 i965_post_processing_terminate(VADriverContextP ctx)
6414 {
6415     struct i965_driver_data *i965 = i965_driver_data(ctx);
6416     struct i965_post_processing_context *pp_context = i965->pp_context;
6417
6418     if (pp_context) {
6419         i965_post_processing_context_finalize(pp_context);
6420         free(pp_context);
6421     }
6422
6423     i965->pp_context = NULL;
6424 }
6425
6426 #define VPP_CURBE_ALLOCATION_SIZE       32
6427
6428 static void
6429 i965_post_processing_context_init(VADriverContextP ctx,
6430                                   struct i965_post_processing_context *pp_context,
6431                                   struct intel_batchbuffer *batch)
6432 {
6433     struct i965_driver_data *i965 = i965_driver_data(ctx);
6434     int i;
6435
6436     if (IS_IRONLAKE(i965->intel.device_id)) {
6437         pp_context->urb.size = URB_SIZE((&i965->intel));
6438         pp_context->urb.num_vfe_entries = 32;
6439         pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
6440         pp_context->urb.num_cs_entries = 1;
6441         pp_context->urb.size_cs_entry = 2;
6442         pp_context->urb.vfe_start = 0;
6443         pp_context->urb.cs_start = pp_context->urb.vfe_start + 
6444             pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
6445         assert(pp_context->urb.cs_start +
6446             pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
6447     } else {
6448         pp_context->vfe_gpu_state.max_num_threads = 60;
6449         pp_context->vfe_gpu_state.num_urb_entries = 59;
6450         pp_context->vfe_gpu_state.gpgpu_mode = 0;
6451         pp_context->vfe_gpu_state.urb_entry_size = 16 - 1;
6452         pp_context->vfe_gpu_state.curbe_allocation_size = VPP_CURBE_ALLOCATION_SIZE;
6453     }
6454     
6455
6456     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
6457     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
6458     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
6459     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen75));
6460     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen8));
6461
6462     if (IS_GEN8(i965->intel.device_id))
6463         memcpy(pp_context->pp_modules, pp_modules_gen8, sizeof(pp_context->pp_modules));
6464     else if (IS_HASWELL(i965->intel.device_id))
6465         memcpy(pp_context->pp_modules, pp_modules_gen75, sizeof(pp_context->pp_modules));
6466     else if (IS_GEN7(i965->intel.device_id))
6467         memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
6468     else if (IS_GEN6(i965->intel.device_id))
6469         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
6470     else if (IS_IRONLAKE(i965->intel.device_id))
6471         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
6472
6473     for (i = 0; i < NUM_PP_MODULES; i++) {
6474         struct pp_module *pp_module = &pp_context->pp_modules[i];
6475         dri_bo_unreference(pp_module->kernel.bo);
6476         if (pp_module->kernel.bin && pp_module->kernel.size) {
6477             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
6478                                                 pp_module->kernel.name,
6479                                                 pp_module->kernel.size,
6480                                                 4096);
6481             assert(pp_module->kernel.bo);
6482             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
6483         } else {
6484             pp_module->kernel.bo = NULL;
6485         }
6486     }
6487
6488     /* static & inline parameters */
6489     if (IS_GEN7(i965->intel.device_id) ||
6490         IS_GEN8(i965->intel.device_id)) {
6491         pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
6492         pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
6493     } else {
6494         pp_context->pp_static_parameter = calloc(sizeof(struct pp_static_parameter), 1);
6495         pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
6496     }
6497
6498     pp_context->pp_dndi_context.current_out_surface = VA_INVALID_SURFACE;
6499     pp_context->pp_dndi_context.current_out_obj_surface = NULL;
6500     pp_context->pp_dndi_context.frame_order = -1;
6501     pp_context->batch = batch;
6502 }
6503
6504 bool
6505 i965_post_processing_init(VADriverContextP ctx)
6506 {
6507     struct i965_driver_data *i965 = i965_driver_data(ctx);
6508     struct i965_post_processing_context *pp_context = i965->pp_context;
6509
6510     if (HAS_PP(i965)) {
6511         if (pp_context == NULL) {
6512             pp_context = calloc(1, sizeof(*pp_context));
6513             i965_post_processing_context_init(ctx, pp_context, i965->pp_batch);
6514             i965->pp_context = pp_context;
6515         }
6516     }
6517
6518     return true;
6519 }
6520
6521 static const int procfilter_to_pp_flag[VAProcFilterCount] = {
6522     PP_NULL,    /* VAProcFilterNone */
6523     PP_NV12_DN, /* VAProcFilterNoiseReduction */
6524     PP_NV12_DNDI, /* VAProcFilterDeinterlacing */
6525     PP_NULL,    /* VAProcFilterSharpening */
6526     PP_NULL,    /* VAProcFilterColorBalance */
6527 };
6528
6529 static const int proc_frame_to_pp_frame[3] = {
6530     I965_SURFACE_FLAG_FRAME,
6531     I965_SURFACE_FLAG_TOP_FIELD_FIRST,
6532     I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST
6533 };
6534
6535 #define VA_STATUS_SUCCESS_1                     0xFFFFFFFE
6536
6537 VAStatus 
6538 i965_proc_picture(VADriverContextP ctx, 
6539                   VAProfile profile, 
6540                   union codec_state *codec_state,
6541                   struct hw_context *hw_context)
6542 {
6543     struct i965_driver_data *i965 = i965_driver_data(ctx);
6544     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
6545     struct proc_state *proc_state = &codec_state->proc;
6546     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
6547     struct object_surface *obj_surface;
6548     struct i965_surface src_surface, dst_surface;
6549     VARectangle src_rect, dst_rect;
6550     VAStatus status;
6551     int i;
6552     VASurfaceID tmp_surfaces[VAProcFilterCount + 4];
6553     int num_tmp_surfaces = 0;
6554     unsigned int tiling = 0, swizzle = 0;
6555     int in_width, in_height;
6556
6557     if (pipeline_param->surface == VA_INVALID_ID ||
6558         proc_state->current_render_target == VA_INVALID_ID) {
6559         status = VA_STATUS_ERROR_INVALID_SURFACE;
6560         goto error;
6561     }
6562
6563     obj_surface = SURFACE(pipeline_param->surface);
6564
6565     if (!obj_surface) {
6566         status = VA_STATUS_ERROR_INVALID_SURFACE;
6567         goto error;
6568     }
6569
6570     if (!obj_surface->bo) {
6571         status = VA_STATUS_ERROR_INVALID_VALUE; /* The input surface is created without valid content */
6572         goto error;
6573     }
6574
6575     if (pipeline_param->num_filters && !pipeline_param->filters) {
6576         status = VA_STATUS_ERROR_INVALID_PARAMETER;
6577         goto error;
6578     }
6579
6580     in_width = obj_surface->orig_width;
6581     in_height = obj_surface->orig_height;
6582     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
6583
6584     src_surface.base = (struct object_base *)obj_surface;
6585     src_surface.type = I965_SURFACE_TYPE_SURFACE;
6586     src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
6587
6588     VASurfaceID out_surface_id = VA_INVALID_ID;
6589     if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2')) {
6590         src_surface.base = (struct object_base *)obj_surface;
6591         src_surface.type = I965_SURFACE_TYPE_SURFACE;
6592         src_surface.flags = I965_SURFACE_FLAG_FRAME;
6593         src_rect.x = 0;
6594         src_rect.y = 0;
6595         src_rect.width = in_width;
6596         src_rect.height = in_height;
6597
6598         status = i965_CreateSurfaces(ctx,
6599                                      in_width,
6600                                      in_height,
6601                                      VA_RT_FORMAT_YUV420,
6602                                      1,
6603                                      &out_surface_id);
6604         assert(status == VA_STATUS_SUCCESS);
6605         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
6606         obj_surface = SURFACE(out_surface_id);
6607         assert(obj_surface);
6608         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
6609
6610         dst_surface.base = (struct object_base *)obj_surface;
6611         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6612         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
6613         dst_rect.x = 0;
6614         dst_rect.y = 0;
6615         dst_rect.width = in_width;
6616         dst_rect.height = in_height;
6617
6618         status = i965_image_processing(ctx,
6619                                        &src_surface,
6620                                        &src_rect,
6621                                        &dst_surface,
6622                                        &dst_rect);
6623         assert(status == VA_STATUS_SUCCESS);
6624
6625         src_surface.base = (struct object_base *)obj_surface;
6626         src_surface.type = I965_SURFACE_TYPE_SURFACE;
6627         src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
6628     }
6629
6630     if (pipeline_param->surface_region) {
6631         src_rect.x = pipeline_param->surface_region->x;
6632         src_rect.y = pipeline_param->surface_region->y;
6633         src_rect.width = pipeline_param->surface_region->width;
6634         src_rect.height = pipeline_param->surface_region->height;
6635     } else {
6636         src_rect.x = 0;
6637         src_rect.y = 0;
6638         src_rect.width = in_width;
6639         src_rect.height = in_height;
6640     }
6641
6642     if (pipeline_param->output_region) {
6643         dst_rect.x = pipeline_param->output_region->x;
6644         dst_rect.y = pipeline_param->output_region->y;
6645         dst_rect.width = pipeline_param->output_region->width;
6646         dst_rect.height = pipeline_param->output_region->height;
6647     } else {
6648         dst_rect.x = 0;
6649         dst_rect.y = 0;
6650         dst_rect.width = in_width;
6651         dst_rect.height = in_height;
6652     }
6653
6654     proc_context->pp_context.pipeline_param = pipeline_param;
6655
6656     for (i = 0; i < pipeline_param->num_filters; i++) {
6657         struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]);
6658         VAProcFilterParameterBufferBase *filter_param = NULL;
6659         VAProcFilterType filter_type;
6660         int kernel_index;
6661
6662         if (!obj_buffer ||
6663             !obj_buffer->buffer_store ||
6664             !obj_buffer->buffer_store->buffer) {
6665             status = VA_STATUS_ERROR_INVALID_FILTER_CHAIN;
6666             goto error;
6667         }
6668
6669         out_surface_id = VA_INVALID_ID;
6670         filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer;
6671         filter_type = filter_param->type;
6672         kernel_index = procfilter_to_pp_flag[filter_type];
6673
6674         if (kernel_index != PP_NULL &&
6675             proc_context->pp_context.pp_modules[kernel_index].kernel.bo != NULL) {
6676             status = i965_CreateSurfaces(ctx,
6677                                          in_width,
6678                                          in_height,
6679                                          VA_RT_FORMAT_YUV420,
6680                                          1,
6681                                          &out_surface_id);
6682             assert(status == VA_STATUS_SUCCESS);
6683             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
6684             obj_surface = SURFACE(out_surface_id);
6685             assert(obj_surface);
6686             i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
6687             dst_surface.base = (struct object_base *)obj_surface;
6688             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6689             status = i965_post_processing_internal(ctx, &proc_context->pp_context,
6690                                                    &src_surface,
6691                                                    &src_rect,
6692                                                    &dst_surface,
6693                                                    &src_rect,
6694                                                    kernel_index,
6695                                                    filter_param);
6696
6697             if (status == VA_STATUS_SUCCESS) {
6698                 src_surface.base = dst_surface.base;
6699                 src_surface.type = dst_surface.type;
6700                 src_surface.flags = dst_surface.flags;
6701             }
6702         }
6703     }
6704
6705     proc_context->pp_context.pipeline_param = NULL;
6706     obj_surface = SURFACE(proc_state->current_render_target);
6707     
6708     if (!obj_surface) {
6709         status = VA_STATUS_ERROR_INVALID_SURFACE;
6710         goto error;
6711     }
6712
6713     int csc_needed = 0;
6714     if (obj_surface->fourcc && obj_surface->fourcc !=  VA_FOURCC('N','V','1','2')){
6715         csc_needed = 1;
6716         out_surface_id = VA_INVALID_ID;
6717         status = i965_CreateSurfaces(ctx,
6718                                      obj_surface->orig_width,
6719                                      obj_surface->orig_height,
6720                                      VA_RT_FORMAT_YUV420, 
6721                                      1,
6722                                      &out_surface_id);
6723         assert(status == VA_STATUS_SUCCESS);
6724         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
6725         struct object_surface *csc_surface = SURFACE(out_surface_id);
6726         assert(csc_surface);
6727         i965_check_alloc_surface_bo(ctx, csc_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
6728         dst_surface.base = (struct object_base *)csc_surface;
6729     } else {
6730         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
6731         dst_surface.base = (struct object_base *)obj_surface;
6732     }
6733
6734     dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6735     i965_vpp_clear_surface(ctx, &proc_context->pp_context, obj_surface, pipeline_param->output_background_color); 
6736
6737     // load/save doesn't support different origin offset for src and dst surface
6738     if (src_rect.width == dst_rect.width &&
6739         src_rect.height == dst_rect.height &&
6740         src_rect.x == dst_rect.x &&
6741         src_rect.y == dst_rect.y) {
6742         i965_post_processing_internal(ctx, &proc_context->pp_context,
6743                                       &src_surface,
6744                                       &src_rect,
6745                                       &dst_surface,
6746                                       &dst_rect,
6747                                       PP_NV12_LOAD_SAVE_N12,
6748                                       NULL);
6749     } else {
6750
6751         i965_post_processing_internal(ctx, &proc_context->pp_context,
6752                                       &src_surface,
6753                                       &src_rect,
6754                                       &dst_surface,
6755                                       &dst_rect,
6756                                       (pipeline_param->filter_flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC ?
6757                                       PP_NV12_AVS : PP_NV12_SCALING,
6758                                       NULL);
6759     }
6760
6761     if (csc_needed) {
6762         src_surface.base = dst_surface.base;
6763         src_surface.type = dst_surface.type;
6764         src_surface.flags = dst_surface.flags;
6765         dst_surface.base = (struct object_base *)obj_surface;
6766         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6767         i965_image_processing(ctx, &src_surface, &dst_rect, &dst_surface, &dst_rect);
6768     }
6769     
6770     if (num_tmp_surfaces)
6771         i965_DestroySurfaces(ctx,
6772                              tmp_surfaces,
6773                              num_tmp_surfaces);
6774
6775     intel_batchbuffer_flush(hw_context->batch);
6776
6777     return VA_STATUS_SUCCESS;
6778
6779 error:
6780     if (num_tmp_surfaces)
6781         i965_DestroySurfaces(ctx,
6782                              tmp_surfaces,
6783                              num_tmp_surfaces);
6784
6785     return status;
6786 }
6787
6788 static void
6789 i965_proc_context_destroy(void *hw_context)
6790 {
6791     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
6792
6793     i965_post_processing_context_finalize(&proc_context->pp_context);
6794     intel_batchbuffer_free(proc_context->base.batch);
6795     free(proc_context);
6796 }
6797
6798 struct hw_context *
6799 i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config)
6800 {
6801     struct intel_driver_data *intel = intel_driver_data(ctx);
6802     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
6803
6804     proc_context->base.destroy = i965_proc_context_destroy;
6805     proc_context->base.run = i965_proc_picture;
6806     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
6807     i965_post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
6808
6809     return (struct hw_context *)proc_context;
6810 }
6811
6812