Use the right parameters to initialize bit rate context
[platform/upstream/libva-intel-driver.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_post_processing.h"
40 #include "i965_render.h"
41 #include "intel_media.h"
42
/*
 * Non-zero when the device behind `ctx` matches one of the generations
 * tested here (Ironlake, Gen6, Gen7 or Gen8).
 */
#define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) ||     \
                     IS_GEN6((ctx)->intel.device_id) ||         \
                     IS_GEN7((ctx)->intel.device_id) ||         \
                     IS_GEN8((ctx)->intel.device_id))


/*
 * Largest of the Gen6/Gen7/Gen8 padded surface-state sizes, so that one
 * layout can be shared by every generation.  Wrapped in parentheses so the
 * expansion is safe inside larger expressions regardless of how MAX() is
 * defined.
 */
#define SURFACE_STATE_PADDED_SIZE               (MAX(SURFACE_STATE_PADDED_SIZE_GEN8,\
                        MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)))

/*
 * Byte offset of surface-state slot `index`.  The argument is parenthesized
 * so that expressions such as SURFACE_STATE_OFFSET(base + 1) multiply the
 * whole index instead of only its first operand.
 */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
/* The binding table starts right after the MAX_PP_SURFACES surface-state slots. */
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)

/*
 * NOTE(review): presumably the block geometry (width x height) and the
 * x-offset alignment expected by the GPU kernels -- confirm units with the
 * code that uses these constants.
 */
#define GPU_ASM_BLOCK_WIDTH         16
#define GPU_ASM_BLOCK_HEIGHT        8
#define GPU_ASM_X_OFFSET_ALIGNMENT  4

/*
 * Driver-private status code.
 * NOTE(review): its meaning is defined by the functions that return it,
 * which are outside this part of the file -- confirm before relying on it.
 */
#define VA_STATUS_SUCCESS_1                     0xFFFFFFFE
60
61 extern VAStatus
62 i965_DestroySurfaces(VADriverContextP ctx,
63                      VASurfaceID *surface_list,
64                      int num_surfaces);
65 extern VAStatus
66 i965_CreateSurfaces(VADriverContextP ctx,
67                     int width,
68                     int height,
69                     int format,
70                     int num_surfaces,
71                     VASurfaceID *surfaces);
72
/*
 * Gen5 post-processing kernel binaries.
 *
 * Each array is initialized by #including a shader file from
 * shaders/post_processing/gen5_6/ (the *.g4b.gen5 variants); every row is
 * four 32-bit words.  The arrays are referenced, together with their
 * sizeof, by the pp_modules_gen5 table.
 */
73 static const uint32_t pp_null_gen5[][4] = {
74 #include "shaders/post_processing/gen5_6/null.g4b.gen5"
75 };
76
77 static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
78 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
79 };
80
81 static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
82 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
83 };
84
85 static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
86 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
87 };
88
89 static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
90 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
91 };
92
93 static const uint32_t pp_nv12_scaling_gen5[][4] = {
94 #include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
95 };
96
97 static const uint32_t pp_nv12_avs_gen5[][4] = {
98 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
99 };
100
101 static const uint32_t pp_nv12_dndi_gen5[][4] = {
102 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
103 };
104
105 static const uint32_t pp_nv12_dn_gen5[][4] = {
106 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
107 };
108
109 static const uint32_t pp_nv12_load_save_pa_gen5[][4] = {
110 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5"
111 };
112
113 static const uint32_t pp_pl3_load_save_pa_gen5[][4] = {
114 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5"
115 };
116
117 static const uint32_t pp_pa_load_save_nv12_gen5[][4] = {
118 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5"
119 };
120
121 static const uint32_t pp_pa_load_save_pl3_gen5[][4] = {
122 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5"
123 };
124
125 static const uint32_t pp_pa_load_save_pa_gen5[][4] = {
126 #include "shaders/post_processing/gen5_6/pa_load_save_pa.g4b.gen5"
127 };
128
129 static const uint32_t pp_rgbx_load_save_nv12_gen5[][4] = {
130 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g4b.gen5"
131 };
132
133 static const uint32_t pp_nv12_load_save_rgbx_gen5[][4] = {
134 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g4b.gen5"
135 };
136
137 static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
138                                    const struct i965_surface *src_surface,
139                                    const VARectangle *src_rect,
140                                    struct i965_surface *dst_surface,
141                                    const VARectangle *dst_rect,
142                                    void *filter_param);
143 static VAStatus pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
144                                             const struct i965_surface *src_surface,
145                                             const VARectangle *src_rect,
146                                             struct i965_surface *dst_surface,
147                                             const VARectangle *dst_rect,
148                                             void *filter_param);
149 static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
150                                            const struct i965_surface *src_surface,
151                                            const VARectangle *src_rect,
152                                            struct i965_surface *dst_surface,
153                                            const VARectangle *dst_rect,
154                                            void *filter_param);
155 static VAStatus gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
156                                              const struct i965_surface *src_surface,
157                                              const VARectangle *src_rect,
158                                              struct i965_surface *dst_surface,
159                                              const VARectangle *dst_rect,
160                                              void *filter_param);
161 static VAStatus pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
162                                                 const struct i965_surface *src_surface,
163                                                 const VARectangle *src_rect,
164                                                 struct i965_surface *dst_surface,
165                                                 const VARectangle *dst_rect,
166                                                 void *filter_param);
167 static VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
168                                         const struct i965_surface *src_surface,
169                                         const VARectangle *src_rect,
170                                         struct i965_surface *dst_surface,
171                                         const VARectangle *dst_rect,
172                                         void *filter_param);
173 static VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
174                                       const struct i965_surface *src_surface,
175                                       const VARectangle *src_rect,
176                                       struct i965_surface *dst_surface,
177                                       const VARectangle *dst_rect,
178                                       void *filter_param);
179
180 static struct pp_module pp_modules_gen5[] = {
181     {
182         {
183             "NULL module (for testing)",
184             PP_NULL,
185             pp_null_gen5,
186             sizeof(pp_null_gen5),
187             NULL,
188         },
189
190         pp_null_initialize,
191     },
192
193     {
194         {
195             "NV12_NV12",
196             PP_NV12_LOAD_SAVE_N12,
197             pp_nv12_load_save_nv12_gen5,
198             sizeof(pp_nv12_load_save_nv12_gen5),
199             NULL,
200         },
201
202         pp_plx_load_save_plx_initialize,
203     },
204
205     {
206         {
207             "NV12_PL3",
208             PP_NV12_LOAD_SAVE_PL3,
209             pp_nv12_load_save_pl3_gen5,
210             sizeof(pp_nv12_load_save_pl3_gen5),
211             NULL,
212         },
213
214         pp_plx_load_save_plx_initialize,
215     },
216
217     {
218         {
219             "PL3_NV12",
220             PP_PL3_LOAD_SAVE_N12,
221             pp_pl3_load_save_nv12_gen5,
222             sizeof(pp_pl3_load_save_nv12_gen5),
223             NULL,
224         },
225
226         pp_plx_load_save_plx_initialize,
227     },
228
229     {
230         {
231             "PL3_PL3",
232             PP_PL3_LOAD_SAVE_PL3,
233             pp_pl3_load_save_pl3_gen5,
234             sizeof(pp_pl3_load_save_pl3_gen5),
235             NULL,
236         },
237
238         pp_plx_load_save_plx_initialize
239     },
240
241     {
242         {
243             "NV12 Scaling module",
244             PP_NV12_SCALING,
245             pp_nv12_scaling_gen5,
246             sizeof(pp_nv12_scaling_gen5),
247             NULL,
248         },
249
250         pp_nv12_scaling_initialize,
251     },
252
253     {
254         {
255             "NV12 AVS module",
256             PP_NV12_AVS,
257             pp_nv12_avs_gen5,
258             sizeof(pp_nv12_avs_gen5),
259             NULL,
260         },
261
262         pp_nv12_avs_initialize_nlas,
263     },
264
265     {
266         {
267             "NV12 DNDI module",
268             PP_NV12_DNDI,
269             pp_nv12_dndi_gen5,
270             sizeof(pp_nv12_dndi_gen5),
271             NULL,
272         },
273
274         pp_nv12_dndi_initialize,
275     },
276
277     {
278         {
279             "NV12 DN module",
280             PP_NV12_DN,
281             pp_nv12_dn_gen5,
282             sizeof(pp_nv12_dn_gen5),
283             NULL,
284         },
285
286         pp_nv12_dn_initialize,
287     },
288
289     {
290         {
291             "NV12_PA module",
292             PP_NV12_LOAD_SAVE_PA,
293             pp_nv12_load_save_pa_gen5,
294             sizeof(pp_nv12_load_save_pa_gen5),
295             NULL,
296         },
297     
298         pp_plx_load_save_plx_initialize,
299     },
300
301     {
302         {
303             "PL3_PA module",
304             PP_PL3_LOAD_SAVE_PA,
305             pp_pl3_load_save_pa_gen5,
306             sizeof(pp_pl3_load_save_pa_gen5),
307             NULL,
308         },
309     
310         pp_plx_load_save_plx_initialize,
311     },
312
313     {
314         {
315             "PA_NV12 module",
316             PP_PA_LOAD_SAVE_NV12,
317             pp_pa_load_save_nv12_gen5,
318             sizeof(pp_pa_load_save_nv12_gen5),
319             NULL,
320         },
321     
322         pp_plx_load_save_plx_initialize,
323     },
324
325     {
326         {
327             "PA_PL3 module",
328             PP_PA_LOAD_SAVE_PL3,
329             pp_pa_load_save_pl3_gen5,
330             sizeof(pp_pa_load_save_pl3_gen5),
331             NULL,
332         },
333     
334         pp_plx_load_save_plx_initialize,
335     },
336
337     {
338         {
339             "PA_PA module",
340             PP_PA_LOAD_SAVE_PA,
341             pp_pa_load_save_pa_gen5,
342             sizeof(pp_pa_load_save_pa_gen5),
343             NULL,
344         },
345
346         pp_plx_load_save_plx_initialize,
347     },
348
349     {
350         {
351             "RGBX_NV12 module",
352             PP_RGBX_LOAD_SAVE_NV12,
353             pp_rgbx_load_save_nv12_gen5,
354             sizeof(pp_rgbx_load_save_nv12_gen5),
355             NULL,
356         },
357     
358         pp_plx_load_save_plx_initialize,
359     },
360             
361     {
362         {
363             "NV12_RGBX module",
364             PP_NV12_LOAD_SAVE_RGBX,
365             pp_nv12_load_save_rgbx_gen5,
366             sizeof(pp_nv12_load_save_rgbx_gen5),
367             NULL,
368         },
369     
370         pp_plx_load_save_plx_initialize,
371     },
372 };
373
/*
 * Gen6 post-processing kernel binaries.
 *
 * Each array is initialized by #including a *.g6b shader file from
 * shaders/post_processing/gen5_6/; every row is four 32-bit words.  The
 * arrays are referenced, together with their sizeof, by the
 * pp_modules_gen6 table.
 */
374 static const uint32_t pp_null_gen6[][4] = {
375 #include "shaders/post_processing/gen5_6/null.g6b"
376 };
377
378 static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
379 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
380 };
381
382 static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
383 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
384 };
385
386 static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
387 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
388 };
389
390 static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
391 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
392 };
393
/*
 * The scaling kernel includes the same file as pp_nv12_avs_gen6 below
 * (nv12_avs_nv12.g6b), unlike gen5 which has a dedicated scaling shader.
 * NOTE(review): presumably intentional, since the gen6 table pairs this
 * kernel with gen6_nv12_scaling_initialize -- confirm.
 */
394 static const uint32_t pp_nv12_scaling_gen6[][4] = {
395 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
396 };
397
398 static const uint32_t pp_nv12_avs_gen6[][4] = {
399 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
400 };
401
402 static const uint32_t pp_nv12_dndi_gen6[][4] = {
403 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
404 };
405
406 static const uint32_t pp_nv12_dn_gen6[][4] = {
407 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
408 };
409
410 static const uint32_t pp_nv12_load_save_pa_gen6[][4] = {
411 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g6b"
412 };
413
414 static const uint32_t pp_pl3_load_save_pa_gen6[][4] = {
415 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g6b"
416 };
417
418 static const uint32_t pp_pa_load_save_nv12_gen6[][4] = {
419 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g6b"
420 };
421
422 static const uint32_t pp_pa_load_save_pl3_gen6[][4] = {
423 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g6b"
424 };
425
426 static const uint32_t pp_pa_load_save_pa_gen6[][4] = {
427 #include "shaders/post_processing/gen5_6/pa_load_save_pa.g6b"
428 };
429
430 static const uint32_t pp_rgbx_load_save_nv12_gen6[][4] = {
431 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g6b"
432 };
433
434 static const uint32_t pp_nv12_load_save_rgbx_gen6[][4] = {
435 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g6b"
436 };
437
438 static struct pp_module pp_modules_gen6[] = {
439     {
440         {
441             "NULL module (for testing)",
442             PP_NULL,
443             pp_null_gen6,
444             sizeof(pp_null_gen6),
445             NULL,
446         },
447
448         pp_null_initialize,
449     },
450
451     {
452         {
453             "NV12_NV12",
454             PP_NV12_LOAD_SAVE_N12,
455             pp_nv12_load_save_nv12_gen6,
456             sizeof(pp_nv12_load_save_nv12_gen6),
457             NULL,
458         },
459
460         pp_plx_load_save_plx_initialize,
461     },
462
463     {
464         {
465             "NV12_PL3",
466             PP_NV12_LOAD_SAVE_PL3,
467             pp_nv12_load_save_pl3_gen6,
468             sizeof(pp_nv12_load_save_pl3_gen6),
469             NULL,
470         },
471         
472         pp_plx_load_save_plx_initialize,
473     },
474
475     {
476         {
477             "PL3_NV12",
478             PP_PL3_LOAD_SAVE_N12,
479             pp_pl3_load_save_nv12_gen6,
480             sizeof(pp_pl3_load_save_nv12_gen6),
481             NULL,
482         },
483
484         pp_plx_load_save_plx_initialize,
485     },
486
487     {
488         {
489             "PL3_PL3",
490             PP_PL3_LOAD_SAVE_PL3,
491             pp_pl3_load_save_pl3_gen6,
492             sizeof(pp_pl3_load_save_pl3_gen6),
493             NULL,
494         },
495
496         pp_plx_load_save_plx_initialize,
497     },
498
499     {
500         {
501             "NV12 Scaling module",
502             PP_NV12_SCALING,
503             pp_nv12_scaling_gen6,
504             sizeof(pp_nv12_scaling_gen6),
505             NULL,
506         },
507
508         gen6_nv12_scaling_initialize,
509     },
510
511     {
512         {
513             "NV12 AVS module",
514             PP_NV12_AVS,
515             pp_nv12_avs_gen6,
516             sizeof(pp_nv12_avs_gen6),
517             NULL,
518         },
519
520         pp_nv12_avs_initialize_nlas,
521     },
522
523     {
524         {
525             "NV12 DNDI module",
526             PP_NV12_DNDI,
527             pp_nv12_dndi_gen6,
528             sizeof(pp_nv12_dndi_gen6),
529             NULL,
530         },
531
532         pp_nv12_dndi_initialize,
533     },
534
535     {
536         {
537             "NV12 DN module",
538             PP_NV12_DN,
539             pp_nv12_dn_gen6,
540             sizeof(pp_nv12_dn_gen6),
541             NULL,
542         },
543
544         pp_nv12_dn_initialize,
545     },
546     {
547         {
548             "NV12_PA module",
549             PP_NV12_LOAD_SAVE_PA,
550             pp_nv12_load_save_pa_gen6,
551             sizeof(pp_nv12_load_save_pa_gen6),
552             NULL,
553         },
554     
555         pp_plx_load_save_plx_initialize,
556     },
557
558     {
559         {
560             "PL3_PA module",
561             PP_PL3_LOAD_SAVE_PA,
562             pp_pl3_load_save_pa_gen6,
563             sizeof(pp_pl3_load_save_pa_gen6),
564             NULL,
565         },
566     
567         pp_plx_load_save_plx_initialize,
568     },
569
570     {
571         {
572             "PA_NV12 module",
573             PP_PA_LOAD_SAVE_NV12,
574             pp_pa_load_save_nv12_gen6,
575             sizeof(pp_pa_load_save_nv12_gen6),
576             NULL,
577         },
578     
579         pp_plx_load_save_plx_initialize,
580     },
581
582     {
583         {
584             "PA_PL3 module",
585             PP_PA_LOAD_SAVE_PL3,
586             pp_pa_load_save_pl3_gen6,
587             sizeof(pp_pa_load_save_pl3_gen6),
588             NULL,
589         },
590     
591         pp_plx_load_save_plx_initialize,
592     },
593
594     {
595         {
596             "PA_PA module",
597             PP_PA_LOAD_SAVE_PA,
598             pp_pa_load_save_pa_gen6,
599             sizeof(pp_pa_load_save_pa_gen6),
600             NULL,
601         },
602
603         pp_plx_load_save_plx_initialize,
604     },
605
606     {
607         {
608             "RGBX_NV12 module",
609             PP_RGBX_LOAD_SAVE_NV12,
610             pp_rgbx_load_save_nv12_gen6,
611             sizeof(pp_rgbx_load_save_nv12_gen6),
612             NULL,
613         },
614     
615         pp_plx_load_save_plx_initialize,
616     },
617
618     {
619         {
620             "NV12_RGBX module",
621             PP_NV12_LOAD_SAVE_RGBX,
622             pp_nv12_load_save_rgbx_gen6,
623             sizeof(pp_nv12_load_save_rgbx_gen6),
624             NULL,
625         },
626     
627         pp_plx_load_save_plx_initialize,
628     },
629 };
630
/*
 * Gen7 post-processing kernel binaries.
 *
 * Each array is initialized by #including a *.g7b shader file from
 * shaders/post_processing/gen7/; every row is four 32-bit words.  The
 * arrays are referenced, together with their sizeof, by the
 * pp_modules_gen7 table.
 */
/*
 * No shader is included for the NULL module, so this array has no rows and
 * sizeof(pp_null_gen7) is 0.  An empty initializer list is a compiler
 * extension in C before C23.
 */
631 static const uint32_t pp_null_gen7[][4] = {
632 };
633
634 static const uint32_t pp_nv12_load_save_nv12_gen7[][4] = {
635 #include "shaders/post_processing/gen7/pl2_to_pl2.g7b"
636 };
637
638 static const uint32_t pp_nv12_load_save_pl3_gen7[][4] = {
639 #include "shaders/post_processing/gen7/pl2_to_pl3.g7b"
640 };
641
642 static const uint32_t pp_pl3_load_save_nv12_gen7[][4] = {
643 #include "shaders/post_processing/gen7/pl3_to_pl2.g7b"
644 };
645
646 static const uint32_t pp_pl3_load_save_pl3_gen7[][4] = {
647 #include "shaders/post_processing/gen7/pl3_to_pl3.g7b"
648 };
649
/* Scaling and AVS both include the same shader file (avs.g7b). */
650 static const uint32_t pp_nv12_scaling_gen7[][4] = {
651 #include "shaders/post_processing/gen7/avs.g7b"
652 };
653
654 static const uint32_t pp_nv12_avs_gen7[][4] = {
655 #include "shaders/post_processing/gen7/avs.g7b"
656 };
657
658 static const uint32_t pp_nv12_dndi_gen7[][4] = {
659 #include "shaders/post_processing/gen7/dndi.g7b"
660 };
661
662 static const uint32_t pp_nv12_dn_gen7[][4] = {
663 #include "shaders/post_processing/gen7/nv12_dn_nv12.g7b"
664 };
665 static const uint32_t pp_nv12_load_save_pa_gen7[][4] = {
666 #include "shaders/post_processing/gen7/pl2_to_pa.g7b"
667 };
668 static const uint32_t pp_pl3_load_save_pa_gen7[][4] = {
669 #include "shaders/post_processing/gen7/pl3_to_pa.g7b"
670 };
671 static const uint32_t pp_pa_load_save_nv12_gen7[][4] = {
672 #include "shaders/post_processing/gen7/pa_to_pl2.g7b"
673 };
674 static const uint32_t pp_pa_load_save_pl3_gen7[][4] = {
675 #include "shaders/post_processing/gen7/pa_to_pl3.g7b"
676 };
677 static const uint32_t pp_pa_load_save_pa_gen7[][4] = {
678 #include "shaders/post_processing/gen7/pa_to_pa.g7b"
679 };
680 static const uint32_t pp_rgbx_load_save_nv12_gen7[][4] = {
681 #include "shaders/post_processing/gen7/rgbx_to_nv12.g7b"
682 };
683 static const uint32_t pp_nv12_load_save_rgbx_gen7[][4] = {
684 #include "shaders/post_processing/gen7/pl2_to_rgbx.g7b"
685 };
686
687 static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
688                                            const struct i965_surface *src_surface,
689                                            const VARectangle *src_rect,
690                                            struct i965_surface *dst_surface,
691                                            const VARectangle *dst_rect,
692                                            void *filter_param);
693 static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
694                                              const struct i965_surface *src_surface,
695                                              const VARectangle *src_rect,
696                                              struct i965_surface *dst_surface,
697                                              const VARectangle *dst_rect,
698                                              void *filter_param);
699 static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
700                                            const struct i965_surface *src_surface,
701                                            const VARectangle *src_rect,
702                                            struct i965_surface *dst_surface,
703                                            const VARectangle *dst_rect,
704                                            void *filter_param);
705
706 static VAStatus gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
707                                            const struct i965_surface *src_surface,
708                                            const VARectangle *src_rect,
709                                            struct i965_surface *dst_surface,
710                                            const VARectangle *dst_rect,
711                                            void *filter_param);
712
713 static struct pp_module pp_modules_gen7[] = {
714     {
715         {
716             "NULL module (for testing)",
717             PP_NULL,
718             pp_null_gen7,
719             sizeof(pp_null_gen7),
720             NULL,
721         },
722
723         pp_null_initialize,
724     },
725
726     {
727         {
728             "NV12_NV12",
729             PP_NV12_LOAD_SAVE_N12,
730             pp_nv12_load_save_nv12_gen7,
731             sizeof(pp_nv12_load_save_nv12_gen7),
732             NULL,
733         },
734
735         gen7_pp_plx_avs_initialize,
736     },
737
738     {
739         {
740             "NV12_PL3",
741             PP_NV12_LOAD_SAVE_PL3,
742             pp_nv12_load_save_pl3_gen7,
743             sizeof(pp_nv12_load_save_pl3_gen7),
744             NULL,
745         },
746         
747         gen7_pp_plx_avs_initialize,
748     },
749
750     {
751         {
752             "PL3_NV12",
753             PP_PL3_LOAD_SAVE_N12,
754             pp_pl3_load_save_nv12_gen7,
755             sizeof(pp_pl3_load_save_nv12_gen7),
756             NULL,
757         },
758
759         gen7_pp_plx_avs_initialize,
760     },
761
762     {
763         {
764             "PL3_PL3",
765             PP_PL3_LOAD_SAVE_PL3,
766             pp_pl3_load_save_pl3_gen7,
767             sizeof(pp_pl3_load_save_pl3_gen7),
768             NULL,
769         },
770
771         gen7_pp_plx_avs_initialize,
772     },
773
774     {
775         {
776             "NV12 Scaling module",
777             PP_NV12_SCALING,
778             pp_nv12_scaling_gen7,
779             sizeof(pp_nv12_scaling_gen7),
780             NULL,
781         },
782
783         gen7_pp_plx_avs_initialize,
784     },
785
786     {
787         {
788             "NV12 AVS module",
789             PP_NV12_AVS,
790             pp_nv12_avs_gen7,
791             sizeof(pp_nv12_avs_gen7),
792             NULL,
793         },
794
795         gen7_pp_plx_avs_initialize,
796     },
797
798     {
799         {
800             "NV12 DNDI module",
801             PP_NV12_DNDI,
802             pp_nv12_dndi_gen7,
803             sizeof(pp_nv12_dndi_gen7),
804             NULL,
805         },
806
807         gen7_pp_nv12_dndi_initialize,
808     },
809
810     {
811         {
812             "NV12 DN module",
813             PP_NV12_DN,
814             pp_nv12_dn_gen7,
815             sizeof(pp_nv12_dn_gen7),
816             NULL,
817         },
818
819         gen7_pp_nv12_dn_initialize,
820     },
821     {
822         {
823             "NV12_PA module",
824             PP_NV12_LOAD_SAVE_PA,
825             pp_nv12_load_save_pa_gen7,
826             sizeof(pp_nv12_load_save_pa_gen7),
827             NULL,
828         },
829     
830         gen7_pp_plx_avs_initialize,
831     },
832
833     {
834         {
835             "PL3_PA module",
836             PP_PL3_LOAD_SAVE_PA,
837             pp_pl3_load_save_pa_gen7,
838             sizeof(pp_pl3_load_save_pa_gen7),
839             NULL,
840         },
841     
842         gen7_pp_plx_avs_initialize,
843     },
844
845     {
846         {
847             "PA_NV12 module",
848             PP_PA_LOAD_SAVE_NV12,
849             pp_pa_load_save_nv12_gen7,
850             sizeof(pp_pa_load_save_nv12_gen7),
851             NULL,
852         },
853     
854         gen7_pp_plx_avs_initialize,
855     },
856
857     {
858         {
859             "PA_PL3 module",
860             PP_PA_LOAD_SAVE_PL3,
861             pp_pa_load_save_pl3_gen7,
862             sizeof(pp_pa_load_save_pl3_gen7),
863             NULL,
864         },
865     
866         gen7_pp_plx_avs_initialize,
867     },
868
869     {
870         {
871             "PA_PA module",
872             PP_PA_LOAD_SAVE_PA,
873             pp_pa_load_save_pa_gen7,
874             sizeof(pp_pa_load_save_pa_gen7),
875             NULL,
876         },
877
878         gen7_pp_plx_avs_initialize,
879     },
880
881     {
882         {
883             "RGBX_NV12 module",
884             PP_RGBX_LOAD_SAVE_NV12,
885             pp_rgbx_load_save_nv12_gen7,
886             sizeof(pp_rgbx_load_save_nv12_gen7),
887             NULL,
888         },
889     
890         gen7_pp_plx_avs_initialize,
891     },
892
893     {
894         {
895             "NV12_RGBX module",
896             PP_NV12_LOAD_SAVE_RGBX,
897             pp_nv12_load_save_rgbx_gen7,
898             sizeof(pp_nv12_load_save_rgbx_gen7),
899             NULL,
900         },
901     
902         gen7_pp_plx_avs_initialize,
903     },
904             
905 };
906
/*
 * Gen7.5 post-processing kernel binaries.
 *
 * Each array is initialized by #including a *.g75b shader file from
 * shaders/post_processing/gen7/; every row is four 32-bit words.  The
 * arrays are referenced, together with their sizeof, by the
 * pp_modules_gen75 table.
 */
/*
 * No shader is included for the NULL module, so this array has no rows and
 * sizeof(pp_null_gen75) is 0.  An empty initializer list is a compiler
 * extension in C before C23.
 */
907 static const uint32_t pp_null_gen75[][4] = {
908 };
909
910 static const uint32_t pp_nv12_load_save_nv12_gen75[][4] = {
911 #include "shaders/post_processing/gen7/pl2_to_pl2.g75b"
912 };
913
914 static const uint32_t pp_nv12_load_save_pl3_gen75[][4] = {
915 #include "shaders/post_processing/gen7/pl2_to_pl3.g75b"
916 };
917
918 static const uint32_t pp_pl3_load_save_nv12_gen75[][4] = {
919 #include "shaders/post_processing/gen7/pl3_to_pl2.g75b"
920 };
921
922 static const uint32_t pp_pl3_load_save_pl3_gen75[][4] = {
923 #include "shaders/post_processing/gen7/pl3_to_pl3.g75b"
924 };
925
/* Scaling and AVS both include the same shader file (avs.g75b). */
926 static const uint32_t pp_nv12_scaling_gen75[][4] = {
927 #include "shaders/post_processing/gen7/avs.g75b"
928 };
929
930 static const uint32_t pp_nv12_avs_gen75[][4] = {
931 #include "shaders/post_processing/gen7/avs.g75b"
932 };
933
/*
 * The DNDI and DN shader includes below are commented out, so both arrays
 * are empty (sizeof is 0) even though the gen75 module table still lists
 * DNDI and DN entries that point at them.
 * NOTE(review): confirm whether these kernels are expected to be unused on
 * this generation or whether the shader files are simply missing.
 */
934 static const uint32_t pp_nv12_dndi_gen75[][4] = {
935 // #include "shaders/post_processing/gen7/dndi.g75b"
936 };
937
938 static const uint32_t pp_nv12_dn_gen75[][4] = {
939 // #include "shaders/post_processing/gen7/nv12_dn_nv12.g75b"
940 };
941 static const uint32_t pp_nv12_load_save_pa_gen75[][4] = {
942 #include "shaders/post_processing/gen7/pl2_to_pa.g75b"
943 };
944 static const uint32_t pp_pl3_load_save_pa_gen75[][4] = {
945 #include "shaders/post_processing/gen7/pl3_to_pa.g75b"
946 };
947 static const uint32_t pp_pa_load_save_nv12_gen75[][4] = {
948 #include "shaders/post_processing/gen7/pa_to_pl2.g75b"
949 };
950 static const uint32_t pp_pa_load_save_pl3_gen75[][4] = {
951 #include "shaders/post_processing/gen7/pa_to_pl3.g75b"
952 };
953 static const uint32_t pp_pa_load_save_pa_gen75[][4] = {
954 #include "shaders/post_processing/gen7/pa_to_pa.g75b"
955 };
956 static const uint32_t pp_rgbx_load_save_nv12_gen75[][4] = {
957 #include "shaders/post_processing/gen7/rgbx_to_nv12.g75b"
958 };
959 static const uint32_t pp_nv12_load_save_rgbx_gen75[][4] = {
960 #include "shaders/post_processing/gen7/pl2_to_rgbx.g75b"
961 };
962
963 static struct pp_module pp_modules_gen75[] = {
964     {
965         {
966             "NULL module (for testing)",
967             PP_NULL,
968             pp_null_gen75,
969             sizeof(pp_null_gen75),
970             NULL,
971         },
972
973         pp_null_initialize,
974     },
975
976     {
977         {
978             "NV12_NV12",
979             PP_NV12_LOAD_SAVE_N12,
980             pp_nv12_load_save_nv12_gen75,
981             sizeof(pp_nv12_load_save_nv12_gen75),
982             NULL,
983         },
984
985         gen7_pp_plx_avs_initialize,
986     },
987
988     {
989         {
990             "NV12_PL3",
991             PP_NV12_LOAD_SAVE_PL3,
992             pp_nv12_load_save_pl3_gen75,
993             sizeof(pp_nv12_load_save_pl3_gen75),
994             NULL,
995         },
996         
997         gen7_pp_plx_avs_initialize,
998     },
999
1000     {
1001         {
1002             "PL3_NV12",
1003             PP_PL3_LOAD_SAVE_N12,
1004             pp_pl3_load_save_nv12_gen75,
1005             sizeof(pp_pl3_load_save_nv12_gen75),
1006             NULL,
1007         },
1008
1009         gen7_pp_plx_avs_initialize,
1010     },
1011
1012     {
1013         {
1014             "PL3_PL3",
1015             PP_PL3_LOAD_SAVE_PL3,
1016             pp_pl3_load_save_pl3_gen75,
1017             sizeof(pp_pl3_load_save_pl3_gen75),
1018             NULL,
1019         },
1020
1021         gen7_pp_plx_avs_initialize,
1022     },
1023
1024     {
1025         {
1026             "NV12 Scaling module",
1027             PP_NV12_SCALING,
1028             pp_nv12_scaling_gen75,
1029             sizeof(pp_nv12_scaling_gen75),
1030             NULL,
1031         },
1032
1033         gen7_pp_plx_avs_initialize,
1034     },
1035
1036     {
1037         {
1038             "NV12 AVS module",
1039             PP_NV12_AVS,
1040             pp_nv12_avs_gen75,
1041             sizeof(pp_nv12_avs_gen75),
1042             NULL,
1043         },
1044
1045         gen7_pp_plx_avs_initialize,
1046     },
1047
1048     {
1049         {
1050             "NV12 DNDI module",
1051             PP_NV12_DNDI,
1052             pp_nv12_dndi_gen75,
1053             sizeof(pp_nv12_dndi_gen75),
1054             NULL,
1055         },
1056
1057         gen7_pp_nv12_dn_initialize,
1058     },
1059
1060     {
1061         {
1062             "NV12 DN module",
1063             PP_NV12_DN,
1064             pp_nv12_dn_gen75,
1065             sizeof(pp_nv12_dn_gen75),
1066             NULL,
1067         },
1068
1069         gen7_pp_nv12_dn_initialize,
1070     },
1071
1072     {
1073         {
1074             "NV12_PA module",
1075             PP_NV12_LOAD_SAVE_PA,
1076             pp_nv12_load_save_pa_gen75,
1077             sizeof(pp_nv12_load_save_pa_gen75),
1078             NULL,
1079         },
1080     
1081         gen7_pp_plx_avs_initialize,
1082     },
1083
1084     {
1085         {
1086             "PL3_PA module",
1087             PP_PL3_LOAD_SAVE_PA,
1088             pp_pl3_load_save_pa_gen75,
1089             sizeof(pp_pl3_load_save_pa_gen75),
1090             NULL,
1091         },
1092     
1093         gen7_pp_plx_avs_initialize,
1094     },
1095
1096     {
1097         {
1098             "PA_NV12 module",
1099             PP_PA_LOAD_SAVE_NV12,
1100             pp_pa_load_save_nv12_gen75,
1101             sizeof(pp_pa_load_save_nv12_gen75),
1102             NULL,
1103         },
1104     
1105         gen7_pp_plx_avs_initialize,
1106     },
1107
1108     {
1109         {
1110             "PA_PL3 module",
1111             PP_PA_LOAD_SAVE_PL3,
1112             pp_pa_load_save_pl3_gen75,
1113             sizeof(pp_pa_load_save_pl3_gen75),
1114             NULL,
1115         },
1116     
1117         gen7_pp_plx_avs_initialize,
1118     },
1119
1120     {
1121         {
1122             "PA_PA module",
1123             PP_PA_LOAD_SAVE_PA,
1124             pp_pa_load_save_pa_gen75,
1125             sizeof(pp_pa_load_save_pa_gen75),
1126             NULL,
1127         },
1128
1129         gen7_pp_plx_avs_initialize,
1130     },
1131
1132     {
1133         {
1134             "RGBX_NV12 module",
1135             PP_RGBX_LOAD_SAVE_NV12,
1136             pp_rgbx_load_save_nv12_gen75,
1137             sizeof(pp_rgbx_load_save_nv12_gen75),
1138             NULL,
1139         },
1140     
1141         gen7_pp_plx_avs_initialize,
1142     },
1143
1144     {
1145         {
1146             "NV12_RGBX module",
1147             PP_NV12_LOAD_SAVE_RGBX,
1148             pp_nv12_load_save_rgbx_gen75,
1149             sizeof(pp_nv12_load_save_rgbx_gen75),
1150             NULL,
1151         },
1152     
1153         gen7_pp_plx_avs_initialize,
1154     },
1155             
1156 };
1157
1158 /* TODO: Modify the shader and then compile it again.
1159  * Currently it is derived from Haswell*/
1160 static const uint32_t pp_null_gen8[][4] = {
1161 };
1162
1163 static const uint32_t pp_nv12_load_save_nv12_gen8[][4] = {
1164 #include "shaders/post_processing/gen8/pl2_to_pl2.g8b"
1165 };
1166
1167 static const uint32_t pp_nv12_load_save_pl3_gen8[][4] = {
1168 #include "shaders/post_processing/gen8/pl2_to_pl3.g8b"
1169 };
1170
1171 static const uint32_t pp_pl3_load_save_nv12_gen8[][4] = {
1172 #include "shaders/post_processing/gen8/pl3_to_pl2.g8b"
1173 };
1174
1175 static const uint32_t pp_pl3_load_save_pl3_gen8[][4] = {
1176 #include "shaders/post_processing/gen8/pl3_to_pl3.g8b"
1177 };
1178
1179 static const uint32_t pp_nv12_scaling_gen8[][4] = {
1180 #include "shaders/post_processing/gen8/pl2_to_pl2.g8b"
1181 };
1182
1183 static const uint32_t pp_nv12_avs_gen8[][4] = {
1184 #include "shaders/post_processing/gen8/pl2_to_pl2.g8b"
1185 };
1186
1187 static const uint32_t pp_nv12_dndi_gen8[][4] = {
1188 // #include "shaders/post_processing/gen7/dndi.g75b"
1189 };
1190
1191 static const uint32_t pp_nv12_dn_gen8[][4] = {
1192 // #include "shaders/post_processing/gen7/nv12_dn_nv12.g75b"
1193 };
1194 static const uint32_t pp_nv12_load_save_pa_gen8[][4] = {
1195 #include "shaders/post_processing/gen8/pl2_to_pa.g8b"
1196 };
1197 static const uint32_t pp_pl3_load_save_pa_gen8[][4] = {
1198 #include "shaders/post_processing/gen8/pl3_to_pa.g8b"
1199 };
1200 static const uint32_t pp_pa_load_save_nv12_gen8[][4] = {
1201 #include "shaders/post_processing/gen8/pa_to_pl2.g8b"
1202 };
1203 static const uint32_t pp_pa_load_save_pl3_gen8[][4] = {
1204 #include "shaders/post_processing/gen8/pa_to_pl3.g8b"
1205 };
1206 static const uint32_t pp_pa_load_save_pa_gen8[][4] = {
1207 #include "shaders/post_processing/gen8/pa_to_pa.g8b"
1208 };
1209 static const uint32_t pp_rgbx_load_save_nv12_gen8[][4] = {
1210 #include "shaders/post_processing/gen8/rgbx_to_nv12.g8b"
1211 };
1212 static const uint32_t pp_nv12_load_save_rgbx_gen8[][4] = {
1213 #include "shaders/post_processing/gen8/pl2_to_rgbx.g8b"
1214 };
1215
1216
1217 static struct pp_module pp_modules_gen8[] = {
1218     {
1219         {
1220             "NULL module (for testing)",
1221             PP_NULL,
1222             pp_null_gen8,
1223             sizeof(pp_null_gen8),
1224             NULL,
1225         },
1226
1227         pp_null_initialize,
1228     },
1229
1230     {
1231         {
1232             "NV12_NV12",
1233             PP_NV12_LOAD_SAVE_N12,
1234             pp_nv12_load_save_nv12_gen8,
1235             sizeof(pp_nv12_load_save_nv12_gen8),
1236             NULL,
1237         },
1238
1239         gen8_pp_plx_avs_initialize,
1240     },
1241
1242     {
1243         {
1244             "NV12_PL3",
1245             PP_NV12_LOAD_SAVE_PL3,
1246             pp_nv12_load_save_pl3_gen8,
1247             sizeof(pp_nv12_load_save_pl3_gen8),
1248             NULL,
1249         },
1250         
1251         gen8_pp_plx_avs_initialize,
1252     },
1253
1254     {
1255         {
1256             "PL3_NV12",
1257             PP_PL3_LOAD_SAVE_N12,
1258             pp_pl3_load_save_nv12_gen8,
1259             sizeof(pp_pl3_load_save_nv12_gen8),
1260             NULL,
1261         },
1262
1263         gen8_pp_plx_avs_initialize,
1264     },
1265
1266     {
1267         {
1268             "PL3_PL3",
1269             PP_PL3_LOAD_SAVE_N12,
1270             pp_pl3_load_save_pl3_gen8,
1271             sizeof(pp_pl3_load_save_pl3_gen8),
1272             NULL,
1273         },
1274
1275         gen8_pp_plx_avs_initialize,
1276     },
1277
1278     {
1279         {
1280             "NV12 Scaling module",
1281             PP_NV12_SCALING,
1282             pp_nv12_scaling_gen8,
1283             sizeof(pp_nv12_scaling_gen8),
1284             NULL,
1285         },
1286
1287         gen8_pp_plx_avs_initialize,
1288     },
1289
1290     {
1291         {
1292             "NV12 AVS module",
1293             PP_NV12_AVS,
1294             pp_nv12_avs_gen8,
1295             sizeof(pp_nv12_avs_gen8),
1296             NULL,
1297         },
1298
1299         gen8_pp_plx_avs_initialize,
1300     },
1301
1302     {
1303         {
1304             "NV12 DNDI module",
1305             PP_NV12_DNDI,
1306             pp_nv12_dndi_gen8,
1307             sizeof(pp_nv12_dndi_gen8),
1308             NULL,
1309         },
1310
1311         gen8_pp_plx_avs_initialize,
1312     },
1313
1314     {
1315         {
1316             "NV12 DN module",
1317             PP_NV12_DN,
1318             pp_nv12_dn_gen8,
1319             sizeof(pp_nv12_dn_gen8),
1320             NULL,
1321         },
1322
1323         pp_null_initialize,
1324     },
1325     {
1326         {
1327             "NV12_PA module",
1328             PP_NV12_LOAD_SAVE_PA,
1329             pp_nv12_load_save_pa_gen8,
1330             sizeof(pp_nv12_load_save_pa_gen8),
1331             NULL,
1332         },
1333     
1334         gen8_pp_plx_avs_initialize,
1335     },
1336
1337     {
1338         {
1339             "PL3_PA module",
1340             PP_PL3_LOAD_SAVE_PA,
1341             pp_pl3_load_save_pa_gen8,
1342             sizeof(pp_pl3_load_save_pa_gen8),
1343             NULL,
1344         },
1345     
1346         gen8_pp_plx_avs_initialize,
1347     },
1348
1349     {
1350         {
1351             "PA_NV12 module",
1352             PP_PA_LOAD_SAVE_NV12,
1353             pp_pa_load_save_nv12_gen8,
1354             sizeof(pp_pa_load_save_nv12_gen8),
1355             NULL,
1356         },
1357     
1358         gen8_pp_plx_avs_initialize,
1359     },
1360
1361     {
1362         {
1363             "PA_PL3 module",
1364             PP_PA_LOAD_SAVE_PL3,
1365             pp_pa_load_save_pl3_gen8,
1366             sizeof(pp_pa_load_save_pl3_gen8),
1367             NULL,
1368         },
1369     
1370         gen8_pp_plx_avs_initialize,
1371     },
1372     
1373     {
1374         {
1375             "PA_PA module",
1376             PP_PA_LOAD_SAVE_PA,
1377             pp_pa_load_save_pa_gen8,
1378             sizeof(pp_pa_load_save_pa_gen8),
1379             NULL,
1380         },
1381
1382         pp_null_initialize,
1383     },
1384
1385     {
1386         {
1387             "RGBX_NV12 module",
1388             PP_RGBX_LOAD_SAVE_NV12,
1389             pp_rgbx_load_save_nv12_gen8,
1390             sizeof(pp_rgbx_load_save_nv12_gen8),
1391             NULL,
1392         },
1393     
1394         gen8_pp_plx_avs_initialize,
1395     },
1396
1397     {
1398         {
1399             "NV12_RGBX module",
1400             PP_NV12_LOAD_SAVE_RGBX,
1401             pp_nv12_load_save_rgbx_gen8,
1402             sizeof(pp_nv12_load_save_rgbx_gen8),
1403             NULL,
1404         },
1405     
1406         gen8_pp_plx_avs_initialize,
1407     },
1408             
1409 };
1410
1411           
1412 static int
1413 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
1414 {
1415     int fourcc;
1416
1417     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
1418         struct object_image *obj_image = (struct object_image *)surface->base;
1419         fourcc = obj_image->image.format.fourcc;
1420     } else {
1421         struct object_surface *obj_surface = (struct object_surface *)surface->base;
1422         fourcc = obj_surface->fourcc;
1423     }
1424
1425     return fourcc;
1426 }
1427
1428 static void
1429 pp_get_surface_size(VADriverContextP ctx, const struct i965_surface *surface, int *width, int *height)
1430 {
1431     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
1432         struct object_image *obj_image = (struct object_image *)surface->base;
1433
1434         *width = obj_image->image.width;
1435         *height = obj_image->image.height;
1436     } else {
1437         struct object_surface *obj_surface = (struct object_surface *)surface->base;
1438
1439         *width = obj_surface->orig_width;
1440         *height = obj_surface->orig_height;
1441     }
1442 }
1443
1444 static void
1445 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
1446 {
1447     switch (tiling) {
1448     case I915_TILING_NONE:
1449         ss->ss3.tiled_surface = 0;
1450         ss->ss3.tile_walk = 0;
1451         break;
1452     case I915_TILING_X:
1453         ss->ss3.tiled_surface = 1;
1454         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
1455         break;
1456     case I915_TILING_Y:
1457         ss->ss3.tiled_surface = 1;
1458         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
1459         break;
1460     }
1461 }
1462
1463 static void
1464 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
1465 {
1466     switch (tiling) {
1467     case I915_TILING_NONE:
1468         ss->ss2.tiled_surface = 0;
1469         ss->ss2.tile_walk = 0;
1470         break;
1471     case I915_TILING_X:
1472         ss->ss2.tiled_surface = 1;
1473         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1474         break;
1475     case I915_TILING_Y:
1476         ss->ss2.tiled_surface = 1;
1477         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1478         break;
1479     }
1480 }
1481
1482 static void
1483 gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
1484 {
1485     switch (tiling) {
1486     case I915_TILING_NONE:
1487         ss->ss0.tiled_surface = 0;
1488         ss->ss0.tile_walk = 0;
1489         break;
1490     case I915_TILING_X:
1491         ss->ss0.tiled_surface = 1;
1492         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
1493         break;
1494     case I915_TILING_Y:
1495         ss->ss0.tiled_surface = 1;
1496         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
1497         break;
1498     }
1499 }
1500
1501 static void
1502 gen8_pp_set_surface_tiling(struct gen8_surface_state *ss, unsigned int tiling)
1503 {
1504     switch (tiling) {
1505     case I915_TILING_NONE:
1506         ss->ss0.tiled_surface = 0;
1507         ss->ss0.tile_walk = 0;
1508         break;
1509     case I915_TILING_X:
1510         ss->ss0.tiled_surface = 1;
1511         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
1512         break;
1513     case I915_TILING_Y:
1514         ss->ss0.tiled_surface = 1;
1515         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
1516         break;
1517     }
1518 }
1519
1520 static void
1521 gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
1522 {
1523     switch (tiling) {
1524     case I915_TILING_NONE:
1525         ss->ss2.tiled_surface = 0;
1526         ss->ss2.tile_walk = 0;
1527         break;
1528     case I915_TILING_X:
1529         ss->ss2.tiled_surface = 1;
1530         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1531         break;
1532     case I915_TILING_Y:
1533         ss->ss2.tiled_surface = 1;
1534         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1535         break;
1536     }
1537 }
1538
1539 static void
1540 gen8_pp_set_surface2_tiling(struct gen8_surface_state2 *ss, unsigned int tiling)
1541 {
1542     switch (tiling) {
1543     case I915_TILING_NONE:
1544         ss->ss2.tiled_surface = 0;
1545         ss->ss2.tile_walk = 0;
1546         break;
1547     case I915_TILING_X:
1548         ss->ss2.tiled_surface = 1;
1549         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1550         break;
1551     case I915_TILING_Y:
1552         ss->ss2.tiled_surface = 1;
1553         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1554         break;
1555     }
1556 }
1557
1558 static void
1559 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
1560 {
1561     struct i965_interface_descriptor *desc;
1562     dri_bo *bo;
1563     int pp_index = pp_context->current_pp;
1564
1565     bo = pp_context->idrt.bo;
1566     dri_bo_map(bo, 1);
1567     assert(bo->virtual);
1568     desc = bo->virtual;
1569     memset(desc, 0, sizeof(*desc));
1570     desc->desc0.grf_reg_blocks = 10;
1571     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
1572     desc->desc1.const_urb_entry_read_offset = 0;
1573     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
1574     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
1575     desc->desc2.sampler_count = 0;
1576     desc->desc3.binding_table_entry_count = 0;
1577     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
1578
1579     dri_bo_emit_reloc(bo,
1580                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1581                       desc->desc0.grf_reg_blocks,
1582                       offsetof(struct i965_interface_descriptor, desc0),
1583                       pp_context->pp_modules[pp_index].kernel.bo);
1584
1585     dri_bo_emit_reloc(bo,
1586                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1587                       desc->desc2.sampler_count << 2,
1588                       offsetof(struct i965_interface_descriptor, desc2),
1589                       pp_context->sampler_state_table.bo);
1590
1591     dri_bo_unmap(bo);
1592     pp_context->idrt.num_interface_descriptors++;
1593 }
1594
1595 static void
1596 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
1597 {
1598     struct i965_vfe_state *vfe_state;
1599     dri_bo *bo;
1600
1601     bo = pp_context->vfe_state.bo;
1602     dri_bo_map(bo, 1);
1603     assert(bo->virtual);
1604     vfe_state = bo->virtual;
1605     memset(vfe_state, 0, sizeof(*vfe_state));
1606     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
1607     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
1608     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
1609     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
1610     vfe_state->vfe1.children_present = 0;
1611     vfe_state->vfe2.interface_descriptor_base = 
1612         pp_context->idrt.bo->offset >> 4; /* reloc */
1613     dri_bo_emit_reloc(bo,
1614                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1615                       0,
1616                       offsetof(struct i965_vfe_state, vfe2),
1617                       pp_context->idrt.bo);
1618     dri_bo_unmap(bo);
1619 }
1620
1621 static void
1622 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
1623 {
1624     unsigned char *constant_buffer;
1625     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1626
1627     assert(sizeof(*pp_static_parameter) == 128);
1628     dri_bo_map(pp_context->curbe.bo, 1);
1629     assert(pp_context->curbe.bo->virtual);
1630     constant_buffer = pp_context->curbe.bo->virtual;
1631     memcpy(constant_buffer, pp_static_parameter, sizeof(*pp_static_parameter));
1632     dri_bo_unmap(pp_context->curbe.bo);
1633 }
1634
1635 static void
1636 ironlake_pp_states_setup(VADriverContextP ctx,
1637                          struct i965_post_processing_context *pp_context)
1638 {
1639     ironlake_pp_interface_descriptor_table(pp_context);
1640     ironlake_pp_vfe_state(pp_context);
1641     ironlake_pp_upload_constants(pp_context);
1642 }
1643
1644 static void
1645 ironlake_pp_pipeline_select(VADriverContextP ctx,
1646                             struct i965_post_processing_context *pp_context)
1647 {
1648     struct intel_batchbuffer *batch = pp_context->batch;
1649
1650     BEGIN_BATCH(batch, 1);
1651     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
1652     ADVANCE_BATCH(batch);
1653 }
1654
1655 static void
1656 ironlake_pp_urb_layout(VADriverContextP ctx,
1657                        struct i965_post_processing_context *pp_context)
1658 {
1659     struct intel_batchbuffer *batch = pp_context->batch;
1660     unsigned int vfe_fence, cs_fence;
1661
1662     vfe_fence = pp_context->urb.cs_start;
1663     cs_fence = pp_context->urb.size;
1664
1665     BEGIN_BATCH(batch, 3);
1666     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
1667     OUT_BATCH(batch, 0);
1668     OUT_BATCH(batch, 
1669               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
1670               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
1671     ADVANCE_BATCH(batch);
1672 }
1673
1674 static void
1675 ironlake_pp_state_base_address(VADriverContextP ctx,
1676                                struct i965_post_processing_context *pp_context)
1677 {
1678     struct intel_batchbuffer *batch = pp_context->batch;
1679
1680     BEGIN_BATCH(batch, 8);
1681     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
1682     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1683     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
1684     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1685     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1686     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1687     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1688     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1689     ADVANCE_BATCH(batch);
1690 }
1691
1692 static void
1693 ironlake_pp_state_pointers(VADriverContextP ctx,
1694                            struct i965_post_processing_context *pp_context)
1695 {
1696     struct intel_batchbuffer *batch = pp_context->batch;
1697
1698     BEGIN_BATCH(batch, 3);
1699     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
1700     OUT_BATCH(batch, 0);
1701     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1702     ADVANCE_BATCH(batch);
1703 }
1704
1705 static void 
1706 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
1707                           struct i965_post_processing_context *pp_context)
1708 {
1709     struct intel_batchbuffer *batch = pp_context->batch;
1710
1711     BEGIN_BATCH(batch, 2);
1712     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
1713     OUT_BATCH(batch,
1714               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
1715               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
1716     ADVANCE_BATCH(batch);
1717 }
1718
1719 static void
1720 ironlake_pp_constant_buffer(VADriverContextP ctx,
1721                             struct i965_post_processing_context *pp_context)
1722 {
1723     struct intel_batchbuffer *batch = pp_context->batch;
1724
1725     BEGIN_BATCH(batch, 2);
1726     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1727     OUT_RELOC(batch, pp_context->curbe.bo,
1728               I915_GEM_DOMAIN_INSTRUCTION, 0,
1729               pp_context->urb.size_cs_entry - 1);
1730     ADVANCE_BATCH(batch);    
1731 }
1732
1733 static void
1734 ironlake_pp_object_walker(VADriverContextP ctx,
1735                           struct i965_post_processing_context *pp_context)
1736 {
1737     struct intel_batchbuffer *batch = pp_context->batch;
1738     int x, x_steps, y, y_steps;
1739     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1740
1741     x_steps = pp_context->pp_x_steps(pp_context->private_context);
1742     y_steps = pp_context->pp_y_steps(pp_context->private_context);
1743
1744     for (y = 0; y < y_steps; y++) {
1745         for (x = 0; x < x_steps; x++) {
1746             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
1747                 BEGIN_BATCH(batch, 20);
1748                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
1749                 OUT_BATCH(batch, 0);
1750                 OUT_BATCH(batch, 0); /* no indirect data */
1751                 OUT_BATCH(batch, 0);
1752
1753                 /* inline data grf 5-6 */
1754                 assert(sizeof(*pp_inline_parameter) == 64);
1755                 intel_batchbuffer_data(batch, pp_inline_parameter, sizeof(*pp_inline_parameter));
1756
1757                 ADVANCE_BATCH(batch);
1758             }
1759         }
1760     }
1761 }
1762
1763 static void
1764 ironlake_pp_pipeline_setup(VADriverContextP ctx,
1765                            struct i965_post_processing_context *pp_context)
1766 {
1767     struct intel_batchbuffer *batch = pp_context->batch;
1768
1769     intel_batchbuffer_start_atomic(batch, 0x1000);
1770     intel_batchbuffer_emit_mi_flush(batch);
1771     ironlake_pp_pipeline_select(ctx, pp_context);
1772     ironlake_pp_state_base_address(ctx, pp_context);
1773     ironlake_pp_state_pointers(ctx, pp_context);
1774     ironlake_pp_urb_layout(ctx, pp_context);
1775     ironlake_pp_cs_urb_layout(ctx, pp_context);
1776     ironlake_pp_constant_buffer(ctx, pp_context);
1777     ironlake_pp_object_walker(ctx, pp_context);
1778     intel_batchbuffer_end_atomic(batch);
1779 }
1780
1781 // update u/v offset when the surface format are packed yuv
1782 static void i965_update_src_surface_static_parameter(
1783     VADriverContextP    ctx, 
1784     struct i965_post_processing_context *pp_context,
1785     const struct i965_surface *surface)
1786 {
1787     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1788     int fourcc = pp_get_surface_fourcc(ctx, surface);
1789
1790     switch (fourcc) {
1791     case VA_FOURCC('Y', 'U', 'Y', '2'):
1792         pp_static_parameter->grf1.source_packed_u_offset = 1;
1793         pp_static_parameter->grf1.source_packed_v_offset = 3;
1794         break;
1795     case VA_FOURCC('U', 'Y', 'V', 'Y'):
1796         pp_static_parameter->grf1.source_packed_y_offset = 1;
1797         pp_static_parameter->grf1.source_packed_v_offset = 2;
1798         break;
1799     case VA_FOURCC('B', 'G', 'R', 'X'):
1800     case VA_FOURCC('B', 'G', 'R', 'A'):
1801         pp_static_parameter->grf1.source_rgb_layout = 0;
1802         break;
1803     case VA_FOURCC('R', 'G', 'B', 'X'):
1804     case VA_FOURCC('R', 'G', 'B', 'A'):
1805         pp_static_parameter->grf1.source_rgb_layout = 1;
1806         break;
1807     default:
1808         break;
1809     }
1810     
1811 }
1812
1813 static void i965_update_dst_surface_static_parameter(
1814     VADriverContextP    ctx, 
1815     struct i965_post_processing_context *pp_context,
1816     const struct i965_surface *surface)
1817 {
1818     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1819     int fourcc = pp_get_surface_fourcc(ctx, surface);
1820
1821     switch (fourcc) {
1822     case VA_FOURCC('Y', 'U', 'Y', '2'):
1823         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_u_offset = 1;
1824         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 3;
1825         break;
1826     case VA_FOURCC('U', 'Y', 'V', 'Y'):
1827         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_y_offset = 1;
1828         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 2;
1829         break;
1830     case VA_FOURCC('B', 'G', 'R', 'X'):
1831     case VA_FOURCC('B', 'G', 'R', 'A'):
1832         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 0;
1833         break;
1834     case VA_FOURCC('R', 'G', 'B', 'X'):
1835     case VA_FOURCC('R', 'G', 'B', 'A'):
1836         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 1;
1837         break;
1838     default:
1839         break;
1840     }
1841     
1842 }
1843
1844 static void
1845 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1846                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1847                           int width, int height, int pitch, int format, 
1848                           int index, int is_target)
1849 {
1850     struct i965_surface_state *ss;
1851     dri_bo *ss_bo;
1852     unsigned int tiling;
1853     unsigned int swizzle;
1854
1855     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1856     ss_bo = pp_context->surface_state_binding_table.bo;
1857     assert(ss_bo);
1858
1859     dri_bo_map(ss_bo, True);
1860     assert(ss_bo->virtual);
1861     ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1862     memset(ss, 0, sizeof(*ss));
1863     ss->ss0.surface_type = I965_SURFACE_2D;
1864     ss->ss0.surface_format = format;
1865     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1866     ss->ss2.width = width - 1;
1867     ss->ss2.height = height - 1;
1868     ss->ss3.pitch = pitch - 1;
1869     pp_set_surface_tiling(ss, tiling);
1870     dri_bo_emit_reloc(ss_bo,
1871                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1872                       surf_bo_offset,
1873                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
1874                       surf_bo);
1875     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1876     dri_bo_unmap(ss_bo);
1877 }
1878
1879 static void
1880 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1881                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1882                            int width, int height, int wpitch,
1883                            int xoffset, int yoffset,
1884                            int format, int interleave_chroma,
1885                            int index)
1886 {
1887     struct i965_surface_state2 *ss2;
1888     dri_bo *ss2_bo;
1889     unsigned int tiling;
1890     unsigned int swizzle;
1891
1892     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1893     ss2_bo = pp_context->surface_state_binding_table.bo;
1894     assert(ss2_bo);
1895
1896     dri_bo_map(ss2_bo, True);
1897     assert(ss2_bo->virtual);
1898     ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1899     memset(ss2, 0, sizeof(*ss2));
1900     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1901     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1902     ss2->ss1.width = width - 1;
1903     ss2->ss1.height = height - 1;
1904     ss2->ss2.pitch = wpitch - 1;
1905     ss2->ss2.interleave_chroma = interleave_chroma;
1906     ss2->ss2.surface_format = format;
1907     ss2->ss3.x_offset_for_cb = xoffset;
1908     ss2->ss3.y_offset_for_cb = yoffset;
1909     pp_set_surface2_tiling(ss2, tiling);
1910     dri_bo_emit_reloc(ss2_bo,
1911                       I915_GEM_DOMAIN_RENDER, 0,
1912                       surf_bo_offset,
1913                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
1914                       surf_bo);
1915     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1916     dri_bo_unmap(ss2_bo);
1917 }
1918
1919 static void
1920 gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1921                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1922                           int width, int height, int pitch, int format, 
1923                           int index, int is_target)
1924 {
1925     struct i965_driver_data * const i965 = i965_driver_data(ctx);  
1926     struct gen7_surface_state *ss;
1927     dri_bo *ss_bo;
1928     unsigned int tiling;
1929     unsigned int swizzle;
1930
1931     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1932     ss_bo = pp_context->surface_state_binding_table.bo;
1933     assert(ss_bo);
1934
1935     dri_bo_map(ss_bo, True);
1936     assert(ss_bo->virtual);
1937     ss = (struct gen7_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1938     memset(ss, 0, sizeof(*ss));
1939     ss->ss0.surface_type = I965_SURFACE_2D;
1940     ss->ss0.surface_format = format;
1941     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1942     ss->ss2.width = width - 1;
1943     ss->ss2.height = height - 1;
1944     ss->ss3.pitch = pitch - 1;
1945     gen7_pp_set_surface_tiling(ss, tiling);
1946     if (IS_HASWELL(i965->intel.device_id))
1947         gen7_render_set_surface_scs(ss);
1948     dri_bo_emit_reloc(ss_bo,
1949                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1950                       surf_bo_offset,
1951                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
1952                       surf_bo);
1953     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1954     dri_bo_unmap(ss_bo);
1955 }
1956
1957 static void
1958 gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1959                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1960                            int width, int height, int wpitch,
1961                            int xoffset, int yoffset,
1962                            int format, int interleave_chroma,
1963                            int index)
1964 {
1965     struct gen7_surface_state2 *ss2;
1966     dri_bo *ss2_bo;
1967     unsigned int tiling;
1968     unsigned int swizzle;
1969
1970     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1971     ss2_bo = pp_context->surface_state_binding_table.bo;
1972     assert(ss2_bo);
1973
1974     dri_bo_map(ss2_bo, True);
1975     assert(ss2_bo->virtual);
1976     ss2 = (struct gen7_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1977     memset(ss2, 0, sizeof(*ss2));
1978     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1979     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1980     ss2->ss1.width = width - 1;
1981     ss2->ss1.height = height - 1;
1982     ss2->ss2.pitch = wpitch - 1;
1983     ss2->ss2.interleave_chroma = interleave_chroma;
1984     ss2->ss2.surface_format = format;
1985     ss2->ss3.x_offset_for_cb = xoffset;
1986     ss2->ss3.y_offset_for_cb = yoffset;
1987     gen7_pp_set_surface2_tiling(ss2, tiling);
1988     dri_bo_emit_reloc(ss2_bo,
1989                       I915_GEM_DOMAIN_RENDER, 0,
1990                       surf_bo_offset,
1991                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
1992                       surf_bo);
1993     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1994     dri_bo_unmap(ss2_bo);
1995 }
1996
1997 static void
1998 gen8_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1999                           dri_bo *surf_bo, unsigned long surf_bo_offset,
2000                           int width, int height, int pitch, int format, 
2001                           int index, int is_target)
2002 {
2003     struct gen8_surface_state *ss;
2004     dri_bo *ss_bo;
2005     unsigned int tiling;
2006     unsigned int swizzle;
2007
2008     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
2009     ss_bo = pp_context->surface_state_binding_table.bo;
2010     assert(ss_bo);
2011
2012     dri_bo_map(ss_bo, True);
2013     assert(ss_bo->virtual);
2014     ss = (struct gen8_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
2015     memset(ss, 0, sizeof(*ss));
2016     ss->ss0.surface_type = I965_SURFACE_2D;
2017     ss->ss0.surface_format = format;
2018     ss->ss8.base_addr = surf_bo->offset + surf_bo_offset;
2019     ss->ss2.width = width - 1;
2020     ss->ss2.height = height - 1;
2021     ss->ss3.pitch = pitch - 1;
2022
2023     /* Always set 1(align 4 mode) per B-spec */
2024     ss->ss0.vertical_alignment = 1;
2025     ss->ss0.horizontal_alignment = 1;
2026
2027     gen8_pp_set_surface_tiling(ss, tiling);
2028     gen8_render_set_surface_scs(ss);
2029     dri_bo_emit_reloc(ss_bo,
2030                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
2031                       surf_bo_offset,
2032                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
2033                       surf_bo);
2034     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
2035     dri_bo_unmap(ss_bo);
2036 }
2037
2038
2039 static void
2040 gen8_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2041                            dri_bo *surf_bo, unsigned long surf_bo_offset,
2042                            int width, int height, int wpitch,
2043                            int xoffset, int yoffset,
2044                            int format, int interleave_chroma,
2045                            int index)
2046 {
2047     struct gen8_surface_state2 *ss2;
2048     dri_bo *ss2_bo;
2049     unsigned int tiling;
2050     unsigned int swizzle;
2051
2052     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
2053     ss2_bo = pp_context->surface_state_binding_table.bo;
2054     assert(ss2_bo);
2055
2056     dri_bo_map(ss2_bo, True);
2057     assert(ss2_bo->virtual);
2058     ss2 = (struct gen8_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
2059     memset(ss2, 0, sizeof(*ss2));
2060     ss2->ss6.base_addr = surf_bo->offset + surf_bo_offset;
2061     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
2062     ss2->ss1.width = width - 1;
2063     ss2->ss1.height = height - 1;
2064     ss2->ss2.pitch = wpitch - 1;
2065     ss2->ss2.interleave_chroma = interleave_chroma;
2066     ss2->ss2.surface_format = format;
2067     ss2->ss3.x_offset_for_cb = xoffset;
2068     ss2->ss3.y_offset_for_cb = yoffset;
2069     gen8_pp_set_surface2_tiling(ss2, tiling);
2070     dri_bo_emit_reloc(ss2_bo,
2071                       I915_GEM_DOMAIN_RENDER, 0,
2072                       surf_bo_offset,
2073                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state2, ss6),
2074                       surf_bo);
2075     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
2076     dri_bo_unmap(ss2_bo);
2077 }
2078
2079 static void 
2080 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2081                                 const struct i965_surface *surface, 
2082                                 int base_index, int is_target,
2083                                 int *width, int *height, int *pitch, int *offset)
2084 {
2085     struct object_surface *obj_surface;
2086     struct object_image *obj_image;
2087     dri_bo *bo;
2088     int fourcc = pp_get_surface_fourcc(ctx, surface);
2089     const int Y = 0;
2090     const int U = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 2 : 1;
2091     const int V = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 1 : 2;
2092     const int UV = 1;
2093     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
2094     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); 
2095     int full_packed_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || 
2096                               fourcc == VA_FOURCC('R', 'G', 'B', 'X') || 
2097                               fourcc == VA_FOURCC('B', 'G', 'R', 'A') || 
2098                               fourcc == VA_FOURCC('B', 'G', 'R', 'X'));
2099     int scale_factor_of_1st_plane_width_in_byte = 1;
2100                               
2101     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
2102         obj_surface = (struct object_surface *)surface->base;
2103         bo = obj_surface->bo;
2104         width[0] = obj_surface->orig_width;
2105         height[0] = obj_surface->orig_height;
2106         pitch[0] = obj_surface->width;
2107         offset[0] = 0;
2108
2109         if (full_packed_format) {
2110             scale_factor_of_1st_plane_width_in_byte = 4; 
2111         }
2112         else if (packed_yuv ) {
2113             scale_factor_of_1st_plane_width_in_byte =  2; 
2114         }
2115         else if (interleaved_uv) {
2116             width[1] = obj_surface->orig_width;
2117             height[1] = obj_surface->orig_height / 2;
2118             pitch[1] = obj_surface->width;
2119             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
2120         } else {
2121             width[1] = obj_surface->orig_width / 2;
2122             height[1] = obj_surface->orig_height / 2;
2123             pitch[1] = obj_surface->width / 2;
2124             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
2125             width[2] = obj_surface->orig_width / 2;
2126             height[2] = obj_surface->orig_height / 2;
2127             pitch[2] = obj_surface->width / 2;
2128             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
2129         }
2130     } else {
2131         obj_image = (struct object_image *)surface->base;
2132         bo = obj_image->bo;
2133         width[0] = obj_image->image.width;
2134         height[0] = obj_image->image.height;
2135         pitch[0] = obj_image->image.pitches[0];
2136         offset[0] = obj_image->image.offsets[0];
2137
2138         if (full_packed_format) {
2139             scale_factor_of_1st_plane_width_in_byte = 4;
2140         }
2141         else if (packed_yuv ) {
2142             scale_factor_of_1st_plane_width_in_byte = 2;
2143         }
2144         else if (interleaved_uv) {
2145             width[1] = obj_image->image.width;
2146             height[1] = obj_image->image.height / 2;
2147             pitch[1] = obj_image->image.pitches[1];
2148             offset[1] = obj_image->image.offsets[1];
2149         } else {
2150             width[1] = obj_image->image.width / 2;
2151             height[1] = obj_image->image.height / 2;
2152             pitch[1] = obj_image->image.pitches[1];
2153             offset[1] = obj_image->image.offsets[1];
2154             width[2] = obj_image->image.width / 2;
2155             height[2] = obj_image->image.height / 2;
2156             pitch[2] = obj_image->image.pitches[2];
2157             offset[2] = obj_image->image.offsets[2];
2158         }
2159     }
2160
2161     /* Y surface */
2162     i965_pp_set_surface_state(ctx, pp_context,
2163                               bo, offset[Y],
2164                               width[Y] *scale_factor_of_1st_plane_width_in_byte / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
2165                               base_index, is_target);
2166
2167     if (!packed_yuv && !full_packed_format) {
2168         if (interleaved_uv) {
2169             i965_pp_set_surface_state(ctx, pp_context,
2170                                       bo, offset[UV],
2171                                       width[UV] / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
2172                                       base_index + 1, is_target);
2173         } else {
2174             /* U surface */
2175             i965_pp_set_surface_state(ctx, pp_context,
2176                                       bo, offset[U],
2177                                       width[U] / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
2178                                       base_index + 1, is_target);
2179
2180             /* V surface */
2181             i965_pp_set_surface_state(ctx, pp_context,
2182                                       bo, offset[V],
2183                                       width[V] / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
2184                                       base_index + 2, is_target);
2185         }
2186     }
2187
2188 }
2189
2190 static void 
2191 gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2192                                      const struct i965_surface *surface, 
2193                                      int base_index, int is_target,
2194                                      int *width, int *height, int *pitch, int *offset)
2195 {
2196     struct object_surface *obj_surface;
2197     struct object_image *obj_image;
2198     dri_bo *bo;
2199     int fourcc = pp_get_surface_fourcc(ctx, surface);
2200     const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
2201                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1;
2202     const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
2203                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 1 : 2;
2204     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
2205     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y'));
2206     int rgbx_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || 
2207                               fourcc == VA_FOURCC('R', 'G', 'B', 'X') || 
2208                               fourcc == VA_FOURCC('B', 'G', 'R', 'A') || 
2209                               fourcc == VA_FOURCC('B', 'G', 'R', 'X'));
2210
2211     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
2212         obj_surface = (struct object_surface *)surface->base;
2213         bo = obj_surface->bo;
2214         width[0] = obj_surface->orig_width;
2215         height[0] = obj_surface->orig_height;
2216         pitch[0] = obj_surface->width;
2217         offset[0] = 0;
2218
2219         if (packed_yuv) {
2220             if (is_target)
2221                 width[0] = obj_surface->orig_width * 2; /* surface format is R8, so double the width */
2222             else
2223                 width[0] = obj_surface->orig_width;     /* surface foramt is YCBCR, width is specified in units of pixels */
2224         } else if (rgbx_format) {
2225             if (is_target)
2226                 width[0] = obj_surface->orig_width * 4; /* surface format is R8, so quad the width */
2227         }
2228
2229         width[1] = obj_surface->cb_cr_width;
2230         height[1] = obj_surface->cb_cr_height;
2231         pitch[1] = obj_surface->cb_cr_pitch;
2232         offset[1] = obj_surface->y_cb_offset * obj_surface->width;
2233
2234         width[2] = obj_surface->cb_cr_width;
2235         height[2] = obj_surface->cb_cr_height;
2236         pitch[2] = obj_surface->cb_cr_pitch;
2237         offset[2] = obj_surface->y_cr_offset * obj_surface->width;
2238     } else {
2239         obj_image = (struct object_image *)surface->base;
2240         bo = obj_image->bo;
2241         width[0] = obj_image->image.width;
2242         height[0] = obj_image->image.height;
2243         pitch[0] = obj_image->image.pitches[0];
2244         offset[0] = obj_image->image.offsets[0];
2245
2246         if (rgbx_format) {
2247             if (is_target)
2248                 width[0] = obj_image->image.width * 4; /* surface format is R8, so quad the width */
2249         } else if (packed_yuv) {
2250             if (is_target)
2251                 width[0] = obj_image->image.width * 2;  /* surface format is R8, so double the width */
2252             else
2253                 width[0] = obj_image->image.width;      /* surface foramt is YCBCR, width is specified in units of pixels */
2254         } else if (interleaved_uv) {
2255             width[1] = obj_image->image.width / 2;
2256             height[1] = obj_image->image.height / 2;
2257             pitch[1] = obj_image->image.pitches[1];
2258             offset[1] = obj_image->image.offsets[1];
2259         } else {
2260             width[1] = obj_image->image.width / 2;
2261             height[1] = obj_image->image.height / 2;
2262             pitch[1] = obj_image->image.pitches[U];
2263             offset[1] = obj_image->image.offsets[U];
2264             width[2] = obj_image->image.width / 2;
2265             height[2] = obj_image->image.height / 2;
2266             pitch[2] = obj_image->image.pitches[V];
2267             offset[2] = obj_image->image.offsets[V];
2268         }
2269     }
2270
2271     if (is_target) {
2272         gen7_pp_set_surface_state(ctx, pp_context,
2273                                   bo, 0,
2274                                   width[0] / 4, height[0], pitch[0],
2275                                   I965_SURFACEFORMAT_R8_UINT,
2276                                   base_index, 1);
2277         if (rgbx_format) {
2278                 struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2279                 /* the format is MSB: X-B-G-R */
2280                 pp_static_parameter->grf2.save_avs_rgb_swap = 0;
2281                 if ((fourcc == VA_FOURCC('B', 'G', 'R', 'A')) || 
2282                               (fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) {
2283                         /* It is stored as MSB: X-R-G-B */
2284                         pp_static_parameter->grf2.save_avs_rgb_swap = 1;
2285                 }
2286         }
2287         if (!packed_yuv && !rgbx_format) {
2288             if (interleaved_uv) {
2289                 gen7_pp_set_surface_state(ctx, pp_context,
2290                                           bo, offset[1],
2291                                           width[1] / 2, height[1], pitch[1],
2292                                           I965_SURFACEFORMAT_R8G8_SINT,
2293                                           base_index + 1, 1);
2294             } else {
2295                 gen7_pp_set_surface_state(ctx, pp_context,
2296                                           bo, offset[1],
2297                                           width[1] / 4, height[1], pitch[1],
2298                                           I965_SURFACEFORMAT_R8_SINT,
2299                                           base_index + 1, 1);
2300                 gen7_pp_set_surface_state(ctx, pp_context,
2301                                           bo, offset[2],
2302                                           width[2] / 4, height[2], pitch[2],
2303                                           I965_SURFACEFORMAT_R8_SINT,
2304                                           base_index + 2, 1);
2305             }
2306         }
2307     } else {
2308         int format0 = SURFACE_FORMAT_Y8_UNORM;
2309
2310         switch (fourcc) {
2311         case VA_FOURCC('Y', 'U', 'Y', '2'):
2312             format0 = SURFACE_FORMAT_YCRCB_NORMAL;
2313             break;
2314
2315         case VA_FOURCC('U', 'Y', 'V', 'Y'):
2316             format0 = SURFACE_FORMAT_YCRCB_SWAPY;
2317             break;
2318
2319         default:
2320             break;
2321         }
2322         if (rgbx_format) {
2323             struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2324             /* Only R8G8B8A8_UNORM is supported for BGRX or RGBX */
2325             format0 = SURFACE_FORMAT_R8G8B8A8_UNORM;
2326             pp_static_parameter->grf2.src_avs_rgb_swap = 0;
2327             if ((fourcc == VA_FOURCC('B', 'G', 'R', 'A')) || 
2328                               (fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) {
2329                 pp_static_parameter->grf2.src_avs_rgb_swap = 1;
2330             }
2331         }
2332         gen7_pp_set_surface2_state(ctx, pp_context,
2333                                    bo, offset[0],
2334                                    width[0], height[0], pitch[0],
2335                                    0, 0,
2336                                    format0, 0,
2337                                    base_index);
2338
2339         if (!packed_yuv && !rgbx_format) {
2340             if (interleaved_uv) {
2341                 gen7_pp_set_surface2_state(ctx, pp_context,
2342                                            bo, offset[1],
2343                                            width[1], height[1], pitch[1],
2344                                            0, 0,
2345                                            SURFACE_FORMAT_R8B8_UNORM, 0,
2346                                            base_index + 1);
2347             } else {
2348                 gen7_pp_set_surface2_state(ctx, pp_context,
2349                                            bo, offset[1],
2350                                            width[1], height[1], pitch[1],
2351                                            0, 0,
2352                                            SURFACE_FORMAT_R8_UNORM, 0,
2353                                            base_index + 1);
2354                 gen7_pp_set_surface2_state(ctx, pp_context,
2355                                            bo, offset[2],
2356                                            width[2], height[2], pitch[2],
2357                                            0, 0,
2358                                            SURFACE_FORMAT_R8_UNORM, 0,
2359                                            base_index + 2);
2360             }
2361         }
2362     }
2363 }
2364
2365 static void 
2366 gen8_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2367                                      const struct i965_surface *surface, 
2368                                      int base_index, int is_target,
2369                                      int *width, int *height, int *pitch, int *offset)
2370 {
2371     struct object_surface *obj_surface;
2372     struct object_image *obj_image;
2373     dri_bo *bo;
2374     int fourcc = pp_get_surface_fourcc(ctx, surface);
2375     const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
2376                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1;
2377     const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
2378                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 1 : 2;
2379     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
2380     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y'));
2381     int rgbx_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || 
2382                               fourcc == VA_FOURCC('R', 'G', 'B', 'X') || 
2383                               fourcc == VA_FOURCC('B', 'G', 'R', 'A') || 
2384                               fourcc == VA_FOURCC('B', 'G', 'R', 'X'));
2385
2386     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
2387         obj_surface = (struct object_surface *)surface->base;
2388         bo = obj_surface->bo;
2389         width[0] = obj_surface->orig_width;
2390         height[0] = obj_surface->orig_height;
2391         pitch[0] = obj_surface->width;
2392         offset[0] = 0;
2393
2394         if (packed_yuv) {
2395             if (is_target)
2396                 width[0] = obj_surface->orig_width * 2; /* surface format is R8, so double the width */
2397             else
2398                 width[0] = obj_surface->orig_width;     /* surface foramt is YCBCR, width is specified in units of pixels */
2399
2400         } else if (rgbx_format) {
2401             if (is_target)
2402                 width[0] = obj_surface->orig_width * 4; /* surface format is R8, so quad the width */
2403         }
2404
2405         width[1] = obj_surface->cb_cr_width;
2406         height[1] = obj_surface->cb_cr_height;
2407         pitch[1] = obj_surface->cb_cr_pitch;
2408         offset[1] = obj_surface->y_cb_offset * obj_surface->width;
2409
2410         width[2] = obj_surface->cb_cr_width;
2411         height[2] = obj_surface->cb_cr_height;
2412         pitch[2] = obj_surface->cb_cr_pitch;
2413         offset[2] = obj_surface->y_cr_offset * obj_surface->width;
2414     } else {
2415         obj_image = (struct object_image *)surface->base;
2416         bo = obj_image->bo;
2417         width[0] = obj_image->image.width;
2418         height[0] = obj_image->image.height;
2419         pitch[0] = obj_image->image.pitches[0];
2420         offset[0] = obj_image->image.offsets[0];
2421
2422         if (rgbx_format) {
2423             if (is_target)
2424                 width[0] = obj_image->image.width * 4; /* surface format is R8, so quad the width */
2425         } else if (packed_yuv) {
2426             if (is_target)
2427                 width[0] = obj_image->image.width * 2;  /* surface format is R8, so double the width */
2428             else
2429                 width[0] = obj_image->image.width;      /* surface foramt is YCBCR, width is specified in units of pixels */
2430         } else if (interleaved_uv) {
2431             width[1] = obj_image->image.width / 2;
2432             height[1] = obj_image->image.height / 2;
2433             pitch[1] = obj_image->image.pitches[1];
2434             offset[1] = obj_image->image.offsets[1];
2435         } else {
2436             width[1] = obj_image->image.width / 2;
2437             height[1] = obj_image->image.height / 2;
2438             pitch[1] = obj_image->image.pitches[U];
2439             offset[1] = obj_image->image.offsets[U];
2440             width[2] = obj_image->image.width / 2;
2441             height[2] = obj_image->image.height / 2;
2442             pitch[2] = obj_image->image.pitches[V];
2443             offset[2] = obj_image->image.offsets[V];
2444         }
2445     }
2446
2447     if (is_target) {
2448         gen8_pp_set_surface_state(ctx, pp_context,
2449                                   bo, 0,
2450                                   width[0] / 4, height[0], pitch[0],
2451                                   I965_SURFACEFORMAT_R8_UINT,
2452                                   base_index, 1);
2453         if (rgbx_format) {
2454                 struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2455                 /* the format is MSB: X-B-G-R */
2456                 pp_static_parameter->grf2.save_avs_rgb_swap = 0;
2457                 if ((fourcc == VA_FOURCC('B', 'G', 'R', 'A')) || 
2458                               (fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) {
2459                         /* It is stored as MSB: X-R-G-B */
2460                         pp_static_parameter->grf2.save_avs_rgb_swap = 1;
2461                 }
2462         }
2463         if (!packed_yuv && !rgbx_format) {
2464             if (interleaved_uv) {
2465                 gen8_pp_set_surface_state(ctx, pp_context,
2466                                           bo, offset[1],
2467                                           width[1] / 2, height[1], pitch[1],
2468                                           I965_SURFACEFORMAT_R8G8_SINT,
2469                                           base_index + 1, 1);
2470             } else {
2471                 gen8_pp_set_surface_state(ctx, pp_context,
2472                                           bo, offset[1],
2473                                           width[1] / 4, height[1], pitch[1],
2474                                           I965_SURFACEFORMAT_R8_SINT,
2475                                           base_index + 1, 1);
2476                 gen8_pp_set_surface_state(ctx, pp_context,
2477                                           bo, offset[2],
2478                                           width[2] / 4, height[2], pitch[2],
2479                                           I965_SURFACEFORMAT_R8_SINT,
2480                                           base_index + 2, 1);
2481             }
2482         }
2483     } else {
2484         int format0 = SURFACE_FORMAT_Y8_UNORM;
2485
2486         switch (fourcc) {
2487         case VA_FOURCC('Y', 'U', 'Y', '2'):
2488             format0 = SURFACE_FORMAT_YCRCB_NORMAL;
2489             break;
2490
2491         case VA_FOURCC('U', 'Y', 'V', 'Y'):
2492             format0 = SURFACE_FORMAT_YCRCB_SWAPY;
2493             break;
2494
2495         default:
2496             break;
2497         }
2498         if (rgbx_format) {
2499             struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2500             /* Only R8G8B8A8_UNORM is supported for BGRX or RGBX */
2501             format0 = SURFACE_FORMAT_R8G8B8A8_UNORM;
2502             pp_static_parameter->grf2.src_avs_rgb_swap = 0;
2503             if ((fourcc == VA_FOURCC('B', 'G', 'R', 'A')) || 
2504                               (fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) {
2505                 pp_static_parameter->grf2.src_avs_rgb_swap = 1;
2506             }
2507         }
2508         gen8_pp_set_surface2_state(ctx, pp_context,
2509                                    bo, offset[0],
2510                                    width[0], height[0], pitch[0],
2511                                    0, 0,
2512                                    format0, 0,
2513                                    base_index);
2514
2515         if (!packed_yuv && !rgbx_format) {
2516             if (interleaved_uv) {
2517                 gen8_pp_set_surface2_state(ctx, pp_context,
2518                                            bo, offset[1],
2519                                            width[1], height[1], pitch[1],
2520                                            0, 0,
2521                                            SURFACE_FORMAT_R8B8_UNORM, 0,
2522                                            base_index + 1);
2523             } else {
2524                 gen8_pp_set_surface2_state(ctx, pp_context,
2525                                            bo, offset[1],
2526                                            width[1], height[1], pitch[1],
2527                                            0, 0,
2528                                            SURFACE_FORMAT_R8_UNORM, 0,
2529                                            base_index + 1);
2530                 gen8_pp_set_surface2_state(ctx, pp_context,
2531                                            bo, offset[2],
2532                                            width[2], height[2], pitch[2],
2533                                            0, 0,
2534                                            SURFACE_FORMAT_R8_UNORM, 0,
2535                                            base_index + 2);
2536             }
2537         }
2538     }
2539 }
2540
/* Horizontal step count of the "null" kernel: always 1; the argument is ignored. */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2546
/* Vertical step count of the "null" kernel: always 1; the argument is ignored. */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2552
/* Per-block parameter hook of the "null" kernel: does nothing and returns 0. */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;
    (void)x;
    (void)y;

    return 0;
}
2558
2559 static VAStatus
2560 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2561                    const struct i965_surface *src_surface,
2562                    const VARectangle *src_rect,
2563                    struct i965_surface *dst_surface,
2564                    const VARectangle *dst_rect,
2565                    void *filter_param)
2566 {
2567     /* private function & data */
2568     pp_context->pp_x_steps = pp_null_x_steps;
2569     pp_context->pp_y_steps = pp_null_y_steps;
2570     pp_context->private_context = NULL;
2571     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
2572
2573     dst_surface->flags = src_surface->flags;
2574
2575     return VA_STATUS_SUCCESS;
2576 }
2577
/* Horizontal step count of the load/save kernel: always 1; the argument is ignored. */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2583
2584 static int
2585 pp_load_save_y_steps(void *private_context)
2586 {
2587     struct pp_load_save_context *pp_load_save_context = private_context;
2588
2589     return pp_load_save_context->dest_h / 8;
2590 }
2591
2592 static int
2593 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2594 {
2595     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2596     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)pp_context->private_context;
2597
2598     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_load_save_context->dest_x;
2599     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_load_save_context->dest_y;
2600
2601     return 0;
2602 }
2603
2604 static void calculate_boundary_block_mask(struct i965_post_processing_context *pp_context, const VARectangle *dst_rect)
2605 {
2606     int i;
2607     /* x offset of dest surface must be dword aligned.
2608      * so we have to extend dst surface on left edge, and mask out pixels not interested
2609      */
2610     if (dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT) {
2611         pp_context->block_horizontal_mask_left = 0;
2612         for (i=dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; i<GPU_ASM_BLOCK_WIDTH; i++)
2613         {
2614             pp_context->block_horizontal_mask_left |= 1<<i;
2615         }
2616     }
2617     else {
2618         pp_context->block_horizontal_mask_left = 0xffff;
2619     }
2620     
2621     int dst_width_adjust = dst_rect->width + dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; 
2622     if (dst_width_adjust%GPU_ASM_BLOCK_WIDTH){
2623         pp_context->block_horizontal_mask_right = (1 << (dst_width_adjust%GPU_ASM_BLOCK_WIDTH)) - 1;
2624     }
2625     else {
2626         pp_context->block_horizontal_mask_right = 0xffff;
2627     }
2628     
2629     if (dst_rect->height%GPU_ASM_BLOCK_HEIGHT){
2630         pp_context->block_vertical_mask_bottom = (1 << (dst_rect->height%GPU_ASM_BLOCK_HEIGHT)) - 1;
2631     }
2632     else {
2633         pp_context->block_vertical_mask_bottom = 0xff;
2634     }
2635
2636 }
2637 static VAStatus
2638 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2639                                 const struct i965_surface *src_surface,
2640                                 const VARectangle *src_rect,
2641                                 struct i965_surface *dst_surface,
2642                                 const VARectangle *dst_rect,
2643                                 void *filter_param)
2644 {
2645     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->pp_load_save_context;
2646     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2647     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2648     int width[3], height[3], pitch[3], offset[3];
2649
2650     /* source surface */
2651     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
2652                                     width, height, pitch, offset);
2653
2654     /* destination surface */
2655     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
2656                                     width, height, pitch, offset);
2657
2658     /* private function & data */
2659     pp_context->pp_x_steps = pp_load_save_x_steps;
2660     pp_context->pp_y_steps = pp_load_save_y_steps;
2661     pp_context->private_context = &pp_context->pp_load_save_context;
2662     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
2663
2664     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;;
2665     pp_load_save_context->dest_x = dst_rect->x - dst_left_edge_extend;
2666     pp_load_save_context->dest_y = dst_rect->y;
2667     pp_load_save_context->dest_h = ALIGN(dst_rect->height, 8);
2668     pp_load_save_context->dest_w = ALIGN(dst_rect->width+dst_left_edge_extend, 16);
2669
2670     pp_inline_parameter->grf5.block_count_x = pp_load_save_context->dest_w / 16;   /* 1 x N */
2671     pp_inline_parameter->grf5.number_blocks = pp_load_save_context->dest_w / 16;
2672
2673     pp_static_parameter->grf3.horizontal_origin_offset = src_rect->x;
2674     pp_static_parameter->grf3.vertical_origin_offset = src_rect->y;
2675
2676     // update u/v offset for packed yuv
2677     i965_update_src_surface_static_parameter (ctx, pp_context, src_surface);
2678     i965_update_dst_surface_static_parameter (ctx, pp_context, dst_surface);
2679
2680     dst_surface->flags = src_surface->flags;
2681
2682     return VA_STATUS_SUCCESS;
2683 }
2684
/*
 * Number of horizontal steps for the scaling kernel.
 * The private context is not consulted; the answer is always 1.
 */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2690
2691 static int
2692 pp_scaling_y_steps(void *private_context)
2693 {
2694     struct pp_scaling_context *pp_scaling_context = private_context;
2695
2696     return pp_scaling_context->dest_h / 8;
2697 }
2698
2699 static int
2700 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2701 {
2702     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)pp_context->private_context;
2703     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2704     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2705     float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2706     float src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2707
2708     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
2709     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
2710     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
2711     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
2712     
2713     return 0;
2714 }
2715
2716 static VAStatus
2717 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2718                            const struct i965_surface *src_surface,
2719                            const VARectangle *src_rect,
2720                            struct i965_surface *dst_surface,
2721                            const VARectangle *dst_rect,
2722                            void *filter_param)
2723 {
2724     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->pp_scaling_context;
2725     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2726     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2727     struct object_surface *obj_surface;
2728     struct i965_sampler_state *sampler_state;
2729     int in_w, in_h, in_wpitch, in_hpitch;
2730     int out_w, out_h, out_wpitch, out_hpitch;
2731
2732     /* source surface */
2733     obj_surface = (struct object_surface *)src_surface->base;
2734     in_w = obj_surface->orig_width;
2735     in_h = obj_surface->orig_height;
2736     in_wpitch = obj_surface->width;
2737     in_hpitch = obj_surface->height;
2738
2739     /* source Y surface index 1 */
2740     i965_pp_set_surface_state(ctx, pp_context,
2741                               obj_surface->bo, 0,
2742                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2743                               1, 0);
2744
2745     /* source UV surface index 2 */
2746     i965_pp_set_surface_state(ctx, pp_context,
2747                               obj_surface->bo, in_wpitch * in_hpitch,
2748                               in_w / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2749                               2, 0);
2750
2751     /* destination surface */
2752     obj_surface = (struct object_surface *)dst_surface->base;
2753     out_w = obj_surface->orig_width;
2754     out_h = obj_surface->orig_height;
2755     out_wpitch = obj_surface->width;
2756     out_hpitch = obj_surface->height;
2757
2758     /* destination Y surface index 7 */
2759     i965_pp_set_surface_state(ctx, pp_context,
2760                               obj_surface->bo, 0,
2761                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2762                               7, 1);
2763
2764     /* destination UV surface index 8 */
2765     i965_pp_set_surface_state(ctx, pp_context,
2766                               obj_surface->bo, out_wpitch * out_hpitch,
2767                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2768                               8, 1);
2769
2770     /* sampler state */
2771     dri_bo_map(pp_context->sampler_state_table.bo, True);
2772     assert(pp_context->sampler_state_table.bo->virtual);
2773     sampler_state = pp_context->sampler_state_table.bo->virtual;
2774
2775     /* SIMD16 Y index 1 */
2776     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
2777     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2778     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2779     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2780     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2781
2782     /* SIMD16 UV index 2 */
2783     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
2784     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2785     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2786     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2787     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2788
2789     dri_bo_unmap(pp_context->sampler_state_table.bo);
2790
2791     /* private function & data */
2792     pp_context->pp_x_steps = pp_scaling_x_steps;
2793     pp_context->pp_y_steps = pp_scaling_y_steps;
2794     pp_context->private_context = &pp_context->pp_scaling_context;
2795     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
2796
2797     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
2798     float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
2799     pp_scaling_context->dest_x = dst_rect->x - dst_left_edge_extend;
2800     pp_scaling_context->dest_y = dst_rect->y;
2801     pp_scaling_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2802     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 8);
2803     pp_scaling_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
2804     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
2805
2806     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2807
2808     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
2809     pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
2810     pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
2811
2812     dst_surface->flags = src_surface->flags;
2813
2814     return VA_STATUS_SUCCESS;
2815 }
2816
2817 static int
2818 pp_avs_x_steps(void *private_context)
2819 {
2820     struct pp_avs_context *pp_avs_context = private_context;
2821
2822     return pp_avs_context->dest_w / 16;
2823 }
2824
/*
 * Number of vertical steps for the AVS kernel.
 * The private context is not consulted; the answer is always 1.
 */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2830
2831 static int
2832 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2833 {
2834     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context;
2835     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2836     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2837     float src_x_steping, src_y_steping, video_step_delta;
2838     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
2839
2840     if (pp_static_parameter->grf4.r4_2.avs.nlas == 0) {
2841         src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2842         pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_avs_context->src_normalized_x;
2843     } else if (tmp_w >= pp_avs_context->dest_w) {
2844         pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
2845         pp_inline_parameter->grf6.video_step_delta = 0;
2846         
2847         if (x == 0) {
2848             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
2849                 pp_avs_context->src_normalized_x;
2850         } else {
2851             src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2852             video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2853             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2854                 16 * 15 * video_step_delta / 2;
2855         }
2856     } else {
2857         int n0, n1, n2, nls_left, nls_right;
2858         int factor_a = 5, factor_b = 4;
2859         float f;
2860
2861         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
2862         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
2863         n2 = tmp_w / (16 * factor_a);
2864         nls_left = n0 + n2;
2865         nls_right = n1 + n2;
2866         f = (float) n2 * 16 / tmp_w;
2867         
2868         if (n0 < 5) {
2869             pp_inline_parameter->grf6.video_step_delta = 0.0;
2870
2871             if (x == 0) {
2872                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
2873                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
2874             } else {
2875                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2876                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2877                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2878                     16 * 15 * video_step_delta / 2;
2879             }
2880         } else {
2881             if (x < nls_left) {
2882                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
2883                 float a = f / (nls_left * 16 * factor_b);
2884                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
2885                 
2886                 pp_inline_parameter->grf6.video_step_delta = b;
2887
2888                 if (x == 0) {
2889                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
2890                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a;
2891                 } else {
2892                     src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2893                     video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2894                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2895                         16 * 15 * video_step_delta / 2;
2896                     pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
2897                 }
2898             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
2899                 /* scale the center linearly */
2900                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2901                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2902                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2903                     16 * 15 * video_step_delta / 2;
2904                 pp_inline_parameter->grf6.video_step_delta = 0.0;
2905                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
2906             } else {
2907                 float a = f / (nls_right * 16 * factor_b);
2908                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
2909
2910                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2911                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2912                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2913                     16 * 15 * video_step_delta / 2;
2914                 pp_inline_parameter->grf6.video_step_delta = -b;
2915
2916                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
2917                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
2918                 else
2919                     pp_inline_parameter->grf5.normalized_video_x_scaling_step -= b * 16;
2920             }
2921         }
2922     }
2923
2924     src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2925     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
2926     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2927     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
2928
2929     return 0;
2930 }
2931
2932 static VAStatus
2933 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2934                        const struct i965_surface *src_surface,
2935                        const VARectangle *src_rect,
2936                        struct i965_surface *dst_surface,
2937                        const VARectangle *dst_rect,
2938                        void *filter_param,
2939                        int nlas)
2940 {
2941     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context;
2942     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2943     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2944     struct object_surface *obj_surface;
2945     struct i965_sampler_8x8 *sampler_8x8;
2946     struct i965_sampler_8x8_state *sampler_8x8_state;
2947     int index;
2948     int in_w, in_h, in_wpitch, in_hpitch;
2949     int out_w, out_h, out_wpitch, out_hpitch;
2950     int i;
2951
2952     /* surface */
2953     obj_surface = (struct object_surface *)src_surface->base;
2954     in_w = obj_surface->orig_width;
2955     in_h = obj_surface->orig_height;
2956     in_wpitch = obj_surface->width;
2957     in_hpitch = obj_surface->height;
2958
2959     /* source Y surface index 1 */
2960     i965_pp_set_surface2_state(ctx, pp_context,
2961                                obj_surface->bo, 0,
2962                                in_w, in_h, in_wpitch,
2963                                0, 0,
2964                                SURFACE_FORMAT_Y8_UNORM, 0,
2965                                1);
2966
2967     /* source UV surface index 2 */
2968     i965_pp_set_surface2_state(ctx, pp_context,
2969                                obj_surface->bo, in_wpitch * in_hpitch,
2970                                in_w / 2, in_h / 2, in_wpitch,
2971                                0, 0,
2972                                SURFACE_FORMAT_R8B8_UNORM, 0,
2973                                2);
2974
2975     /* destination surface */
2976     obj_surface = (struct object_surface *)dst_surface->base;
2977     out_w = obj_surface->orig_width;
2978     out_h = obj_surface->orig_height;
2979     out_wpitch = obj_surface->width;
2980     out_hpitch = obj_surface->height;
2981     assert(out_w <= out_wpitch && out_h <= out_hpitch);
2982
2983     /* destination Y surface index 7 */
2984     i965_pp_set_surface_state(ctx, pp_context,
2985                               obj_surface->bo, 0,
2986                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2987                               7, 1);
2988
2989     /* destination UV surface index 8 */
2990     i965_pp_set_surface_state(ctx, pp_context,
2991                               obj_surface->bo, out_wpitch * out_hpitch,
2992                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2993                               8, 1);
2994
2995     /* sampler 8x8 state */
2996     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2997     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2998     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2999     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
3000     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
3001
3002     for (i = 0; i < 17; i++) {
3003         /* for Y channel, currently ignore */
3004         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x00;
3005         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x00;
3006         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x08;
3007         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x18;
3008         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x18;
3009         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x08;
3010         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x00;
3011         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x00;
3012         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x00;
3013         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x00;
3014         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x10;
3015         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x10;
3016         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x10;
3017         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x10;
3018         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x00;
3019         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x00;
3020         /* for U/V channel, 0.25 */
3021         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
3022         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
3023         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
3024         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
3025         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
3026         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
3027         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
3028         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
3029         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
3030         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
3031         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
3032         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
3033         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
3034         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
3035         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
3036         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
3037     }
3038
3039     sampler_8x8_state->dw136.default_sharpness_level = 0;
3040     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
3041     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
3042     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
3043     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
3044
3045     /* sampler 8x8 */
3046     dri_bo_map(pp_context->sampler_state_table.bo, True);
3047     assert(pp_context->sampler_state_table.bo->virtual);
3048     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
3049     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
3050
3051     /* sample_8x8 Y index 1 */
3052     index = 1;
3053     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3054     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
3055     sampler_8x8[index].dw0.ief_bypass = 1;
3056     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
3057     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
3058     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3059     sampler_8x8[index].dw2.global_noise_estimation = 22;
3060     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3061     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3062     sampler_8x8[index].dw3.strong_edge_weight = 7;
3063     sampler_8x8[index].dw3.regular_weight = 2;
3064     sampler_8x8[index].dw3.non_edge_weight = 0;
3065     sampler_8x8[index].dw3.gain_factor = 40;
3066     sampler_8x8[index].dw4.steepness_boost = 0;
3067     sampler_8x8[index].dw4.steepness_threshold = 0;
3068     sampler_8x8[index].dw4.mr_boost = 0;
3069     sampler_8x8[index].dw4.mr_threshold = 5;
3070     sampler_8x8[index].dw5.pwl1_point_1 = 4;
3071     sampler_8x8[index].dw5.pwl1_point_2 = 12;
3072     sampler_8x8[index].dw5.pwl1_point_3 = 16;
3073     sampler_8x8[index].dw5.pwl1_point_4 = 26;
3074     sampler_8x8[index].dw6.pwl1_point_5 = 40;
3075     sampler_8x8[index].dw6.pwl1_point_6 = 160;
3076     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
3077     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
3078     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
3079     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
3080     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
3081     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
3082     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
3083     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
3084     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
3085     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
3086     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
3087     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
3088     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
3089     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
3090     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
3091     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
3092     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
3093     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
3094     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
3095     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
3096     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
3097     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
3098     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
3099     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
3100     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
3101     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
3102     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
3103     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
3104     sampler_8x8[index].dw13.limiter_boost = 0;
3105     sampler_8x8[index].dw13.minimum_limiter = 10;
3106     sampler_8x8[index].dw13.maximum_limiter = 11;
3107     sampler_8x8[index].dw14.clip_limiter = 130;
3108     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3109                       I915_GEM_DOMAIN_RENDER, 
3110                       0,
3111                       0,
3112                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3113                       pp_context->sampler_state_table.bo_8x8);
3114
3115     /* sample_8x8 UV index 2 */
3116     index = 2;
3117     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3118     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
3119     sampler_8x8[index].dw0.ief_bypass = 1;
3120     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
3121     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
3122     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3123     sampler_8x8[index].dw2.global_noise_estimation = 22;
3124     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3125     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3126     sampler_8x8[index].dw3.strong_edge_weight = 7;
3127     sampler_8x8[index].dw3.regular_weight = 2;
3128     sampler_8x8[index].dw3.non_edge_weight = 0;
3129     sampler_8x8[index].dw3.gain_factor = 40;
3130     sampler_8x8[index].dw4.steepness_boost = 0;
3131     sampler_8x8[index].dw4.steepness_threshold = 0;
3132     sampler_8x8[index].dw4.mr_boost = 0;
3133     sampler_8x8[index].dw4.mr_threshold = 5;
3134     sampler_8x8[index].dw5.pwl1_point_1 = 4;
3135     sampler_8x8[index].dw5.pwl1_point_2 = 12;
3136     sampler_8x8[index].dw5.pwl1_point_3 = 16;
3137     sampler_8x8[index].dw5.pwl1_point_4 = 26;
3138     sampler_8x8[index].dw6.pwl1_point_5 = 40;
3139     sampler_8x8[index].dw6.pwl1_point_6 = 160;
3140     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
3141     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
3142     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
3143     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
3144     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
3145     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
3146     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
3147     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
3148     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
3149     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
3150     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
3151     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
3152     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
3153     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
3154     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
3155     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
3156     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
3157     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
3158     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
3159     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
3160     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
3161     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
3162     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
3163     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
3164     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
3165     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
3166     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
3167     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
3168     sampler_8x8[index].dw13.limiter_boost = 0;
3169     sampler_8x8[index].dw13.minimum_limiter = 10;
3170     sampler_8x8[index].dw13.maximum_limiter = 11;
3171     sampler_8x8[index].dw14.clip_limiter = 130;
3172     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3173                       I915_GEM_DOMAIN_RENDER, 
3174                       0,
3175                       0,
3176                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3177                       pp_context->sampler_state_table.bo_8x8);
3178
3179     dri_bo_unmap(pp_context->sampler_state_table.bo);
3180
3181     /* private function & data */
3182     pp_context->pp_x_steps = pp_avs_x_steps;
3183     pp_context->pp_y_steps = pp_avs_y_steps;
3184     pp_context->private_context = &pp_context->pp_avs_context;
3185     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
3186
3187     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
3188     float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
3189     pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
3190     pp_avs_context->dest_y = dst_rect->y;
3191     pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
3192     pp_avs_context->dest_h = ALIGN(dst_rect->height, 8);
3193     pp_avs_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
3194     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
3195     pp_avs_context->src_w = src_rect->width + src_left_edge_extend;
3196     pp_avs_context->src_h = src_rect->height;
3197
3198     pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
3199     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
3200
3201     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
3202     pp_inline_parameter->grf5.block_count_x = 1;        /* M x 1 */
3203     pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
3204     pp_inline_parameter->grf6.video_step_delta = 0.0;
3205
3206     dst_surface->flags = src_surface->flags;
3207
3208     return VA_STATUS_SUCCESS;
3209 }
3210
3211 static VAStatus
3212 pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3213                             const struct i965_surface *src_surface,
3214                             const VARectangle *src_rect,
3215                             struct i965_surface *dst_surface,
3216                             const VARectangle *dst_rect,
3217                             void *filter_param)
3218 {
3219     return pp_nv12_avs_initialize(ctx, pp_context,
3220                                   src_surface,
3221                                   src_rect,
3222                                   dst_surface,
3223                                   dst_rect,
3224                                   filter_param,
3225                                   1);
3226 }
3227
3228 static VAStatus
3229 gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3230                              const struct i965_surface *src_surface,
3231                              const VARectangle *src_rect,
3232                              struct i965_surface *dst_surface,
3233                              const VARectangle *dst_rect,
3234                              void *filter_param)
3235 {
3236     return pp_nv12_avs_initialize(ctx, pp_context,
3237                                   src_surface,
3238                                   src_rect,
3239                                   dst_surface,
3240                                   dst_rect,
3241                                   filter_param,
3242                                   0);    
3243 }
3244
3245 static int
3246 gen7_pp_avs_x_steps(void *private_context)
3247 {
3248     struct pp_avs_context *pp_avs_context = private_context;
3249
3250     return pp_avs_context->dest_w / 16;
3251 }
3252
3253 static int
3254 gen7_pp_avs_y_steps(void *private_context)
3255 {
3256     struct pp_avs_context *pp_avs_context = private_context;
3257
3258     return pp_avs_context->dest_h / 16;
3259 }
3260
3261 static int
3262 gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3263 {
3264     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context;
3265     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3266
3267     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
3268     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
3269     pp_inline_parameter->grf7.constant_0 = 0xffffffff;
3270     pp_inline_parameter->grf7.sampler_load_main_video_x_scaling_step = pp_avs_context->horiz_range / pp_avs_context->src_w;
3271
3272     return 0;
3273 }
3274
3275 static void gen7_update_src_surface_uv_offset(VADriverContextP    ctx, 
3276                                               struct i965_post_processing_context *pp_context,
3277                                               const struct i965_surface *surface)
3278 {
3279     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3280     int fourcc = pp_get_surface_fourcc(ctx, surface);
3281     
3282     if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) {
3283         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3284         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3285         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3286     } else if (fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
3287         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 1;
3288         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 0;
3289         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 2;
3290     }
3291 }
3292
3293 static VAStatus
3294 gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3295                            const struct i965_surface *src_surface,
3296                            const VARectangle *src_rect,
3297                            struct i965_surface *dst_surface,
3298                            const VARectangle *dst_rect,
3299                            void *filter_param)
3300 {
3301     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context;
3302     struct i965_driver_data *i965 = i965_driver_data(ctx);
3303     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3304     struct gen7_sampler_8x8 *sampler_8x8;
3305     struct i965_sampler_8x8_state *sampler_8x8_state;
3306     int index, i;
3307     int width[3], height[3], pitch[3], offset[3];
3308     int src_width, src_height;
3309
3310     /* source surface */
3311     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
3312                                          width, height, pitch, offset);
3313     src_width = width[0];
3314     src_height = height[0];
3315
3316     /* destination surface */
3317     gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
3318                                          width, height, pitch, offset);
3319
3320     /* sampler 8x8 state */
3321     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
3322     assert(pp_context->sampler_state_table.bo_8x8->virtual);
3323     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
3324     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
3325     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
3326
3327     for (i = 0; i < 17; i++) {
3328         float coff;
3329         coff = i;
3330         coff = coff / 16;
3331         /* for Y channel, currently ignore */
3332         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0;
3333         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0;
3334         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0;
3335         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = intel_format_convert(1 - coff, 1, 6,0);
3336         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = intel_format_convert(coff, 1, 6, 0);
3337         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0;
3338         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0;
3339         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0;
3340         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0;
3341         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0;
3342         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0;
3343         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
3344         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = intel_format_convert(coff, 1, 6, 0);
3345         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0;
3346         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0;
3347         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0;
3348         /* for U/V channel, 0.25 */
3349         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
3350         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
3351         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x0;
3352         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
3353         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = intel_format_convert(coff, 1, 6, 0);
3354         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0;
3355         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
3356         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
3357         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
3358         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
3359         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x0;
3360         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
3361         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = intel_format_convert(coff, 1, 6, 0);
3362         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x0;
3363         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
3364         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
3365     }
3366
3367     sampler_8x8_state->dw136.default_sharpness_level = 0;
3368     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
3369     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
3370     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
3371     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
3372
3373     /* sampler 8x8 */
3374     dri_bo_map(pp_context->sampler_state_table.bo, True);
3375     assert(pp_context->sampler_state_table.bo->virtual);
3376     assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
3377     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
3378
3379     /* sample_8x8 Y index 4 */
3380     index = 4;
3381     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3382     sampler_8x8[index].dw0.global_noise_estimation = 255;
3383     sampler_8x8[index].dw0.ief_bypass = 1;
3384
3385     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3386
3387     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3388     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3389     sampler_8x8[index].dw2.r5x_coefficient = 9;
3390     sampler_8x8[index].dw2.r5cx_coefficient = 8;
3391     sampler_8x8[index].dw2.r5c_coefficient = 3;
3392
3393     sampler_8x8[index].dw3.r3x_coefficient = 27;
3394     sampler_8x8[index].dw3.r3c_coefficient = 5;
3395     sampler_8x8[index].dw3.gain_factor = 40;
3396     sampler_8x8[index].dw3.non_edge_weight = 1;
3397     sampler_8x8[index].dw3.regular_weight = 2;
3398     sampler_8x8[index].dw3.strong_edge_weight = 7;
3399     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3400
3401     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3402                       I915_GEM_DOMAIN_RENDER, 
3403                       0,
3404                       0,
3405                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3406                       pp_context->sampler_state_table.bo_8x8);
3407
3408     /* sample_8x8 UV index 8 */
3409     index = 8;
3410     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3411     sampler_8x8[index].dw0.disable_8x8_filter = 0;
3412     sampler_8x8[index].dw0.global_noise_estimation = 255;
3413     sampler_8x8[index].dw0.ief_bypass = 1;
3414     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3415     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3416     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3417     sampler_8x8[index].dw2.r5x_coefficient = 9;
3418     sampler_8x8[index].dw2.r5cx_coefficient = 8;
3419     sampler_8x8[index].dw2.r5c_coefficient = 3;
3420     sampler_8x8[index].dw3.r3x_coefficient = 27;
3421     sampler_8x8[index].dw3.r3c_coefficient = 5;
3422     sampler_8x8[index].dw3.gain_factor = 40;
3423     sampler_8x8[index].dw3.non_edge_weight = 1;
3424     sampler_8x8[index].dw3.regular_weight = 2;
3425     sampler_8x8[index].dw3.strong_edge_weight = 7;
3426     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3427
3428     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3429                       I915_GEM_DOMAIN_RENDER, 
3430                       0,
3431                       0,
3432                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3433                       pp_context->sampler_state_table.bo_8x8);
3434
3435     /* sampler_8x8 V, index 12 */
3436     index = 12;
3437     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3438     sampler_8x8[index].dw0.disable_8x8_filter = 0;
3439     sampler_8x8[index].dw0.global_noise_estimation = 255;
3440     sampler_8x8[index].dw0.ief_bypass = 1;
3441     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3442     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3443     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3444     sampler_8x8[index].dw2.r5x_coefficient = 9;
3445     sampler_8x8[index].dw2.r5cx_coefficient = 8;
3446     sampler_8x8[index].dw2.r5c_coefficient = 3;
3447     sampler_8x8[index].dw3.r3x_coefficient = 27;
3448     sampler_8x8[index].dw3.r3c_coefficient = 5;
3449     sampler_8x8[index].dw3.gain_factor = 40;
3450     sampler_8x8[index].dw3.non_edge_weight = 1;
3451     sampler_8x8[index].dw3.regular_weight = 2;
3452     sampler_8x8[index].dw3.strong_edge_weight = 7;
3453     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3454
3455     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3456                       I915_GEM_DOMAIN_RENDER, 
3457                       0,
3458                       0,
3459                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3460                       pp_context->sampler_state_table.bo_8x8);
3461
3462     dri_bo_unmap(pp_context->sampler_state_table.bo);
3463
3464     /* private function & data */
3465     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
3466     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
3467     pp_context->private_context = &pp_context->pp_avs_context;
3468     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
3469
3470     pp_avs_context->dest_x = dst_rect->x;
3471     pp_avs_context->dest_y = dst_rect->y;
3472     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
3473     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
3474     pp_avs_context->src_w = src_rect->width;
3475     pp_avs_context->src_h = src_rect->height;
3476     pp_avs_context->horiz_range = (float)src_rect->width / src_width;
3477
3478     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
3479     dw = MAX(dw, dst_rect->width);
3480
3481     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3482     pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */
3483     if (IS_HASWELL(i965->intel.device_id))
3484         pp_static_parameter->grf2.avs_wa_enable = 0; /* HSW don't use the WA */
3485
3486     if (pp_static_parameter->grf2.avs_wa_enable) {
3487         int src_fourcc = pp_get_surface_fourcc(ctx, src_surface);
3488         if ((src_fourcc == VA_FOURCC('R', 'G', 'B', 'A')) ||
3489             (src_fourcc == VA_FOURCC('R', 'G', 'B', 'X')) ||
3490             (src_fourcc == VA_FOURCC('B', 'G', 'R', 'A')) ||
3491             (src_fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) {
3492             pp_static_parameter->grf2.avs_wa_enable = 0;
3493         }
3494     }
3495         
3496     pp_static_parameter->grf2.avs_wa_width = dw;
3497     pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * dw);
3498     pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * dw);
3499
3500     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
3501     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height;
3502     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height -
3503         (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step;
3504     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width -
3505         (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw;
3506
3507     gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
3508
3509     dst_surface->flags = src_surface->flags;
3510
3511     return VA_STATUS_SUCCESS;
3512 }
3513
3514 static VAStatus
3515 gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3516                            const struct i965_surface *src_surface,
3517                            const VARectangle *src_rect,
3518                            struct i965_surface *dst_surface,
3519                            const VARectangle *dst_rect,
3520                            void *filter_param)
3521 {
3522 /* TODO: Add the sampler_8x8 state */
3523     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context;
3524     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3525     struct gen8_sampler_8x8_avs *sampler_8x8;
3526     struct i965_sampler_8x8_coefficient *sampler_8x8_state;
3527     int i;
3528     int width[3], height[3], pitch[3], offset[3];
3529     int src_width, src_height;
3530     unsigned char *cc_ptr;
3531
3532     memset(pp_static_parameter, 0, sizeof(struct gen7_pp_static_parameter));
3533
3534     /* source surface */
3535     gen8_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
3536                                          width, height, pitch, offset);
3537     src_height = height[0];
3538     src_width  = width[0];
3539
3540     /* destination surface */
3541     gen8_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
3542                                          width, height, pitch, offset);
3543
3544     /* sampler 8x8 state */
3545     dri_bo_map(pp_context->dynamic_state.bo, True);
3546     assert(pp_context->dynamic_state.bo->virtual);
3547
3548     cc_ptr = (unsigned char *) pp_context->dynamic_state.bo->virtual +
3549         pp_context->sampler_offset;
3550     /* Currently only one gen8 sampler_8x8 is initialized */
3551     sampler_8x8 = (struct gen8_sampler_8x8_avs *)cc_ptr;
3552     memset(sampler_8x8, 0, sizeof(*sampler_8x8));
3553
3554     sampler_8x8->dw0.gain_factor = 44;
3555     sampler_8x8->dw0.weak_edge_threshold = 1;
3556     sampler_8x8->dw0.strong_edge_threshold = 8;
3557     /* Use the value like that on Ivy instead of default
3558      * sampler_8x8->dw0.r3x_coefficient = 5;
3559      */
3560     sampler_8x8->dw0.r3x_coefficient = 27;
3561     sampler_8x8->dw0.r3c_coefficient = 5;
3562
3563     sampler_8x8->dw2.global_noise_estimation = 255;
3564     sampler_8x8->dw2.non_edge_weight = 1;
3565     sampler_8x8->dw2.regular_weight = 2;
3566     sampler_8x8->dw2.strong_edge_weight = 7;
3567     /* Use the value like that on Ivy instead of default
3568      * sampler_8x8->dw2.r5x_coefficient = 7;
3569      * sampler_8x8->dw2.r5cx_coefficient = 7;
3570      * sampler_8x8->dw2.r5c_coefficient = 7;
3571      */
3572     sampler_8x8->dw2.r5x_coefficient = 9;
3573     sampler_8x8->dw2.r5cx_coefficient = 8;
3574     sampler_8x8->dw2.r5c_coefficient = 3;
3575
3576     sampler_8x8->dw3.sin_alpha = 101; /* sin_alpha = 0 */
3577     sampler_8x8->dw3.cos_alpha = 79; /* cos_alpha = 0 */
3578     sampler_8x8->dw3.sat_max = 0x1f;
3579     sampler_8x8->dw3.hue_max = 14;
3580     /* The 8tap filter will determine whether the adaptive Filter is
3581      * applied for all channels(dw153).
3582      * If the 8tap filter is disabled, the adaptive filter should be disabled.
3583      * Only when 8tap filter is enabled, it can be enabled or not
3584      */
3585     sampler_8x8->dw3.enable_8tap_filter = 3;
3586     sampler_8x8->dw3.ief4_smooth_enable = 0;
3587
3588     sampler_8x8->dw4.s3u = 0;
3589     sampler_8x8->dw4.diamond_margin = 4;
3590     sampler_8x8->dw4.vy_std_enable = 0;
3591     sampler_8x8->dw4.umid = 110;
3592     sampler_8x8->dw4.vmid = 154;
3593
3594     sampler_8x8->dw5.diamond_dv = 0;
3595     sampler_8x8->dw5.diamond_th = 35;
3596     sampler_8x8->dw5.diamond_alpha = 100; /* diamond_alpha = 0 */
3597     sampler_8x8->dw5.hs_margin = 3;
3598     sampler_8x8->dw5.diamond_du = 2;
3599
3600     sampler_8x8->dw6.y_point1 = 46;
3601     sampler_8x8->dw6.y_point2 = 47;
3602     sampler_8x8->dw6.y_point3 = 254;
3603     sampler_8x8->dw6.y_point4 = 255;
3604
3605     sampler_8x8->dw7.inv_margin_vyl = 3300; /* inv_margin_vyl = 0 */
3606
3607     sampler_8x8->dw8.inv_margin_vyu = 1600; /* inv_margin_vyu = 0 */
3608     sampler_8x8->dw8.p0l = 46;
3609     sampler_8x8->dw8.p1l = 216;
3610
3611     sampler_8x8->dw9.p2l = 236;
3612     sampler_8x8->dw9.p3l = 236;
3613     sampler_8x8->dw9.b0l = 133;
3614     sampler_8x8->dw9.b1l = 130;
3615
3616     sampler_8x8->dw10.b2l = 130;
3617     sampler_8x8->dw10.b3l = 130;
3618     /* s0l = -5 / 256. s2.8 */
3619     sampler_8x8->dw10.s0l = 1029;    /* s0l = 0 */
3620     sampler_8x8->dw10.y_slope2 = 31; /* y_slop2 = 0 */
3621
3622     sampler_8x8->dw11.s1l = 0;
3623     sampler_8x8->dw11.s2l = 0;
3624
3625     sampler_8x8->dw12.s3l = 0;
3626     sampler_8x8->dw12.p0u = 46;
3627     sampler_8x8->dw12.p1u = 66;
3628     sampler_8x8->dw12.y_slope1 = 31; /* y_slope1 = 0 */
3629
3630     sampler_8x8->dw13.p2u = 130;
3631     sampler_8x8->dw13.p3u = 236;
3632     sampler_8x8->dw13.b0u = 143;
3633     sampler_8x8->dw13.b1u = 163;
3634
3635     sampler_8x8->dw14.b2u = 200;
3636     sampler_8x8->dw14.b3u = 140;
3637     sampler_8x8->dw14.s0u = 256;  /* s0u = 0 */
3638
3639     sampler_8x8->dw15.s1u = 113; /* s1u = 0 */
3640     sampler_8x8->dw15.s2u = 1203; /* s2u = 0 */
3641
3642     sampler_8x8_state = sampler_8x8->coefficients;
3643
3644      for (i = 0; i < 17; i++) {
3645         float coff;
3646         coff = i;
3647         coff = coff / 16;
3648
3649         memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
3650         /* for Y channel, currently ignore */
3651         sampler_8x8_state->dw0.table_0x_filter_c0 = 0x0;
3652         sampler_8x8_state->dw0.table_0x_filter_c1 = 0x0;
3653         sampler_8x8_state->dw0.table_0x_filter_c2 = 0x0;
3654         sampler_8x8_state->dw0.table_0x_filter_c3 =
3655             intel_format_convert(1 - coff, 1, 6, 0);
3656         sampler_8x8_state->dw1.table_0x_filter_c4 =
3657             intel_format_convert(coff, 1, 6, 0);
3658         sampler_8x8_state->dw1.table_0x_filter_c5 = 0x0;
3659         sampler_8x8_state->dw1.table_0x_filter_c6 = 0x0;
3660         sampler_8x8_state->dw1.table_0x_filter_c7 = 0x0;
3661         sampler_8x8_state->dw2.table_0y_filter_c0 = 0x0;
3662         sampler_8x8_state->dw2.table_0y_filter_c1 = 0x0;
3663         sampler_8x8_state->dw2.table_0y_filter_c2 = 0x0;
3664         sampler_8x8_state->dw2.table_0y_filter_c3 =
3665             intel_format_convert(1 - coff, 1, 6, 0);
3666         sampler_8x8_state->dw3.table_0y_filter_c4 =
3667             intel_format_convert(coff, 1, 6, 0);
3668         sampler_8x8_state->dw3.table_0y_filter_c5 = 0x0;
3669         sampler_8x8_state->dw3.table_0y_filter_c6 = 0x0;
3670         sampler_8x8_state->dw3.table_0y_filter_c7 = 0x0;
3671         /* for U/V channel, 0.25 */
3672         sampler_8x8_state->dw4.table_1x_filter_c0 = 0x0;
3673         sampler_8x8_state->dw4.table_1x_filter_c1 = 0x0;
3674         sampler_8x8_state->dw4.table_1x_filter_c2 = 0x0;
3675         sampler_8x8_state->dw4.table_1x_filter_c3 =
3676             intel_format_convert(1 - coff, 1, 6, 0);
3677         sampler_8x8_state->dw5.table_1x_filter_c4 =
3678             intel_format_convert(coff, 1, 6, 0);
3679         sampler_8x8_state->dw5.table_1x_filter_c5 = 0x00;
3680         sampler_8x8_state->dw5.table_1x_filter_c6 = 0x0;
3681         sampler_8x8_state->dw5.table_1x_filter_c7 = 0x0;
3682         sampler_8x8_state->dw6.table_1y_filter_c0 = 0x0;
3683         sampler_8x8_state->dw6.table_1y_filter_c1 = 0x0;
3684         sampler_8x8_state->dw6.table_1y_filter_c2 = 0x0;
3685         sampler_8x8_state->dw6.table_1y_filter_c3 =
3686             intel_format_convert(1 - coff, 1, 6, 0);
3687         sampler_8x8_state->dw7.table_1y_filter_c4 =
3688             intel_format_convert(coff, 1, 6,0);
3689         sampler_8x8_state->dw7.table_1y_filter_c5 = 0x0;
3690         sampler_8x8_state->dw7.table_1y_filter_c6 = 0x0;
3691         sampler_8x8_state->dw7.table_1y_filter_c7 = 0x0;
3692         sampler_8x8_state++;
3693     }
3694
3695     sampler_8x8->dw152.default_sharpness_level = 0;
3696     sampler_8x8->dw153.adaptive_filter_for_all_channel = 1;
3697     sampler_8x8->dw153.bypass_y_adaptive_filtering = 1;
3698     sampler_8x8->dw153.bypass_x_adaptive_filtering = 1;
3699
3700     dri_bo_unmap(pp_context->dynamic_state.bo);
3701
3702
3703     /* private function & data */
3704     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
3705     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
3706     pp_context->private_context = &pp_context->pp_avs_context;
3707     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
3708
3709     pp_avs_context->dest_x = dst_rect->x;
3710     pp_avs_context->dest_y = dst_rect->y;
3711     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
3712     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
3713     pp_avs_context->src_w = src_rect->width;
3714     pp_avs_context->src_h = src_rect->height;
3715     pp_avs_context->horiz_range = (float)src_rect->width / src_width;
3716
3717     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
3718     dw = MAX(dw, dst_rect->width);
3719
3720     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3721     pp_static_parameter->grf2.avs_wa_enable = 0; /* It is not required on GEN8+ */
3722     pp_static_parameter->grf2.avs_wa_width = src_width;
3723     pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * src_width);
3724     pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * src_width);
3725
3726     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
3727     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height;
3728     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height -
3729         (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step;
3730     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width -
3731         (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw;
3732
3733     gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
3734
3735     dst_surface->flags = src_surface->flags;
3736
3737     return VA_STATUS_SUCCESS;
3738 }
3739
/*
 * Horizontal step count for the DNDI pass: always 1.
 *
 * private_context is accepted to match the step-callback signature and is
 * not read.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
3745
3746 static int
3747 pp_dndi_y_steps(void *private_context)
3748 {
3749     struct pp_dndi_context *pp_dndi_context = private_context;
3750
3751     return pp_dndi_context->dest_h / 4;
3752 }
3753
3754 static int
3755 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3756 {
3757     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3758
3759     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3760     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
3761
3762     return 0;
3763 }
3764
3765 static VAStatus
3766 pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3767                         const struct i965_surface *src_surface,
3768                         const VARectangle *src_rect,
3769                         struct i965_surface *dst_surface,
3770                         const VARectangle *dst_rect,
3771                         void *filter_param)
3772 {
3773     struct i965_driver_data *i965 = i965_driver_data(ctx);
3774     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->pp_dndi_context;
3775     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3776     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3777     struct object_surface *obj_surface;
3778     struct i965_sampler_dndi *sampler_dndi;
3779     int index;
3780     int w, h;
3781     int orig_w, orig_h;
3782     int dndi_top_first = 1;
3783     VAProcFilterParameterBufferDeinterlacing *di_filter_param = (VAProcFilterParameterBufferDeinterlacing *)filter_param;
3784
3785     if (di_filter_param->flags & VA_DEINTERLACING_BOTTOM_FIELD)
3786         dndi_top_first = 0;
3787     else
3788         dndi_top_first = 1;
3789
3790     /* surface */
3791     obj_surface = (struct object_surface *)src_surface->base;
3792     orig_w = obj_surface->orig_width;
3793     orig_h = obj_surface->orig_height;
3794     w = obj_surface->width;
3795     h = obj_surface->height;
3796
3797     if (pp_dndi_context->stmm_bo == NULL) {
3798         pp_dndi_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr,
3799                                                 "STMM surface",
3800                                                 w * h,
3801                                                 4096);
3802         assert(pp_dndi_context->stmm_bo);
3803     }
3804
3805     /* source UV surface index 2 */
3806     i965_pp_set_surface_state(ctx, pp_context,
3807                               obj_surface->bo, w * h,
3808                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3809                               2, 0);
3810
3811     /* source YUV surface index 4 */
3812     i965_pp_set_surface2_state(ctx, pp_context,
3813                                obj_surface->bo, 0,
3814                                orig_w, orig_h, w,
3815                                0, h,
3816                                SURFACE_FORMAT_PLANAR_420_8, 1,
3817                                4);
3818
3819     /* source STMM surface index 20 */
3820     i965_pp_set_surface_state(ctx, pp_context,
3821                               pp_dndi_context->stmm_bo, 0,
3822                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3823                               20, 1);
3824
3825     /* destination surface */
3826     obj_surface = (struct object_surface *)dst_surface->base;
3827     orig_w = obj_surface->orig_width;
3828     orig_h = obj_surface->orig_height;
3829     w = obj_surface->width;
3830     h = obj_surface->height;
3831
3832     /* destination Y surface index 7 */
3833     i965_pp_set_surface_state(ctx, pp_context,
3834                               obj_surface->bo, 0,
3835                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3836                               7, 1);
3837
3838     /* destination UV surface index 8 */
3839     i965_pp_set_surface_state(ctx, pp_context,
3840                               obj_surface->bo, w * h,
3841                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3842                               8, 1);
3843     /* sampler dndi */
3844     dri_bo_map(pp_context->sampler_state_table.bo, True);
3845     assert(pp_context->sampler_state_table.bo->virtual);
3846     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3847     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3848
3849     /* sample dndi index 1 */
3850     index = 0;
3851     sampler_dndi[index].dw0.denoise_asd_threshold = 38;
3852     sampler_dndi[index].dw0.denoise_history_delta = 7;          // 0-15, default is 8
3853     sampler_dndi[index].dw0.denoise_maximum_history = 192;      // 128-240
3854     sampler_dndi[index].dw0.denoise_stad_threshold = 140;
3855
3856     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 38;
3857     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 1;
3858     sampler_dndi[index].dw1.stmm_c2 = 1;
3859     sampler_dndi[index].dw1.low_temporal_difference_threshold = 0;
3860     sampler_dndi[index].dw1.temporal_difference_threshold = 0;
3861
3862     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 20;   // 0-31
3863     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 1;    // 0-15
3864     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
3865     sampler_dndi[index].dw2.good_neighbor_threshold = 12;                // 0-63
3866
3867     sampler_dndi[index].dw3.maximum_stmm = 150;
3868     sampler_dndi[index].dw3.multipler_for_vecm = 30;
3869     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 125;
3870     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3871     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3872
3873     sampler_dndi[index].dw4.sdi_delta = 5;
3874     sampler_dndi[index].dw4.sdi_threshold = 100;
3875     sampler_dndi[index].dw4.stmm_output_shift = 5;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3876     sampler_dndi[index].dw4.stmm_shift_up = 1;
3877     sampler_dndi[index].dw4.stmm_shift_down = 0;
3878     sampler_dndi[index].dw4.minimum_stmm = 118;
3879
3880     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 175;
3881     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 37;
3882     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 100;
3883     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 50;
3884
3885     sampler_dndi[index].dw6.dn_enable = 1;
3886     sampler_dndi[index].dw6.di_enable = 1;
3887     sampler_dndi[index].dw6.di_partial = 0;
3888     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3889     sampler_dndi[index].dw6.dndi_stream_id = 0;
3890     sampler_dndi[index].dw6.dndi_first_frame = 1;
3891     sampler_dndi[index].dw6.progressive_dn = 0;
3892     sampler_dndi[index].dw6.fmd_tear_threshold = 2;
3893     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100;
3894     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 16;
3895
3896     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3897     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3898     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3899     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3900
3901     dri_bo_unmap(pp_context->sampler_state_table.bo);
3902
3903     /* private function & data */
3904     pp_context->pp_x_steps = pp_dndi_x_steps;
3905     pp_context->pp_y_steps = pp_dndi_y_steps;
3906     pp_context->private_context = &pp_context->pp_dndi_context;
3907     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
3908
3909     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
3910     pp_static_parameter->grf1.r1_6.di.top_field_first = dndi_top_first;
3911     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 0;
3912     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 0;
3913
3914     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
3915     pp_inline_parameter->grf5.number_blocks = w / 16;
3916     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3917     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3918
3919     pp_dndi_context->dest_w = w;
3920     pp_dndi_context->dest_h = h;
3921
3922     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3923
3924     return VA_STATUS_SUCCESS;
3925 }
3926
/*
 * Horizontal step count for the DN (denoise) kernel walk.
 *
 * The private context is not consulted; the count is always 1.
 */
static int
pp_dn_x_steps(void *private_context)
{
    (void)private_context;      /* intentionally unused */

    return 1;
}
3932
3933 static int
3934 pp_dn_y_steps(void *private_context)
3935 {
3936     struct pp_dn_context *pp_dn_context = private_context;
3937
3938     return pp_dn_context->dest_h / 8;
3939 }
3940
3941 static int
3942 pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3943 {
3944     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3945
3946     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3947     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
3948
3949     return 0;
3950 }
3951
3952 static VAStatus
3953 pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3954                       const struct i965_surface *src_surface,
3955                       const VARectangle *src_rect,
3956                       struct i965_surface *dst_surface,
3957                       const VARectangle *dst_rect,
3958                       void *filter_param)
3959 {
3960     struct i965_driver_data *i965 = i965_driver_data(ctx);
3961     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->pp_dn_context;
3962     struct object_surface *obj_surface;
3963     struct i965_sampler_dndi *sampler_dndi;
3964     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3965     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3966     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3967     int index;
3968     int w, h;
3969     int orig_w, orig_h;
3970     int dn_strength = 15;
3971     int dndi_top_first = 1;
3972     int dn_progressive = 0;
3973
3974     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3975         dndi_top_first = 1;
3976         dn_progressive = 1;
3977     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3978         dndi_top_first = 1;
3979         dn_progressive = 0;
3980     } else {
3981         dndi_top_first = 0;
3982         dn_progressive = 0;
3983     }
3984
3985     if (dn_filter_param) {
3986         float value = dn_filter_param->value;
3987         
3988         if (value > 1.0)
3989             value = 1.0;
3990         
3991         if (value < 0.0)
3992             value = 0.0;
3993
3994         dn_strength = (int)(value * 31.0F);
3995     }
3996
3997     /* surface */
3998     obj_surface = (struct object_surface *)src_surface->base;
3999     orig_w = obj_surface->orig_width;
4000     orig_h = obj_surface->orig_height;
4001     w = obj_surface->width;
4002     h = obj_surface->height;
4003
4004     if (pp_dn_context->stmm_bo == NULL) {
4005         pp_dn_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr,
4006                                               "STMM surface",
4007                                               w * h,
4008                                               4096);
4009         assert(pp_dn_context->stmm_bo);
4010     }
4011
4012     /* source UV surface index 2 */
4013     i965_pp_set_surface_state(ctx, pp_context,
4014                               obj_surface->bo, w * h,
4015                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
4016                               2, 0);
4017
4018     /* source YUV surface index 4 */
4019     i965_pp_set_surface2_state(ctx, pp_context,
4020                                obj_surface->bo, 0,
4021                                orig_w, orig_h, w,
4022                                0, h,
4023                                SURFACE_FORMAT_PLANAR_420_8, 1,
4024                                4);
4025
4026     /* source STMM surface index 20 */
4027     i965_pp_set_surface_state(ctx, pp_context,
4028                               pp_dn_context->stmm_bo, 0,
4029                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4030                               20, 1);
4031
4032     /* destination surface */
4033     obj_surface = (struct object_surface *)dst_surface->base;
4034     orig_w = obj_surface->orig_width;
4035     orig_h = obj_surface->orig_height;
4036     w = obj_surface->width;
4037     h = obj_surface->height;
4038
4039     /* destination Y surface index 7 */
4040     i965_pp_set_surface_state(ctx, pp_context,
4041                               obj_surface->bo, 0,
4042                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4043                               7, 1);
4044
4045     /* destination UV surface index 8 */
4046     i965_pp_set_surface_state(ctx, pp_context,
4047                               obj_surface->bo, w * h,
4048                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
4049                               8, 1);
4050     /* sampler dn */
4051     dri_bo_map(pp_context->sampler_state_table.bo, True);
4052     assert(pp_context->sampler_state_table.bo->virtual);
4053     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
4054     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
4055
4056     /* sample dndi index 1 */
4057     index = 0;
4058     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
4059     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
4060     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
4061     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
4062
4063     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
4064     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
4065     sampler_dndi[index].dw1.stmm_c2 = 0;
4066     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
4067     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
4068
4069     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
4070     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
4071     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
4072     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
4073
4074     sampler_dndi[index].dw3.maximum_stmm = 128;
4075     sampler_dndi[index].dw3.multipler_for_vecm = 2;
4076     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
4077     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
4078     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
4079
4080     sampler_dndi[index].dw4.sdi_delta = 8;
4081     sampler_dndi[index].dw4.sdi_threshold = 128;
4082     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
4083     sampler_dndi[index].dw4.stmm_shift_up = 0;
4084     sampler_dndi[index].dw4.stmm_shift_down = 0;
4085     sampler_dndi[index].dw4.minimum_stmm = 0;
4086
4087     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
4088     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
4089     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
4090     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
4091
4092     sampler_dndi[index].dw6.dn_enable = 1;
4093     sampler_dndi[index].dw6.di_enable = 0;
4094     sampler_dndi[index].dw6.di_partial = 0;
4095     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
4096     sampler_dndi[index].dw6.dndi_stream_id = 1;
4097     sampler_dndi[index].dw6.dndi_first_frame = 1;
4098     sampler_dndi[index].dw6.progressive_dn = dn_progressive;
4099     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
4100     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
4101     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
4102
4103     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
4104     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
4105     sampler_dndi[index].dw7.vdi_walker_enable = 0;
4106     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
4107
4108     dri_bo_unmap(pp_context->sampler_state_table.bo);
4109
4110     /* private function & data */
4111     pp_context->pp_x_steps = pp_dn_x_steps;
4112     pp_context->pp_y_steps = pp_dn_y_steps;
4113     pp_context->private_context = &pp_context->pp_dn_context;
4114     pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
4115
4116     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
4117     pp_static_parameter->grf1.r1_6.di.top_field_first = 0;
4118     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 64;
4119     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 192;
4120
4121     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
4122     pp_inline_parameter->grf5.number_blocks = w / 16;
4123     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
4124     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
4125
4126     pp_dn_context->dest_w = w;
4127     pp_dn_context->dest_h = h;
4128
4129     dst_surface->flags = src_surface->flags;
4130     
4131     return VA_STATUS_SUCCESS;
4132 }
4133
4134 static int
4135 gen7_pp_dndi_x_steps(void *private_context)
4136 {
4137     struct pp_dndi_context *pp_dndi_context = private_context;
4138
4139     return pp_dndi_context->dest_w / 16;
4140 }
4141
4142 static int
4143 gen7_pp_dndi_y_steps(void *private_context)
4144 {
4145     struct pp_dndi_context *pp_dndi_context = private_context;
4146
4147     return pp_dndi_context->dest_h / 4;
4148 }
4149
4150 static int
4151 gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
4152 {
4153     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
4154
4155     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16;
4156     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 4;
4157
4158     return 0;
4159 }
4160
4161
4162 extern VAStatus
4163 vpp_surface_convert(VADriverContextP ctx,
4164                     struct object_surface *src_obj_surf,
4165                     struct object_surface *dst_obj_surf);
4166
4167 static VAStatus
4168 gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
4169                              const struct i965_surface *src_surface,
4170                              const VARectangle *src_rect,
4171                              struct i965_surface *dst_surface,
4172                              const VARectangle *dst_rect,
4173                              void *filter_param)
4174 {
4175     struct i965_driver_data *i965 = i965_driver_data(ctx);
4176     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->pp_dndi_context;
4177     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
4178     struct object_surface *previous_in_obj_surface, *current_in_obj_surface, *previous_out_obj_surface, *current_out_obj_surface;
4179     struct gen7_sampler_dndi *sampler_dndi;
4180     int index;
4181     int w, h;
4182     int orig_w, orig_h;
4183     int dndi_top_first = 1;
4184     VAProcFilterParameterBufferDeinterlacing *di_filter_param = (VAProcFilterParameterBufferDeinterlacing *)filter_param;
4185     int is_first_frame = (pp_dndi_context->frame_order == -1);
4186
4187     if (di_filter_param->flags & VA_DEINTERLACING_BOTTOM_FIELD)
4188         dndi_top_first = 0;
4189     else
4190         dndi_top_first = 1;
4191
4192     /* surface */
4193     current_in_obj_surface = (struct object_surface *)src_surface->base;
4194
4195     if (di_filter_param->algorithm == VAProcDeinterlacingBob) {
4196         previous_in_obj_surface = current_in_obj_surface;
4197         is_first_frame = 1;
4198     } else if (di_filter_param->algorithm == VAProcDeinterlacingMotionAdaptive) {
4199         if (pp_dndi_context->frame_order == 0) {
4200             VAProcPipelineParameterBuffer *pipeline_param = pp_context->pipeline_param;
4201             if (!pipeline_param ||
4202                 !pipeline_param->num_forward_references ||
4203                 pipeline_param->forward_references[0] == VA_INVALID_ID) {
4204                 WARN_ONCE("A forward temporal reference is needed for Motion adaptive deinterlacing !!!\n");
4205
4206                 return VA_STATUS_ERROR_INVALID_PARAMETER;
4207             } else {
4208                 previous_in_obj_surface = SURFACE(pipeline_param->forward_references[0]);
4209                 assert(previous_in_obj_surface && previous_in_obj_surface->bo);
4210
4211                 is_first_frame = 0;
4212             }
4213         } else if (pp_dndi_context->frame_order == 1) {
4214             vpp_surface_convert(ctx,
4215                                 pp_dndi_context->current_out_obj_surface,
4216                                 (struct object_surface *)dst_surface->base);
4217             pp_dndi_context->frame_order = (pp_dndi_context->frame_order + 1) % 2;
4218             is_first_frame = 0;
4219
4220             return VA_STATUS_SUCCESS_1;
4221         } else {
4222             previous_in_obj_surface = current_in_obj_surface;
4223             is_first_frame = 1;
4224         }
4225     } else {
4226         return VA_STATUS_ERROR_UNIMPLEMENTED;
4227     }
4228
4229     /* source (temporal reference) YUV surface index 4 */
4230     orig_w = previous_in_obj_surface->orig_width;
4231     orig_h = previous_in_obj_surface->orig_height;
4232     w = previous_in_obj_surface->width;
4233     h = previous_in_obj_surface->height;
4234     gen7_pp_set_surface2_state(ctx, pp_context,
4235                                previous_in_obj_surface->bo, 0,
4236                                orig_w, orig_h, w,
4237                                0, h,
4238                                SURFACE_FORMAT_PLANAR_420_8, 1,
4239                                4);
4240
4241     /* source surface */
4242     orig_w = current_in_obj_surface->orig_width;
4243     orig_h = current_in_obj_surface->orig_height;
4244     w = current_in_obj_surface->width;
4245     h = current_in_obj_surface->height;
4246
4247     /* source UV surface index 1 */
4248     gen7_pp_set_surface_state(ctx, pp_context,
4249                               current_in_obj_surface->bo, w * h,
4250                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
4251                               1, 0);
4252
4253     /* source YUV surface index 3 */
4254     gen7_pp_set_surface2_state(ctx, pp_context,
4255                                current_in_obj_surface->bo, 0,
4256                                orig_w, orig_h, w,
4257                                0, h,
4258                                SURFACE_FORMAT_PLANAR_420_8, 1,
4259                                3);
4260
4261     /* STMM / History Statistics input surface, index 5 */
4262     if (pp_dndi_context->stmm_bo == NULL) {
4263         pp_dndi_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr,
4264                                                 "STMM surface",
4265                                                 w * h,
4266                                                 4096);
4267         assert(pp_dndi_context->stmm_bo);
4268     }
4269
4270     gen7_pp_set_surface_state(ctx, pp_context,
4271                               pp_dndi_context->stmm_bo, 0,
4272                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4273                               5, 1);
4274
4275     /* destination surface */
4276     previous_out_obj_surface = (struct object_surface *)dst_surface->base;
4277     orig_w = previous_out_obj_surface->orig_width;
4278     orig_h = previous_out_obj_surface->orig_height;
4279     w = previous_out_obj_surface->width;
4280     h = previous_out_obj_surface->height;
4281
4282     if (is_first_frame) {
4283         current_out_obj_surface = previous_out_obj_surface;
4284     } else {
4285         VAStatus va_status;
4286
4287         if (pp_dndi_context->current_out_surface == VA_INVALID_SURFACE) {
4288             unsigned int tiling = 0, swizzle = 0;
4289             dri_bo_get_tiling(previous_out_obj_surface->bo, &tiling, &swizzle);
4290
4291             va_status = i965_CreateSurfaces(ctx,
4292                                             orig_w,
4293                                             orig_h,
4294                                             VA_RT_FORMAT_YUV420,
4295                                             1,
4296                                             &pp_dndi_context->current_out_surface);
4297             assert(va_status == VA_STATUS_SUCCESS);
4298             pp_dndi_context->current_out_obj_surface = SURFACE(pp_dndi_context->current_out_surface);
4299             assert(pp_dndi_context->current_out_obj_surface);
4300             i965_check_alloc_surface_bo(ctx,
4301                                         pp_dndi_context->current_out_obj_surface,
4302                                         tiling != I915_TILING_NONE,
4303                                         VA_FOURCC('N','V','1','2'),
4304                                         SUBSAMPLE_YUV420);
4305         }
4306
4307         current_out_obj_surface = pp_dndi_context->current_out_obj_surface;
4308     }
4309
4310     /* destination(Previous frame) Y surface index 27 */
4311     gen7_pp_set_surface_state(ctx, pp_context,
4312                               previous_out_obj_surface->bo, 0,
4313                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4314                               27, 1);
4315
4316     /* destination(Previous frame) UV surface index 28 */
4317     gen7_pp_set_surface_state(ctx, pp_context,
4318                               previous_out_obj_surface->bo, w * h,
4319                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
4320                               28, 1);
4321
4322     /* destination(Current frame) Y surface index 30 */
4323     gen7_pp_set_surface_state(ctx, pp_context,
4324                               current_out_obj_surface->bo, 0,
4325                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4326                               30, 1);
4327
4328     /* destination(Current frame) UV surface index 31 */
4329     orig_w = current_out_obj_surface->orig_width;
4330     orig_h = current_out_obj_surface->orig_height;
4331     w = current_out_obj_surface->width;
4332     h = current_out_obj_surface->height;
4333
4334     gen7_pp_set_surface_state(ctx, pp_context,
4335                               current_out_obj_surface->bo, w * h,
4336                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
4337                               31, 1);
4338
4339     /* STMM output surface, index 33 */
4340     gen7_pp_set_surface_state(ctx, pp_context,
4341                               pp_dndi_context->stmm_bo, 0,
4342                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4343                               33, 1);
4344
4345
4346     /* sampler dndi */
4347     dri_bo_map(pp_context->sampler_state_table.bo, True);
4348     assert(pp_context->sampler_state_table.bo->virtual);
4349     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
4350     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
4351
4352     /* sample dndi index 0 */
4353     index = 0;
4354     sampler_dndi[index].dw0.denoise_asd_threshold = 38;
4355     sampler_dndi[index].dw0.dnmh_delt = 7;
4356     sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
4357     sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
4358     sampler_dndi[index].dw0.denoise_maximum_history = 192;      // 128-240
4359     sampler_dndi[index].dw0.denoise_stad_threshold = 140;
4360
4361     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 38;
4362     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 1;
4363     sampler_dndi[index].dw1.stmm_c2 = 2;
4364     sampler_dndi[index].dw1.low_temporal_difference_threshold = 0;
4365     sampler_dndi[index].dw1.temporal_difference_threshold = 0;
4366
4367     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 20;   // 0-31
4368     sampler_dndi[index].dw2.bne_edge_th = 1;
4369     sampler_dndi[index].dw2.smooth_mv_th = 0;
4370     sampler_dndi[index].dw2.sad_tight_th = 5;
4371     sampler_dndi[index].dw2.cat_slope_minus1 = 9;
4372     sampler_dndi[index].dw2.good_neighbor_th = 12;
4373
4374     sampler_dndi[index].dw3.maximum_stmm = 150;
4375     sampler_dndi[index].dw3.multipler_for_vecm = 30;
4376     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 125;
4377     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
4378     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
4379
4380     sampler_dndi[index].dw4.sdi_delta = 5;
4381     sampler_dndi[index].dw4.sdi_threshold = 100;
4382     sampler_dndi[index].dw4.stmm_output_shift = 5;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
4383     sampler_dndi[index].dw4.stmm_shift_up = 1;
4384     sampler_dndi[index].dw4.stmm_shift_down = 0;
4385     sampler_dndi[index].dw4.minimum_stmm = 118;
4386
4387     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 175;
4388     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 37;
4389     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 100;
4390     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 50;
4391     sampler_dndi[index].dw6.dn_enable = 0;
4392     sampler_dndi[index].dw6.di_enable = 1;
4393     sampler_dndi[index].dw6.di_partial = 0;
4394     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
4395     sampler_dndi[index].dw6.dndi_stream_id = 1;
4396     sampler_dndi[index].dw6.dndi_first_frame = is_first_frame;
4397     sampler_dndi[index].dw6.progressive_dn = 0;
4398     sampler_dndi[index].dw6.mcdi_enable = 0;
4399     sampler_dndi[index].dw6.fmd_tear_threshold = 2;
4400     sampler_dndi[index].dw6.cat_th1 = 0;
4401     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100;
4402     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 16;
4403
4404     sampler_dndi[index].dw7.sad_tha = 5;
4405     sampler_dndi[index].dw7.sad_thb = 10;
4406     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
4407     sampler_dndi[index].dw7.mc_pixel_consistency_th = 25;
4408     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
4409     sampler_dndi[index].dw7.vdi_walker_enable = 0;
4410     sampler_dndi[index].dw7.neighborpixel_th = 10;
4411     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
4412
4413     dri_bo_unmap(pp_context->sampler_state_table.bo);
4414
4415     /* private function & data */
4416     pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
4417     pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
4418     pp_context->private_context = &pp_context->pp_dndi_context;
4419     pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
4420
4421     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
4422     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
4423     pp_static_parameter->grf1.di_top_field_first = 0;
4424     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
4425
4426     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
4427     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
4428     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
4429
4430     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
4431     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
4432
4433     pp_dndi_context->dest_w = w;
4434     pp_dndi_context->dest_h = h;
4435
4436     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
4437
4438     pp_dndi_context->frame_order = (pp_dndi_context->frame_order + 1) % 2;
4439
4440     return VA_STATUS_SUCCESS;
4441 }
4442
4443 static int
4444 gen7_pp_dn_x_steps(void *private_context)
4445 {
4446     struct pp_dn_context *pp_dn_context = private_context;
4447
4448     return pp_dn_context->dest_w / 16;
4449 }
4450
4451 static int
4452 gen7_pp_dn_y_steps(void *private_context)
4453 {
4454     struct pp_dn_context *pp_dn_context = private_context;
4455
4456     return pp_dn_context->dest_h / 4;
4457 }
4458
4459 static int
4460 gen7_pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
4461 {
4462     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
4463
4464     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
4465     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
4466
4467     return 0;
4468 }
4469
4470 static VAStatus
4471 gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
4472                            const struct i965_surface *src_surface,
4473                            const VARectangle *src_rect,
4474                            struct i965_surface *dst_surface,
4475                            const VARectangle *dst_rect,
4476                            void *filter_param)
4477 {
4478     struct i965_driver_data *i965 = i965_driver_data(ctx);
4479     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->pp_dn_context;
4480     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
4481     struct object_surface *obj_surface;
4482     struct gen7_sampler_dndi *sampler_dn;
4483     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
4484     int index;
4485     int w, h;
4486     int orig_w, orig_h;
4487     int dn_strength = 15;
4488     int dndi_top_first = 1;
4489     int dn_progressive = 0;
4490
4491     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
4492         dndi_top_first = 1;
4493         dn_progressive = 1;
4494     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
4495         dndi_top_first = 1;
4496         dn_progressive = 0;
4497     } else {
4498         dndi_top_first = 0;
4499         dn_progressive = 0;
4500     }
4501
4502     if (dn_filter_param) {
4503         float value = dn_filter_param->value;
4504         
4505         if (value > 1.0)
4506             value = 1.0;
4507         
4508         if (value < 0.0)
4509             value = 0.0;
4510
4511         dn_strength = (int)(value * 31.0F);
4512     }
4513
4514     /* surface */
4515     obj_surface = (struct object_surface *)src_surface->base;
4516     orig_w = obj_surface->orig_width;
4517     orig_h = obj_surface->orig_height;
4518     w = obj_surface->width;
4519     h = obj_surface->height;
4520
4521     if (pp_dn_context->stmm_bo == NULL) {
4522         pp_dn_context->stmm_bo= dri_bo_alloc(i965->intel.bufmgr,
4523                                              "STMM surface",
4524                                              w * h,
4525                                              4096);
4526         assert(pp_dn_context->stmm_bo);
4527     }
4528
4529     /* source UV surface index 1 */
4530     gen7_pp_set_surface_state(ctx, pp_context,
4531                               obj_surface->bo, w * h,
4532                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
4533                               1, 0);
4534
4535     /* source YUV surface index 3 */
4536     gen7_pp_set_surface2_state(ctx, pp_context,
4537                                obj_surface->bo, 0,
4538                                orig_w, orig_h, w,
4539                                0, h,
4540                                SURFACE_FORMAT_PLANAR_420_8, 1,
4541                                3);
4542
4543     /* source (temporal reference) YUV surface index 4 */
4544     gen7_pp_set_surface2_state(ctx, pp_context,
4545                                obj_surface->bo, 0,
4546                                orig_w, orig_h, w,
4547                                0, h,
4548                                SURFACE_FORMAT_PLANAR_420_8, 1,
4549                                4);
4550
4551     /* STMM / History Statistics input surface, index 5 */
4552     gen7_pp_set_surface_state(ctx, pp_context,
4553                               pp_dn_context->stmm_bo, 0,
4554                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4555                               33, 1);
4556
4557     /* destination surface */
4558     obj_surface = (struct object_surface *)dst_surface->base;
4559     orig_w = obj_surface->orig_width;
4560     orig_h = obj_surface->orig_height;
4561     w = obj_surface->width;
4562     h = obj_surface->height;
4563
4564     /* destination Y surface index 24 */
4565     gen7_pp_set_surface_state(ctx, pp_context,
4566                               obj_surface->bo, 0,
4567                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
4568                               24, 1);
4569
4570     /* destination UV surface index 25 */
4571     gen7_pp_set_surface_state(ctx, pp_context,
4572                               obj_surface->bo, w * h,
4573                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
4574                               25, 1);
4575
4576     /* sampler dn */
4577     dri_bo_map(pp_context->sampler_state_table.bo, True);
4578     assert(pp_context->sampler_state_table.bo->virtual);
4579     assert(sizeof(*sampler_dn) == sizeof(int) * 8);
4580     sampler_dn = pp_context->sampler_state_table.bo->virtual;
4581
4582     /* sample dn index 1 */
4583     index = 0;
4584     sampler_dn[index].dw0.denoise_asd_threshold = 0;
4585     sampler_dn[index].dw0.dnmh_delt = 8;
4586     sampler_dn[index].dw0.vdi_walker_y_stride = 0;
4587     sampler_dn[index].dw0.vdi_walker_frame_sharing_enable = 0;
4588     sampler_dn[index].dw0.denoise_maximum_history = 128;      // 128-240
4589     sampler_dn[index].dw0.denoise_stad_threshold = 0;
4590
4591     sampler_dn[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
4592     sampler_dn[index].dw1.denoise_moving_pixel_threshold = 0;
4593     sampler_dn[index].dw1.stmm_c2 = 0;
4594     sampler_dn[index].dw1.low_temporal_difference_threshold = 8;
4595     sampler_dn[index].dw1.temporal_difference_threshold = 16;
4596
4597     sampler_dn[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
4598     sampler_dn[index].dw2.bne_edge_th = 1;
4599     sampler_dn[index].dw2.smooth_mv_th = 0;
4600     sampler_dn[index].dw2.sad_tight_th = 5;
4601     sampler_dn[index].dw2.cat_slope_minus1 = 9;
4602     sampler_dn[index].dw2.good_neighbor_th = 4;
4603
4604     sampler_dn[index].dw3.maximum_stmm = 128;
4605     sampler_dn[index].dw3.multipler_for_vecm = 2;
4606     sampler_dn[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
4607     sampler_dn[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
4608     sampler_dn[index].dw3.stmm_blending_constant_select = 0;
4609
4610     sampler_dn[index].dw4.sdi_delta = 8;
4611     sampler_dn[index].dw4.sdi_threshold = 128;
4612     sampler_dn[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
4613     sampler_dn[index].dw4.stmm_shift_up = 0;
4614     sampler_dn[index].dw4.stmm_shift_down = 0;
4615     sampler_dn[index].dw4.minimum_stmm = 0;
4616
4617     sampler_dn[index].dw5.fmd_temporal_difference_threshold = 0;
4618     sampler_dn[index].dw5.sdi_fallback_mode_2_constant = 0;
4619     sampler_dn[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
4620     sampler_dn[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
4621
4622     sampler_dn[index].dw6.dn_enable = 1;
4623     sampler_dn[index].dw6.di_enable = 0;
4624     sampler_dn[index].dw6.di_partial = 0;
4625     sampler_dn[index].dw6.dndi_top_first = dndi_top_first;
4626     sampler_dn[index].dw6.dndi_stream_id = 1;
4627     sampler_dn[index].dw6.dndi_first_frame = 1;
4628     sampler_dn[index].dw6.progressive_dn = dn_progressive;
4629     sampler_dn[index].dw6.mcdi_enable = 0;
4630     sampler_dn[index].dw6.fmd_tear_threshold = 32;
4631     sampler_dn[index].dw6.cat_th1 = 0;
4632     sampler_dn[index].dw6.fmd2_vertical_difference_threshold = 32;
4633     sampler_dn[index].dw6.fmd1_vertical_difference_threshold = 32;
4634
4635     sampler_dn[index].dw7.sad_tha = 5;
4636     sampler_dn[index].dw7.sad_thb = 10;
4637     sampler_dn[index].dw7.fmd_for_1st_field_of_current_frame = 2;
4638     sampler_dn[index].dw7.mc_pixel_consistency_th = 25;
4639     sampler_dn[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
4640     sampler_dn[index].dw7.vdi_walker_enable = 0;
4641     sampler_dn[index].dw7.neighborpixel_th = 10;
4642     sampler_dn[index].dw7.column_width_minus1 = w / 16;
4643
4644     dri_bo_unmap(pp_context->sampler_state_table.bo);
4645
4646     /* private function & data */
4647     pp_context->pp_x_steps = gen7_pp_dn_x_steps;
4648     pp_context->pp_y_steps = gen7_pp_dn_y_steps;
4649     pp_context->private_context = &pp_context->pp_dn_context;
4650     pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;
4651
4652     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
4653     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
4654     pp_static_parameter->grf1.di_top_field_first = 0;
4655     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
4656
4657     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
4658     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
4659     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
4660
4661     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
4662     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
4663
4664     pp_dn_context->dest_w = w;
4665     pp_dn_context->dest_h = h;
4666
4667     dst_surface->flags = src_surface->flags;
4668
4669     return VA_STATUS_SUCCESS;
4670 }
4671
4672 static VAStatus
4673 ironlake_pp_initialize(
4674     VADriverContextP ctx,
4675     struct i965_post_processing_context *pp_context,
4676     const struct i965_surface *src_surface,
4677     const VARectangle *src_rect,
4678     struct i965_surface *dst_surface,
4679     const VARectangle *dst_rect,
4680     int pp_index,
4681     void *filter_param
4682 )
4683 {
4684     VAStatus va_status;
4685     struct i965_driver_data *i965 = i965_driver_data(ctx);
4686     struct pp_module *pp_module;
4687     dri_bo *bo;
4688     int static_param_size, inline_param_size;
4689
4690     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4691     bo = dri_bo_alloc(i965->intel.bufmgr,
4692                       "surface state & binding table",
4693                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
4694                       4096);
4695     assert(bo);
4696     pp_context->surface_state_binding_table.bo = bo;
4697
4698     dri_bo_unreference(pp_context->curbe.bo);
4699     bo = dri_bo_alloc(i965->intel.bufmgr,
4700                       "constant buffer",
4701                       4096, 
4702                       4096);
4703     assert(bo);
4704     pp_context->curbe.bo = bo;
4705
4706     dri_bo_unreference(pp_context->idrt.bo);
4707     bo = dri_bo_alloc(i965->intel.bufmgr, 
4708                       "interface discriptor", 
4709                       sizeof(struct i965_interface_descriptor), 
4710                       4096);
4711     assert(bo);
4712     pp_context->idrt.bo = bo;
4713     pp_context->idrt.num_interface_descriptors = 0;
4714
4715     dri_bo_unreference(pp_context->sampler_state_table.bo);
4716     bo = dri_bo_alloc(i965->intel.bufmgr, 
4717                       "sampler state table", 
4718                       4096,
4719                       4096);
4720     assert(bo);
4721     dri_bo_map(bo, True);
4722     memset(bo->virtual, 0, bo->size);
4723     dri_bo_unmap(bo);
4724     pp_context->sampler_state_table.bo = bo;
4725
4726     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4727     bo = dri_bo_alloc(i965->intel.bufmgr, 
4728                       "sampler 8x8 state ",
4729                       4096,
4730                       4096);
4731     assert(bo);
4732     pp_context->sampler_state_table.bo_8x8 = bo;
4733
4734     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4735     bo = dri_bo_alloc(i965->intel.bufmgr, 
4736                       "sampler 8x8 state ",
4737                       4096,
4738                       4096);
4739     assert(bo);
4740     pp_context->sampler_state_table.bo_8x8_uv = bo;
4741
4742     dri_bo_unreference(pp_context->vfe_state.bo);
4743     bo = dri_bo_alloc(i965->intel.bufmgr, 
4744                       "vfe state", 
4745                       sizeof(struct i965_vfe_state), 
4746                       4096);
4747     assert(bo);
4748     pp_context->vfe_state.bo = bo;
4749
4750     static_param_size = sizeof(struct pp_static_parameter);
4751     inline_param_size = sizeof(struct pp_inline_parameter);
4752
4753     memset(pp_context->pp_static_parameter, 0, static_param_size);
4754     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
4755     
4756     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
4757     pp_context->current_pp = pp_index;
4758     pp_module = &pp_context->pp_modules[pp_index];
4759     
4760     if (pp_module->initialize)
4761         va_status = pp_module->initialize(ctx, pp_context,
4762                                           src_surface,
4763                                           src_rect,
4764                                           dst_surface,
4765                                           dst_rect,
4766                                           filter_param);
4767     else
4768         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4769
4770     return va_status;
4771 }
4772
4773 static VAStatus
4774 ironlake_post_processing(
4775     VADriverContextP   ctx,
4776     struct i965_post_processing_context *pp_context,
4777     const struct i965_surface *src_surface,
4778     const VARectangle *src_rect,
4779     struct i965_surface *dst_surface,
4780     const VARectangle *dst_rect,
4781     int                pp_index,
4782     void *filter_param
4783 )
4784 {
4785     VAStatus va_status;
4786
4787     va_status = ironlake_pp_initialize(ctx, pp_context,
4788                                        src_surface,
4789                                        src_rect,
4790                                        dst_surface,
4791                                        dst_rect,
4792                                        pp_index,
4793                                        filter_param);
4794
4795     if (va_status == VA_STATUS_SUCCESS) {
4796         ironlake_pp_states_setup(ctx, pp_context);
4797         ironlake_pp_pipeline_setup(ctx, pp_context);
4798     }
4799
4800     return va_status;
4801 }
4802
4803 static VAStatus
4804 gen6_pp_initialize(
4805     VADriverContextP ctx,
4806     struct i965_post_processing_context *pp_context,
4807     const struct i965_surface *src_surface,
4808     const VARectangle *src_rect,
4809     struct i965_surface *dst_surface,
4810     const VARectangle *dst_rect,
4811     int pp_index,
4812     void *filter_param
4813 )
4814 {
4815     VAStatus va_status;
4816     struct i965_driver_data *i965 = i965_driver_data(ctx);
4817     struct pp_module *pp_module;
4818     dri_bo *bo;
4819     int static_param_size, inline_param_size;
4820
4821     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4822     bo = dri_bo_alloc(i965->intel.bufmgr,
4823                       "surface state & binding table",
4824                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
4825                       4096);
4826     assert(bo);
4827     pp_context->surface_state_binding_table.bo = bo;
4828
4829     dri_bo_unreference(pp_context->curbe.bo);
4830     bo = dri_bo_alloc(i965->intel.bufmgr,
4831                       "constant buffer",
4832                       4096, 
4833                       4096);
4834     assert(bo);
4835     pp_context->curbe.bo = bo;
4836
4837     dri_bo_unreference(pp_context->idrt.bo);
4838     bo = dri_bo_alloc(i965->intel.bufmgr, 
4839                       "interface discriptor", 
4840                       sizeof(struct gen6_interface_descriptor_data), 
4841                       4096);
4842     assert(bo);
4843     pp_context->idrt.bo = bo;
4844     pp_context->idrt.num_interface_descriptors = 0;
4845
4846     dri_bo_unreference(pp_context->sampler_state_table.bo);
4847     bo = dri_bo_alloc(i965->intel.bufmgr, 
4848                       "sampler state table", 
4849                       4096,
4850                       4096);
4851     assert(bo);
4852     dri_bo_map(bo, True);
4853     memset(bo->virtual, 0, bo->size);
4854     dri_bo_unmap(bo);
4855     pp_context->sampler_state_table.bo = bo;
4856
4857     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4858     bo = dri_bo_alloc(i965->intel.bufmgr, 
4859                       "sampler 8x8 state ",
4860                       4096,
4861                       4096);
4862     assert(bo);
4863     pp_context->sampler_state_table.bo_8x8 = bo;
4864
4865     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4866     bo = dri_bo_alloc(i965->intel.bufmgr, 
4867                       "sampler 8x8 state ",
4868                       4096,
4869                       4096);
4870     assert(bo);
4871     pp_context->sampler_state_table.bo_8x8_uv = bo;
4872
4873     dri_bo_unreference(pp_context->vfe_state.bo);
4874     bo = dri_bo_alloc(i965->intel.bufmgr, 
4875                       "vfe state", 
4876                       sizeof(struct i965_vfe_state), 
4877                       4096);
4878     assert(bo);
4879     pp_context->vfe_state.bo = bo;
4880     
4881     if (IS_GEN7(i965->intel.device_id)) {
4882         static_param_size = sizeof(struct gen7_pp_static_parameter);
4883         inline_param_size = sizeof(struct gen7_pp_inline_parameter);
4884     } else {
4885         static_param_size = sizeof(struct pp_static_parameter);
4886         inline_param_size = sizeof(struct pp_inline_parameter);
4887     }
4888
4889     memset(pp_context->pp_static_parameter, 0, static_param_size);
4890     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
4891
4892     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
4893     pp_context->current_pp = pp_index;
4894     pp_module = &pp_context->pp_modules[pp_index];
4895     
4896     if (pp_module->initialize)
4897         va_status = pp_module->initialize(ctx, pp_context,
4898                                           src_surface,
4899                                           src_rect,
4900                                           dst_surface,
4901                                           dst_rect,
4902                                           filter_param);
4903     else
4904         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4905
4906     calculate_boundary_block_mask(pp_context, dst_rect);
4907
4908     return va_status;
4909 }
4910
4911
4912 static VAStatus
4913 gen8_pp_initialize(
4914     VADriverContextP   ctx,
4915     struct i965_post_processing_context *pp_context,
4916     const struct i965_surface *src_surface,
4917     const VARectangle *src_rect,
4918     struct i965_surface *dst_surface,
4919     const VARectangle *dst_rect,
4920     int                pp_index,
4921     void * filter_param
4922 )
4923 {
4924     VAStatus va_status;
4925     struct i965_driver_data *i965 = i965_driver_data(ctx);
4926     dri_bo *bo;
4927     int bo_size;
4928     unsigned int end_offset;
4929     struct pp_module *pp_module;
4930     int static_param_size, inline_param_size;
4931
4932     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4933     bo = dri_bo_alloc(i965->intel.bufmgr,
4934                       "surface state & binding table",
4935                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
4936                       4096);
4937     assert(bo);
4938     pp_context->surface_state_binding_table.bo = bo;
4939
4940     pp_context->idrt.num_interface_descriptors = 0;
4941
4942     pp_context->sampler_size = 2 * 4096;
4943
4944     bo_size = 4096 + pp_context->curbe_size + pp_context->sampler_size
4945                 + pp_context->idrt_size;
4946
4947     dri_bo_unreference(pp_context->dynamic_state.bo);
4948     bo = dri_bo_alloc(i965->intel.bufmgr,
4949                       "dynamic_state",
4950                       bo_size,
4951                       4096);
4952
4953     assert(bo);
4954     pp_context->dynamic_state.bo = bo;
4955     pp_context->dynamic_state.bo_size = bo_size;
4956
4957     end_offset = 0;
4958     pp_context->dynamic_state.end_offset = 0;
4959
4960     /* Constant buffer offset */
4961     pp_context->curbe_offset = ALIGN(end_offset, 64);
4962     end_offset = pp_context->curbe_offset + pp_context->curbe_size;
4963
4964     /* Interface descriptor offset */
4965     pp_context->idrt_offset = ALIGN(end_offset, 64);
4966     end_offset = pp_context->idrt_offset + pp_context->idrt_size;
4967
4968     /* Sampler state offset */
4969     pp_context->sampler_offset = ALIGN(end_offset, 64);
4970     end_offset = pp_context->sampler_offset + pp_context->sampler_size;
4971
4972     /* update the end offset of dynamic_state */
4973     pp_context->dynamic_state.end_offset = ALIGN(end_offset, 64);
4974
4975     static_param_size = sizeof(struct gen7_pp_static_parameter);
4976     inline_param_size = sizeof(struct gen7_pp_inline_parameter);
4977
4978     memset(pp_context->pp_static_parameter, 0, static_param_size);
4979     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
4980
4981     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
4982     pp_context->current_pp = pp_index;
4983     pp_module = &pp_context->pp_modules[pp_index];
4984     
4985     if (pp_module->initialize)
4986         va_status = pp_module->initialize(ctx, pp_context,
4987                                           src_surface,
4988                                           src_rect,
4989                                           dst_surface,
4990                                           dst_rect,
4991                                           filter_param);
4992     else
4993         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4994  
4995     calculate_boundary_block_mask(pp_context, dst_rect);
4996
4997     return va_status;
4998 }
4999
5000 static void
5001 gen6_pp_interface_descriptor_table(VADriverContextP   ctx,
5002                                    struct i965_post_processing_context *pp_context)
5003 {
5004     struct i965_driver_data *i965 = i965_driver_data(ctx);
5005     struct gen6_interface_descriptor_data *desc;
5006     dri_bo *bo;
5007     int pp_index = pp_context->current_pp;
5008
5009     bo = pp_context->idrt.bo;
5010     dri_bo_map(bo, True);
5011     assert(bo->virtual);
5012     desc = bo->virtual;
5013     memset(desc, 0, sizeof(*desc));
5014     desc->desc0.kernel_start_pointer = 
5015         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
5016     desc->desc1.single_program_flow = 1;
5017     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
5018     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
5019     desc->desc2.sampler_state_pointer = 
5020         pp_context->sampler_state_table.bo->offset >> 5;
5021     desc->desc3.binding_table_entry_count = 0;
5022     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
5023     desc->desc4.constant_urb_entry_read_offset = 0;
5024
5025     if (IS_GEN7(i965->intel.device_id))
5026         desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */
5027     else
5028         desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
5029
5030     dri_bo_emit_reloc(bo,
5031                       I915_GEM_DOMAIN_INSTRUCTION, 0,
5032                       0,
5033                       offsetof(struct gen6_interface_descriptor_data, desc0),
5034                       pp_context->pp_modules[pp_index].kernel.bo);
5035
5036     dri_bo_emit_reloc(bo,
5037                       I915_GEM_DOMAIN_INSTRUCTION, 0,
5038                       desc->desc2.sampler_count << 2,
5039                       offsetof(struct gen6_interface_descriptor_data, desc2),
5040                       pp_context->sampler_state_table.bo);
5041
5042     dri_bo_unmap(bo);
5043     pp_context->idrt.num_interface_descriptors++;
5044 }
5045
5046 static void
5047 gen8_pp_interface_descriptor_table(VADriverContextP   ctx,
5048                                    struct i965_post_processing_context *pp_context)
5049 {
5050     struct gen8_interface_descriptor_data *desc;
5051     dri_bo *bo;
5052     int pp_index = pp_context->current_pp;
5053     unsigned char *cc_ptr;
5054
5055     bo = pp_context->dynamic_state.bo;
5056
5057     dri_bo_map(bo, 1);
5058     assert(bo->virtual);
5059     cc_ptr = (unsigned char *)bo->virtual + pp_context->idrt_offset;
5060
5061     desc = (struct gen8_interface_descriptor_data *) cc_ptr +
5062                 pp_context->idrt.num_interface_descriptors;
5063
5064     memset(desc, 0, sizeof(*desc));
5065     desc->desc0.kernel_start_pointer = 
5066         pp_context->pp_modules[pp_index].kernel.kernel_offset >> 6; /* reloc */
5067     desc->desc2.single_program_flow = 1;
5068     desc->desc2.floating_point_mode = FLOATING_POINT_IEEE_754;
5069     desc->desc3.sampler_count = 0;      /* 1 - 4 samplers used */
5070     desc->desc3.sampler_state_pointer = pp_context->sampler_offset >> 5;
5071     desc->desc4.binding_table_entry_count = 0;
5072     desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
5073     desc->desc5.constant_urb_entry_read_offset = 0;
5074     
5075     desc->desc5.constant_urb_entry_read_length = 6; /* grf 1-6 */
5076
5077     dri_bo_unmap(bo);
5078     pp_context->idrt.num_interface_descriptors++;
5079 }
5080
5081 static void
5082 gen6_pp_upload_constants(VADriverContextP ctx,
5083                          struct i965_post_processing_context *pp_context)
5084 {
5085     struct i965_driver_data *i965 = i965_driver_data(ctx);
5086     unsigned char *constant_buffer;
5087     int param_size;
5088
5089     assert(sizeof(struct pp_static_parameter) == 128);
5090     assert(sizeof(struct gen7_pp_static_parameter) == 192);
5091
5092     if (IS_GEN7(i965->intel.device_id) ||
5093         IS_GEN8(i965->intel.device_id))
5094         param_size = sizeof(struct gen7_pp_static_parameter);
5095     else
5096         param_size = sizeof(struct pp_static_parameter);
5097
5098     dri_bo_map(pp_context->curbe.bo, 1);
5099     assert(pp_context->curbe.bo->virtual);
5100     constant_buffer = pp_context->curbe.bo->virtual;
5101     memcpy(constant_buffer, pp_context->pp_static_parameter, param_size);
5102     dri_bo_unmap(pp_context->curbe.bo);
5103 }
5104
5105 static void
5106 gen6_pp_states_setup(VADriverContextP ctx,
5107                      struct i965_post_processing_context *pp_context)
5108 {
5109     gen6_pp_interface_descriptor_table(ctx, pp_context);
5110     gen6_pp_upload_constants(ctx, pp_context);
5111 }
5112
5113 static void
5114 gen8_pp_upload_constants(VADriverContextP ctx,
5115                          struct i965_post_processing_context *pp_context)
5116 {
5117     struct i965_driver_data *i965 = i965_driver_data(ctx);
5118     unsigned char *constant_buffer;
5119     int param_size;
5120
5121     assert(sizeof(struct gen7_pp_static_parameter) == 192);
5122
5123     if (IS_GEN8(i965->intel.device_id))
5124         param_size = sizeof(struct gen7_pp_static_parameter);
5125
5126     dri_bo_map(pp_context->dynamic_state.bo, 1);
5127     assert(pp_context->dynamic_state.bo->virtual);
5128     constant_buffer = (unsigned char *) pp_context->dynamic_state.bo->virtual +
5129                         pp_context->curbe_offset;
5130
5131     memcpy(constant_buffer, pp_context->pp_static_parameter, param_size);
5132     dri_bo_unmap(pp_context->dynamic_state.bo);
5133     return;
5134 }
5135
5136 static void
5137 gen8_pp_states_setup(VADriverContextP ctx,
5138                      struct i965_post_processing_context *pp_context)
5139 {
5140     gen8_pp_interface_descriptor_table(ctx, pp_context);
5141     gen8_pp_upload_constants(ctx, pp_context);
5142 }
5143
5144 static void
5145 gen6_pp_pipeline_select(VADriverContextP ctx,
5146                         struct i965_post_processing_context *pp_context)
5147 {
5148     struct intel_batchbuffer *batch = pp_context->batch;
5149
5150     BEGIN_BATCH(batch, 1);
5151     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
5152     ADVANCE_BATCH(batch);
5153 }
5154
5155 static void
5156 gen6_pp_state_base_address(VADriverContextP ctx,
5157                            struct i965_post_processing_context *pp_context)
5158 {
5159     struct intel_batchbuffer *batch = pp_context->batch;
5160
5161     BEGIN_BATCH(batch, 10);
5162     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
5163     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
5164     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
5165     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
5166     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
5167     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
5168     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
5169     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
5170     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
5171     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
5172     ADVANCE_BATCH(batch);
5173 }
5174
5175 static void
5176 gen8_pp_state_base_address(VADriverContextP ctx,
5177                            struct i965_post_processing_context *pp_context)
5178 {
5179     struct intel_batchbuffer *batch = pp_context->batch;
5180
5181     BEGIN_BATCH(batch, 16);
5182     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2));
5183         /* DW1 Generate state address */
5184     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
5185         OUT_BATCH(batch, 0);
5186         OUT_BATCH(batch, 0);
5187         /* DW4. Surface state address */
5188     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
5189         OUT_BATCH(batch, 0);
5190         /* DW6. Dynamic state address */
5191     OUT_RELOC(batch, pp_context->dynamic_state.bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
5192                 0, 0 | BASE_ADDRESS_MODIFY);
5193         OUT_BATCH(batch, 0);
5194
5195         /* DW8. Indirect object address */
5196     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
5197         OUT_BATCH(batch, 0);
5198
5199         /* DW10. Instruction base address */
5200     OUT_RELOC(batch, pp_context->instruction_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
5201         OUT_BATCH(batch, 0);
5202
5203     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY);
5204     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY);
5205     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY);
5206     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY);
5207     ADVANCE_BATCH(batch);
5208 }
5209
5210 static void
5211 gen6_pp_vfe_state(VADriverContextP ctx,
5212                   struct i965_post_processing_context *pp_context)
5213 {
5214     struct intel_batchbuffer *batch = pp_context->batch;
5215
5216     BEGIN_BATCH(batch, 8);
5217     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
5218     OUT_BATCH(batch, 0);
5219     OUT_BATCH(batch,
5220               (pp_context->vfe_gpu_state.max_num_threads - 1) << 16 |
5221               pp_context->vfe_gpu_state.num_urb_entries << 8);
5222     OUT_BATCH(batch, 0);
5223     OUT_BATCH(batch,
5224               (pp_context->vfe_gpu_state.urb_entry_size) << 16 |  
5225                 /* URB Entry Allocation Size, in 256 bits unit */
5226               (pp_context->vfe_gpu_state.curbe_allocation_size));
5227                 /* CURBE Allocation Size, in 256 bits unit */
5228     OUT_BATCH(batch, 0);
5229     OUT_BATCH(batch, 0);
5230     OUT_BATCH(batch, 0);
5231     ADVANCE_BATCH(batch);
5232 }
5233
5234 static void
5235 gen8_pp_vfe_state(VADriverContextP ctx,
5236                   struct i965_post_processing_context *pp_context)
5237 {
5238     struct intel_batchbuffer *batch = pp_context->batch;
5239
5240     BEGIN_BATCH(batch, 9);
5241     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (9 - 2));
5242     OUT_BATCH(batch, 0);
5243     OUT_BATCH(batch, 0);
5244     OUT_BATCH(batch,
5245               (pp_context->vfe_gpu_state.max_num_threads - 1) << 16 |
5246               pp_context->vfe_gpu_state.num_urb_entries << 8);
5247     OUT_BATCH(batch, 0);
5248     OUT_BATCH(batch,
5249               (pp_context->vfe_gpu_state.urb_entry_size) << 16 |  
5250                 /* URB Entry Allocation Size, in 256 bits unit */
5251               (pp_context->vfe_gpu_state.curbe_allocation_size));
5252                 /* CURBE Allocation Size, in 256 bits unit */
5253     OUT_BATCH(batch, 0);
5254     OUT_BATCH(batch, 0);
5255     OUT_BATCH(batch, 0);
5256     ADVANCE_BATCH(batch);
5257 }
5258
5259 static void
5260 gen6_pp_curbe_load(VADriverContextP ctx,
5261                    struct i965_post_processing_context *pp_context)
5262 {
5263     struct intel_batchbuffer *batch = pp_context->batch;
5264     struct i965_driver_data *i965 = i965_driver_data(ctx);
5265     int param_size;
5266
5267     if (IS_GEN7(i965->intel.device_id) ||
5268         IS_GEN8(i965->intel.device_id))
5269         param_size = sizeof(struct gen7_pp_static_parameter);
5270     else
5271         param_size = sizeof(struct pp_static_parameter);
5272
5273     BEGIN_BATCH(batch, 4);
5274     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
5275     OUT_BATCH(batch, 0);
5276     OUT_BATCH(batch,
5277               param_size);
5278     OUT_RELOC(batch, 
5279               pp_context->curbe.bo,
5280               I915_GEM_DOMAIN_INSTRUCTION, 0,
5281               0);
5282     ADVANCE_BATCH(batch);
5283 }
5284
5285 static void
5286 gen6_interface_descriptor_load(VADriverContextP ctx,
5287                                struct i965_post_processing_context *pp_context)
5288 {
5289     struct intel_batchbuffer *batch = pp_context->batch;
5290
5291     BEGIN_BATCH(batch, 4);
5292     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
5293     OUT_BATCH(batch, 0);
5294     OUT_BATCH(batch,
5295               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
5296     OUT_RELOC(batch, 
5297               pp_context->idrt.bo,
5298               I915_GEM_DOMAIN_INSTRUCTION, 0,
5299               0);
5300     ADVANCE_BATCH(batch);
5301 }
5302
5303 static void
5304 gen8_interface_descriptor_load(VADriverContextP ctx,
5305                                struct i965_post_processing_context *pp_context)
5306 {
5307     struct intel_batchbuffer *batch = pp_context->batch;
5308
5309     BEGIN_BATCH(batch, 4);
5310     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
5311     OUT_BATCH(batch, 0);
5312     OUT_BATCH(batch,
5313               pp_context->idrt.num_interface_descriptors * sizeof(struct gen8_interface_descriptor_data));
5314     OUT_BATCH(batch, pp_context->idrt_offset);
5315     ADVANCE_BATCH(batch);
5316 }
5317
5318 static void update_block_mask_parameter(struct i965_post_processing_context *pp_context, int x, int y, int x_steps, int y_steps) 
5319 {
5320     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
5321
5322     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
5323     pp_inline_parameter->grf6.block_vertical_mask_bottom = pp_context->block_vertical_mask_bottom;
5324     // for the first block, it always on the left edge. the second block will reload horizontal_mask from grf6.block_horizontal_mask_middle
5325     pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_left;
5326     pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
5327     pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_right;
5328
5329     /* 1 x N */
5330     if (x_steps == 1) {
5331         if (y == y_steps-1) {
5332             pp_inline_parameter->grf5.block_vertical_mask = pp_context->block_vertical_mask_bottom;
5333         }
5334         else {
5335             pp_inline_parameter->grf6.block_vertical_mask_bottom = 0xff;
5336         }
5337     }
5338
5339     /* M x 1 */
5340     if (y_steps == 1) {
5341         if (x == 0) { // all blocks in this group are on the left edge
5342             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_left;
5343             pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_left; 
5344         }
5345         else if (x == x_steps-1) {
5346             pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_right;
5347             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_right;
5348         }
5349         else {
5350             pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
5351             pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
5352             pp_inline_parameter->grf6.block_horizontal_mask_right = 0xffff;
5353         }
5354     }
5355
5356 }
5357
5358 static void
5359 gen6_pp_object_walker(VADriverContextP ctx,
5360                       struct i965_post_processing_context *pp_context)
5361 {
5362     struct i965_driver_data *i965 = i965_driver_data(ctx);
5363     struct intel_batchbuffer *batch = pp_context->batch;
5364     int x, x_steps, y, y_steps;
5365     int param_size, command_length_in_dws;
5366     dri_bo *command_buffer;
5367     unsigned int *command_ptr;
5368
5369     if (IS_GEN7(i965->intel.device_id) ||
5370         IS_GEN8(i965->intel.device_id))
5371         param_size = sizeof(struct gen7_pp_inline_parameter);
5372     else
5373         param_size = sizeof(struct pp_inline_parameter);
5374
5375     x_steps = pp_context->pp_x_steps(pp_context->private_context);
5376     y_steps = pp_context->pp_y_steps(pp_context->private_context);
5377     command_length_in_dws = 6 + (param_size >> 2);
5378     command_buffer = dri_bo_alloc(i965->intel.bufmgr,
5379                                   "command objects buffer",
5380                                   command_length_in_dws * 4 * x_steps * y_steps + 8,
5381                                   4096);
5382
5383     dri_bo_map(command_buffer, 1);
5384     command_ptr = command_buffer->virtual;
5385
5386     for (y = 0; y < y_steps; y++) {
5387         for (x = 0; x < x_steps; x++) {
5388             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
5389                 // some common block parameter update goes here, apply to all pp functions
5390                 if (IS_GEN6(i965->intel.device_id))
5391                     update_block_mask_parameter (pp_context, x, y, x_steps, y_steps);
5392                 
5393                 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
5394                 *command_ptr++ = 0;
5395                 *command_ptr++ = 0;
5396                 *command_ptr++ = 0;
5397                 *command_ptr++ = 0;
5398                 *command_ptr++ = 0;
5399                 memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
5400                 command_ptr += (param_size >> 2);
5401             }
5402         }
5403     }
5404
5405     if (command_length_in_dws * x_steps * y_steps % 2 == 0)
5406         *command_ptr++ = 0;
5407
5408     *command_ptr = MI_BATCH_BUFFER_END;
5409
5410     dri_bo_unmap(command_buffer);
5411
5412     if (IS_GEN8(i965->intel.device_id)) {
5413         BEGIN_BATCH(batch, 3);
5414         OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
5415         OUT_RELOC(batch, command_buffer, 
5416               I915_GEM_DOMAIN_COMMAND, 0, 
5417               0);
5418         OUT_BATCH(batch, 0);
5419         ADVANCE_BATCH(batch);
5420     } else {
5421         BEGIN_BATCH(batch, 2);
5422         OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
5423         OUT_RELOC(batch, command_buffer, 
5424               I915_GEM_DOMAIN_COMMAND, 0, 
5425               0);
5426         ADVANCE_BATCH(batch);
5427     }
5428     
5429     dri_bo_unreference(command_buffer);
5430
5431     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
5432      * will cause control to pass back to ring buffer 
5433      */
5434     intel_batchbuffer_end_atomic(batch);
5435     intel_batchbuffer_flush(batch);
5436     intel_batchbuffer_start_atomic(batch, 0x1000);
5437 }
5438
5439 static void
5440 gen6_pp_pipeline_setup(VADriverContextP ctx,
5441                        struct i965_post_processing_context *pp_context)
5442 {
5443     struct intel_batchbuffer *batch = pp_context->batch;
5444
5445     intel_batchbuffer_start_atomic(batch, 0x1000);
5446     intel_batchbuffer_emit_mi_flush(batch);
5447     gen6_pp_pipeline_select(ctx, pp_context);
5448     gen6_pp_state_base_address(ctx, pp_context);
5449     gen6_pp_vfe_state(ctx, pp_context);
5450     gen6_pp_curbe_load(ctx, pp_context);
5451     gen6_interface_descriptor_load(ctx, pp_context);
5452     gen6_pp_object_walker(ctx, pp_context);
5453     intel_batchbuffer_end_atomic(batch);
5454 }
5455
5456 static void
5457 gen8_pp_curbe_load(VADriverContextP ctx,
5458                    struct i965_post_processing_context *pp_context)
5459 {
5460     struct intel_batchbuffer *batch = pp_context->batch;
5461     struct i965_driver_data *i965 = i965_driver_data(ctx);
5462     int param_size = 64;
5463
5464     if (IS_GEN8(i965->intel.device_id))
5465         param_size = sizeof(struct gen7_pp_static_parameter);
5466
5467     BEGIN_BATCH(batch, 4);
5468     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
5469     OUT_BATCH(batch, 0);
5470     OUT_BATCH(batch,
5471               param_size);
5472     OUT_BATCH(batch, pp_context->curbe_offset);
5473     ADVANCE_BATCH(batch);
5474 }
5475
5476 static void
5477 gen8_pp_object_walker(VADriverContextP ctx,
5478                       struct i965_post_processing_context *pp_context)
5479 {
5480     struct i965_driver_data *i965 = i965_driver_data(ctx);
5481     struct intel_batchbuffer *batch = pp_context->batch;
5482     int x, x_steps, y, y_steps;
5483     int param_size, command_length_in_dws, extra_cmd_in_dws;
5484     dri_bo *command_buffer;
5485     unsigned int *command_ptr;
5486
5487     param_size = sizeof(struct gen7_pp_inline_parameter);
5488     if (IS_GEN8(i965->intel.device_id))
5489         param_size = sizeof(struct gen7_pp_inline_parameter);
5490
5491     x_steps = pp_context->pp_x_steps(pp_context->private_context);
5492     y_steps = pp_context->pp_y_steps(pp_context->private_context);
5493     command_length_in_dws = 6 + (param_size >> 2);
5494     extra_cmd_in_dws = 2;
5495     command_buffer = dri_bo_alloc(i965->intel.bufmgr,
5496                                   "command objects buffer",
5497                                   (command_length_in_dws + extra_cmd_in_dws) * 4 * x_steps * y_steps + 64,
5498                                   4096);
5499
5500     dri_bo_map(command_buffer, 1);
5501     command_ptr = command_buffer->virtual;
5502
5503     for (y = 0; y < y_steps; y++) {
5504         for (x = 0; x < x_steps; x++) {
5505             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
5506
5507                 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
5508                 *command_ptr++ = 0;
5509                 *command_ptr++ = 0;
5510                 *command_ptr++ = 0;
5511                 *command_ptr++ = 0;
5512                 *command_ptr++ = 0;
5513                 memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
5514                 command_ptr += (param_size >> 2);
5515
5516                 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
5517                 *command_ptr++ = 0;
5518             }
5519         }
5520     }
5521
5522     if ((command_length_in_dws + extra_cmd_in_dws) * x_steps * y_steps % 2 == 0)
5523         *command_ptr++ = 0;
5524
5525     *command_ptr++ = MI_BATCH_BUFFER_END;
5526     *command_ptr++ = 0;
5527
5528     dri_bo_unmap(command_buffer);
5529
5530     if (IS_GEN8(i965->intel.device_id)) {
5531         BEGIN_BATCH(batch, 3);
5532         OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
5533         OUT_RELOC(batch, command_buffer,
5534                   I915_GEM_DOMAIN_COMMAND, 0, 0);
5535         OUT_BATCH(batch, 0);
5536         ADVANCE_BATCH(batch);
5537     }
5538
5539     dri_bo_unreference(command_buffer);
5540
5541     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
5542      * will cause control to pass back to ring buffer
5543      */
5544     intel_batchbuffer_end_atomic(batch);
5545     intel_batchbuffer_flush(batch);
5546     intel_batchbuffer_start_atomic(batch, 0x1000);
5547 }
5548
5549 static void
5550 gen8_pp_pipeline_setup(VADriverContextP ctx,
5551                        struct i965_post_processing_context *pp_context)
5552 {
5553     struct intel_batchbuffer *batch = pp_context->batch;
5554
5555     intel_batchbuffer_start_atomic(batch, 0x1000);
5556     intel_batchbuffer_emit_mi_flush(batch);
5557     gen6_pp_pipeline_select(ctx, pp_context);
5558     gen8_pp_state_base_address(ctx, pp_context);
5559     gen8_pp_vfe_state(ctx, pp_context);
5560     gen8_pp_curbe_load(ctx, pp_context);
5561     gen8_interface_descriptor_load(ctx, pp_context);
5562     gen8_pp_vfe_state(ctx, pp_context);
5563     gen8_pp_object_walker(ctx, pp_context);
5564     intel_batchbuffer_end_atomic(batch);
5565 }
5566
5567 static VAStatus
5568 gen6_post_processing(
5569     VADriverContextP ctx,
5570     struct i965_post_processing_context *pp_context,
5571     const struct i965_surface *src_surface,
5572     const VARectangle *src_rect,
5573     struct i965_surface *dst_surface,
5574     const VARectangle *dst_rect,
5575     int pp_index,
5576     void *filter_param
5577 )
5578 {
5579     VAStatus va_status;
5580     
5581     va_status = gen6_pp_initialize(ctx, pp_context,
5582                                    src_surface,
5583                                    src_rect,
5584                                    dst_surface,
5585                                    dst_rect,
5586                                    pp_index,
5587                                    filter_param);
5588
5589     if (va_status == VA_STATUS_SUCCESS) {
5590         gen6_pp_states_setup(ctx, pp_context);
5591         gen6_pp_pipeline_setup(ctx, pp_context);
5592     }
5593
5594     if (va_status == VA_STATUS_SUCCESS_1)
5595         va_status = VA_STATUS_SUCCESS;
5596
5597     return va_status;
5598 }
5599
5600 static VAStatus
5601 gen8_post_processing(
5602     VADriverContextP   ctx,
5603     struct i965_post_processing_context *pp_context,
5604     const struct i965_surface *src_surface,
5605     const VARectangle *src_rect,
5606     struct i965_surface *dst_surface,
5607     const VARectangle *dst_rect,
5608     int                pp_index,
5609     void * filter_param
5610 )
5611 {
5612     VAStatus va_status;
5613     
5614     va_status = gen8_pp_initialize(ctx, pp_context,
5615                                    src_surface,
5616                                    src_rect,
5617                                    dst_surface,
5618                                    dst_rect,
5619                                    pp_index,
5620                                    filter_param);
5621
5622     if (va_status == VA_STATUS_SUCCESS) {
5623         gen8_pp_states_setup(ctx, pp_context);
5624         gen8_pp_pipeline_setup(ctx, pp_context);
5625     }
5626
5627     return va_status;
5628 }
5629
5630 static VAStatus
5631 i965_post_processing_internal(
5632     VADriverContextP   ctx,
5633     struct i965_post_processing_context *pp_context,
5634     const struct i965_surface *src_surface,
5635     const VARectangle *src_rect,
5636     struct i965_surface *dst_surface,
5637     const VARectangle *dst_rect,
5638     int                pp_index,
5639     void *filter_param
5640 )
5641 {
5642     VAStatus va_status;
5643     struct i965_driver_data *i965 = i965_driver_data(ctx);
5644
5645     if (IS_GEN8(i965->intel.device_id))
5646         va_status = gen8_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
5647     else if (IS_GEN6(i965->intel.device_id) ||
5648         IS_GEN7(i965->intel.device_id))
5649         va_status = gen6_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
5650     else
5651         va_status = ironlake_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
5652     
5653     return va_status;
5654 }
5655
/*
 * Convert a packed 0xAARRGGBB colour into separate Y, U, V and alpha bytes.
 *
 * The conversion uses integer coefficients scaled by 1000; the division
 * truncates toward zero before the +16 (Y) or +128 (U, V) offset is added.
 * The alpha byte is passed through unchanged.
 */
static void
rgb_to_yuv(unsigned int argb,
           unsigned char *y,
           unsigned char *u,
           unsigned char *v,
           unsigned char *a)
{
    const int red = (int)((argb >> 16) & 0xff);
    const int green = (int)((argb >> 8) & 0xff);
    const int blue = (int)(argb & 0xff);

    const int luma = (257 * red + 504 * green + 98 * blue) / 1000 + 16;
    const int chroma_v = (439 * red - 368 * green - 71 * blue) / 1000 + 128;
    const int chroma_u = (-148 * red - 291 * green + 439 * blue) / 1000 + 128;

    *y = luma;
    *v = chroma_v;
    *u = chroma_u;
    *a = ((argb >> 24) & 0xff);
}
5672
5673 static void 
5674 i965_vpp_clear_surface(VADriverContextP ctx,
5675                        struct i965_post_processing_context *pp_context,
5676                        struct object_surface *obj_surface,
5677                        unsigned int color)
5678 {
5679     struct i965_driver_data *i965 = i965_driver_data(ctx);
5680     struct intel_batchbuffer *batch = pp_context->batch;
5681     unsigned int blt_cmd, br13;
5682     unsigned int tiling = 0, swizzle = 0;
5683     int pitch;
5684     unsigned char y, u, v, a = 0;
5685     int region_width, region_height;
5686
5687     /* Currently only support NV12 surface */
5688     if (!obj_surface || obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
5689         return;
5690
5691     rgb_to_yuv(color, &y, &u, &v, &a);
5692
5693     if (a == 0)
5694         return;
5695
5696     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
5697     blt_cmd = XY_COLOR_BLT_CMD;
5698     pitch = obj_surface->width;
5699
5700     if (tiling != I915_TILING_NONE) {
5701         assert(tiling == I915_TILING_Y);
5702         // blt_cmd |= XY_COLOR_BLT_DST_TILED;
5703         // pitch >>= 2;
5704     }
5705
5706     br13 = 0xf0 << 16;
5707     br13 |= BR13_8;
5708     br13 |= pitch;
5709
5710     if (IS_GEN6(i965->intel.device_id) ||
5711         IS_GEN7(i965->intel.device_id) ||
5712         IS_GEN8(i965->intel.device_id)) {
5713         intel_batchbuffer_start_atomic_blt(batch, 48);
5714         BEGIN_BLT_BATCH(batch, 12);
5715     } else {
5716         intel_batchbuffer_start_atomic(batch, 48);
5717         BEGIN_BATCH(batch, 12);
5718     }
5719
5720     region_width = obj_surface->width;
5721     region_height = obj_surface->height;
5722
5723     OUT_BATCH(batch, blt_cmd);
5724     OUT_BATCH(batch, br13);
5725     OUT_BATCH(batch,
5726               0 << 16 |
5727               0);
5728     OUT_BATCH(batch,
5729               region_height << 16 |
5730               region_width);
5731     OUT_RELOC(batch, obj_surface->bo, 
5732               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
5733               0);
5734     OUT_BATCH(batch, y);
5735
5736     br13 = 0xf0 << 16;
5737     br13 |= BR13_565;
5738     br13 |= pitch;
5739
5740     region_width = obj_surface->width / 2;
5741     region_height = obj_surface->height / 2;
5742
5743     if (tiling == I915_TILING_Y) {
5744         region_height = ALIGN(obj_surface->height / 2, 32);
5745     }
5746
5747     OUT_BATCH(batch, blt_cmd);
5748     OUT_BATCH(batch, br13);
5749     OUT_BATCH(batch,
5750               0 << 16 |
5751               0);
5752     OUT_BATCH(batch,
5753               region_height << 16 |
5754               region_width);
5755     OUT_RELOC(batch, obj_surface->bo, 
5756               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
5757               obj_surface->width * obj_surface->y_cb_offset);
5758     OUT_BATCH(batch, v << 8 | u);
5759
5760     ADVANCE_BATCH(batch);
5761     intel_batchbuffer_end_atomic(batch);
5762 }
5763
5764 VAStatus
5765 i965_scaling_processing(
5766     VADriverContextP   ctx,
5767     struct object_surface *src_surface_obj,
5768     const VARectangle *src_rect,
5769     struct object_surface *dst_surface_obj,
5770     const VARectangle *dst_rect,
5771     unsigned int       flags)
5772 {
5773     VAStatus va_status = VA_STATUS_SUCCESS;
5774     struct i965_driver_data *i965 = i965_driver_data(ctx);
5775  
5776     assert(src_surface_obj->fourcc == VA_FOURCC('N', 'V', '1', '2'));
5777     assert(dst_surface_obj->fourcc == VA_FOURCC('N', 'V', '1', '2'));
5778
5779     if (HAS_PP(i965) && (flags & I965_PP_FLAG_AVS)) {
5780         struct i965_surface src_surface;
5781         struct i965_surface dst_surface;
5782
5783          _i965LockMutex(&i965->pp_mutex);
5784
5785          src_surface.base = (struct object_base *)src_surface_obj;
5786          src_surface.type = I965_SURFACE_TYPE_SURFACE;
5787          src_surface.flags = I965_SURFACE_FLAG_FRAME;
5788          dst_surface.base = (struct object_base *)dst_surface_obj;
5789          dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5790          dst_surface.flags = I965_SURFACE_FLAG_FRAME;
5791
5792          va_status = i965_post_processing_internal(ctx, i965->pp_context,
5793                                                    &src_surface,
5794                                                    src_rect,
5795                                                    &dst_surface,
5796                                                    dst_rect,
5797                                                    PP_NV12_AVS,
5798                                                    NULL);
5799
5800          _i965UnlockMutex(&i965->pp_mutex);
5801     }
5802
5803     return va_status;
5804 }
5805
5806 VASurfaceID
5807 i965_post_processing(
5808     VADriverContextP   ctx,
5809     struct object_surface *obj_surface,
5810     const VARectangle *src_rect,
5811     const VARectangle *dst_rect,
5812     unsigned int       flags,
5813     int               *has_done_scaling  
5814 )
5815 {
5816     struct i965_driver_data *i965 = i965_driver_data(ctx);
5817     VASurfaceID out_surface_id = VA_INVALID_ID;
5818     VASurfaceID tmp_id = VA_INVALID_ID;
5819     
5820     *has_done_scaling = 0;
5821
5822     if (HAS_PP(i965)) {
5823         VAStatus status;
5824         struct i965_surface src_surface;
5825         struct i965_surface dst_surface;
5826
5827         /* Currently only support post processing for NV12 surface */
5828         if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
5829             return out_surface_id;
5830
5831         _i965LockMutex(&i965->pp_mutex);
5832
5833         if (flags & I965_PP_FLAG_MCDI) {
5834             src_surface.base = (struct object_base *)obj_surface;
5835             src_surface.type = I965_SURFACE_TYPE_SURFACE;
5836             src_surface.flags = (flags & I965_PP_FLAG_TOP_FIELD) ? 
5837                 I965_SURFACE_FLAG_TOP_FIELD_FIRST : I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST;
5838
5839             status = i965_CreateSurfaces(ctx,
5840                                          obj_surface->orig_width,
5841                                          obj_surface->orig_height,
5842                                          VA_RT_FORMAT_YUV420,
5843                                          1,
5844                                          &out_surface_id);
5845             assert(status == VA_STATUS_SUCCESS);
5846             obj_surface = SURFACE(out_surface_id);
5847             assert(obj_surface);
5848             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
5849             i965_vpp_clear_surface(ctx, i965->pp_context, obj_surface, 0); 
5850
5851             dst_surface.base = (struct object_base *)obj_surface;
5852             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5853             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
5854
5855             i965_post_processing_internal(ctx, i965->pp_context,
5856                                           &src_surface,
5857                                           src_rect,
5858                                           &dst_surface,
5859                                           dst_rect,
5860                                           PP_NV12_DNDI,
5861                                           NULL);
5862         }
5863
5864         if (flags & I965_PP_FLAG_AVS) {
5865             struct i965_render_state *render_state = &i965->render_state;
5866             struct intel_region *dest_region = render_state->draw_region;
5867
5868             if (out_surface_id != VA_INVALID_ID)
5869                 tmp_id = out_surface_id;
5870
5871             src_surface.base = (struct object_base *)obj_surface;
5872             src_surface.type = I965_SURFACE_TYPE_SURFACE;
5873             src_surface.flags = I965_SURFACE_FLAG_FRAME;
5874
5875             status = i965_CreateSurfaces(ctx,
5876                                          dest_region->width,
5877                                          dest_region->height,
5878                                          VA_RT_FORMAT_YUV420,
5879                                          1,
5880                                          &out_surface_id);
5881             assert(status == VA_STATUS_SUCCESS);
5882             obj_surface = SURFACE(out_surface_id);
5883             assert(obj_surface);
5884             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
5885             i965_vpp_clear_surface(ctx, i965->pp_context, obj_surface, 0); 
5886
5887             dst_surface.base = (struct object_base *)obj_surface;
5888             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5889             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
5890
5891             i965_post_processing_internal(ctx, i965->pp_context,
5892                                           &src_surface,
5893                                           src_rect,
5894                                           &dst_surface,
5895                                           dst_rect,
5896                                           PP_NV12_AVS,
5897                                           NULL);
5898
5899             if (tmp_id != VA_INVALID_ID)
5900                 i965_DestroySurfaces(ctx, &tmp_id, 1);
5901                 
5902             *has_done_scaling = 1;
5903         }
5904
5905         _i965UnlockMutex(&i965->pp_mutex);
5906     }
5907
5908     return out_surface_id;
5909 }       
5910
5911 static VAStatus
5912 i965_image_pl2_processing(VADriverContextP ctx,
5913                           const struct i965_surface *src_surface,
5914                           const VARectangle *src_rect,
5915                           struct i965_surface *dst_surface,
5916                           const VARectangle *dst_rect);
5917
5918 static VAStatus
5919 i965_image_plx_nv12_plx_processing(VADriverContextP ctx,
5920                                    VAStatus (*i965_image_plx_nv12_processing)(
5921                                        VADriverContextP,
5922                                        const struct i965_surface *,
5923                                        const VARectangle *,
5924                                        struct i965_surface *,
5925                                        const VARectangle *),
5926                                    const struct i965_surface *src_surface,
5927                                    const VARectangle *src_rect,
5928                                    struct i965_surface *dst_surface,
5929                                    const VARectangle *dst_rect)
5930 {
5931     struct i965_driver_data *i965 = i965_driver_data(ctx);
5932     VAStatus status;
5933     VASurfaceID tmp_surface_id = VA_INVALID_SURFACE;
5934     struct object_surface *obj_surface = NULL;
5935     struct i965_surface tmp_surface;
5936     int width, height;
5937
5938     pp_get_surface_size(ctx, dst_surface, &width, &height);
5939     status = i965_CreateSurfaces(ctx,
5940                                  width,
5941                                  height,
5942                                  VA_RT_FORMAT_YUV420,
5943                                  1,
5944                                  &tmp_surface_id);
5945     assert(status == VA_STATUS_SUCCESS);
5946     obj_surface = SURFACE(tmp_surface_id);
5947     assert(obj_surface);
5948     i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
5949
5950     tmp_surface.base = (struct object_base *)obj_surface;
5951     tmp_surface.type = I965_SURFACE_TYPE_SURFACE;
5952     tmp_surface.flags = I965_SURFACE_FLAG_FRAME;
5953
5954     status = i965_image_plx_nv12_processing(ctx,
5955                                             src_surface,
5956                                             src_rect,
5957                                             &tmp_surface,
5958                                             dst_rect);
5959
5960     if (status == VA_STATUS_SUCCESS)
5961         status = i965_image_pl2_processing(ctx,
5962                                            &tmp_surface,
5963                                            dst_rect,
5964                                            dst_surface,
5965                                            dst_rect);
5966
5967     i965_DestroySurfaces(ctx,
5968                          &tmp_surface_id,
5969                          1);
5970
5971     return status;
5972 }
5973
5974
5975 static VAStatus
5976 i965_image_pl1_rgbx_processing(VADriverContextP ctx,
5977                                const struct i965_surface *src_surface,
5978                                const VARectangle *src_rect,
5979                                struct i965_surface *dst_surface,
5980                                const VARectangle *dst_rect)
5981 {
5982     struct i965_driver_data *i965 = i965_driver_data(ctx);
5983     struct i965_post_processing_context *pp_context = i965->pp_context;
5984     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5985     VAStatus vaStatus;
5986
5987     switch (fourcc) {
5988     case VA_FOURCC('N', 'V', '1', '2'):
5989         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5990                                                  src_surface,
5991                                                  src_rect,
5992                                                  dst_surface,
5993                                                  dst_rect,
5994                                                  PP_RGBX_LOAD_SAVE_NV12,
5995                                                  NULL);
5996         intel_batchbuffer_flush(pp_context->batch);
5997         break;
5998
5999     default:
6000         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
6001                                                       i965_image_pl1_rgbx_processing,
6002                                                       src_surface,
6003                                                       src_rect,
6004                                                       dst_surface,
6005                                                       dst_rect);
6006         break;
6007     }
6008
6009     return vaStatus;
6010 }
6011
6012 static VAStatus
6013 i965_image_pl3_processing(VADriverContextP ctx,
6014                           const struct i965_surface *src_surface,
6015                           const VARectangle *src_rect,
6016                           struct i965_surface *dst_surface,
6017                           const VARectangle *dst_rect)
6018 {
6019     struct i965_driver_data *i965 = i965_driver_data(ctx);
6020     struct i965_post_processing_context *pp_context = i965->pp_context;
6021     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
6022     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
6023
6024     switch (fourcc) {
6025     case VA_FOURCC('N', 'V', '1', '2'):
6026         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
6027                                                  src_surface,
6028                                                  src_rect,
6029                                                  dst_surface,
6030                                                  dst_rect,
6031                                                  PP_PL3_LOAD_SAVE_N12,
6032                                                  NULL);
6033         intel_batchbuffer_flush(pp_context->batch);
6034         break;
6035
6036     case VA_FOURCC('I', 'M', 'C', '1'):
6037     case VA_FOURCC('I', 'M', 'C', '3'):
6038     case VA_FOURCC('Y', 'V', '1', '2'):
6039     case VA_FOURCC('I', '4', '2', '0'):
6040         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
6041                                                  src_surface,
6042                                                  src_rect,
6043                                                  dst_surface,
6044                                                  dst_rect,
6045                                                  PP_PL3_LOAD_SAVE_PL3,
6046                                                  NULL);
6047         intel_batchbuffer_flush(pp_context->batch);
6048         break;
6049
6050     case VA_FOURCC('Y', 'U', 'Y', '2'):
6051     case VA_FOURCC('U', 'Y', 'V', 'Y'):
6052         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
6053                                                  src_surface,
6054                                                  src_rect,
6055                                                  dst_surface,
6056                                                  dst_rect,
6057                                                  PP_PL3_LOAD_SAVE_PA,
6058                                                  NULL);
6059         intel_batchbuffer_flush(pp_context->batch);
6060         break;
6061
6062     default:
6063         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
6064                                                       i965_image_pl3_processing,
6065                                                       src_surface,
6066                                                       src_rect,
6067                                                       dst_surface,
6068                                                       dst_rect);
6069         break;
6070     }
6071
6072     return vaStatus;
6073 }
6074
6075 static VAStatus
6076 i965_image_pl2_processing(VADriverContextP ctx,
6077                           const struct i965_surface *src_surface,
6078                           const VARectangle *src_rect,
6079                           struct i965_surface *dst_surface,
6080                           const VARectangle *dst_rect)
6081 {
6082     struct i965_driver_data *i965 = i965_driver_data(ctx);
6083     struct i965_post_processing_context *pp_context = i965->pp_context;
6084     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
6085     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
6086
6087     switch (fourcc) {
6088     case VA_FOURCC('N', 'V', '1', '2'):
6089         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
6090                                                  src_surface,
6091                                                  src_rect,
6092                                                  dst_surface,
6093                                                  dst_rect,
6094                                                  PP_NV12_LOAD_SAVE_N12,
6095                                                  NULL);
6096         break;
6097
6098     case VA_FOURCC('I', 'M', 'C', '1'):
6099     case VA_FOURCC('I', 'M', 'C', '3'):
6100     case VA_FOURCC('Y', 'V', '1', '2'):
6101     case VA_FOURCC('I', '4', '2', '0'):
6102         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
6103                                                  src_surface,
6104                                                  src_rect,
6105                                                  dst_surface,
6106                                                  dst_rect,
6107                                                  PP_NV12_LOAD_SAVE_PL3,
6108                                                  NULL);
6109         break;
6110
6111     case VA_FOURCC('Y', 'U', 'Y', '2'):
6112     case VA_FOURCC('U', 'Y', 'V', 'Y'):
6113         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
6114                                                  src_surface,
6115                                                  src_rect,
6116                                                  dst_surface,
6117                                                  dst_rect,
6118                                                  PP_NV12_LOAD_SAVE_PA,
6119                                                  NULL);
6120         break;
6121
6122     case VA_FOURCC('B', 'G', 'R', 'X'):
6123     case VA_FOURCC('B', 'G', 'R', 'A'):
6124     case VA_FOURCC('R', 'G', 'B', 'X'):
6125     case VA_FOURCC('R', 'G', 'B', 'A'):
6126         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
6127                                                  src_surface,
6128                                                  src_rect,
6129                                                  dst_surface,
6130                                                  dst_rect,
6131                                                  PP_NV12_LOAD_SAVE_RGBX,
6132                                                  NULL);
6133         break;
6134
6135     default:
6136         return VA_STATUS_ERROR_UNIMPLEMENTED;
6137     }
6138
6139     intel_batchbuffer_flush(pp_context->batch);
6140
6141     return vaStatus;
6142 }
6143
6144 static VAStatus
6145 i965_image_pl1_processing(VADriverContextP ctx,
6146                           const struct i965_surface *src_surface,
6147                           const VARectangle *src_rect,
6148                           struct i965_surface *dst_surface,
6149                           const VARectangle *dst_rect)
6150 {
6151     struct i965_driver_data *i965 = i965_driver_data(ctx);
6152     struct i965_post_processing_context *pp_context = i965->pp_context;
6153     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
6154     VAStatus vaStatus;
6155
6156     switch (fourcc) {
6157     case VA_FOURCC('N', 'V', '1', '2'):
6158         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
6159                                                  src_surface,
6160                                                  src_rect,
6161                                                  dst_surface,
6162                                                  dst_rect,
6163                                                  PP_PA_LOAD_SAVE_NV12,
6164                                                  NULL);
6165         intel_batchbuffer_flush(pp_context->batch);
6166         break;
6167
6168     case VA_FOURCC('Y', 'V', '1', '2'):
6169         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
6170                                                  src_surface,
6171                                                  src_rect,
6172                                                  dst_surface,
6173                                                  dst_rect,
6174                                                  PP_PA_LOAD_SAVE_PL3,
6175                                                  NULL);
6176         intel_batchbuffer_flush(pp_context->batch);
6177         break;
6178
6179     case VA_FOURCC('Y', 'U', 'Y', '2'):
6180     case VA_FOURCC('U', 'Y', 'V', 'Y'):
6181         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
6182                                                  src_surface,
6183                                                  src_rect,
6184                                                  dst_surface,
6185                                                  dst_rect,
6186                                                  PP_PA_LOAD_SAVE_PA,
6187                                                  NULL);
6188         intel_batchbuffer_flush(pp_context->batch);
6189         break;
6190
6191     default:
6192         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
6193                                                       i965_image_pl1_processing,
6194                                                       src_surface,
6195                                                       src_rect,
6196                                                       dst_surface,
6197                                                       dst_rect);
6198         break;
6199     }
6200
6201     return vaStatus;
6202 }
6203
6204 VAStatus
6205 i965_image_processing(VADriverContextP ctx,
6206                       const struct i965_surface *src_surface,
6207                       const VARectangle *src_rect,
6208                       struct i965_surface *dst_surface,
6209                       const VARectangle *dst_rect)
6210 {
6211     struct i965_driver_data *i965 = i965_driver_data(ctx);
6212     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
6213
6214     if (HAS_PP(i965)) {
6215         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
6216
6217         _i965LockMutex(&i965->pp_mutex);
6218
6219         switch (fourcc) {
6220         case VA_FOURCC('Y', 'V', '1', '2'):
6221         case VA_FOURCC('I', '4', '2', '0'):
6222         case VA_FOURCC('I', 'M', 'C', '1'):
6223         case VA_FOURCC('I', 'M', 'C', '3'):
6224         case VA_FOURCC('4', '2', '2', 'H'):
6225         case VA_FOURCC('4', '2', '2', 'V'):
6226         case VA_FOURCC('4', '1', '1', 'P'):
6227         case VA_FOURCC('4', '4', '4', 'P'):
6228             status = i965_image_pl3_processing(ctx,
6229                                                src_surface,
6230                                                src_rect,
6231                                                dst_surface,
6232                                                dst_rect);
6233             break;
6234
6235         case  VA_FOURCC('N', 'V', '1', '2'):
6236             status = i965_image_pl2_processing(ctx,
6237                                                src_surface,
6238                                                src_rect,
6239                                                dst_surface,
6240                                                dst_rect);
6241             break;
6242         case VA_FOURCC('Y', 'U', 'Y', '2'):
6243         case VA_FOURCC('U', 'Y', 'V', 'Y'):
6244             status = i965_image_pl1_processing(ctx,
6245                                                src_surface,
6246                                                src_rect,
6247                                                dst_surface,
6248                                                dst_rect);
6249             break;
6250         case VA_FOURCC('B', 'G', 'R', 'A'):
6251         case VA_FOURCC('B', 'G', 'R', 'X'):
6252         case VA_FOURCC('R', 'G', 'B', 'A'):
6253         case VA_FOURCC('R', 'G', 'B', 'X'):
6254             status = i965_image_pl1_rgbx_processing(ctx,
6255                                                src_surface,
6256                                                src_rect,
6257                                                dst_surface,
6258                                                dst_rect);
6259             break;
6260         default:
6261             status = VA_STATUS_ERROR_UNIMPLEMENTED;
6262             break;
6263         }
6264         
6265         _i965UnlockMutex(&i965->pp_mutex);
6266     }
6267
6268     return status;
6269 }       
6270
6271 static void
6272 gen8_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
6273 {
6274     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
6275     pp_context->surface_state_binding_table.bo = NULL;
6276
6277     dri_bo_unreference(pp_context->pp_dndi_context.stmm_bo);
6278     pp_context->pp_dndi_context.stmm_bo = NULL;
6279
6280     dri_bo_unreference(pp_context->pp_dn_context.stmm_bo);
6281     pp_context->pp_dn_context.stmm_bo = NULL;
6282
6283     if (pp_context->instruction_state.bo) {
6284         dri_bo_unreference(pp_context->instruction_state.bo);
6285         pp_context->instruction_state.bo = NULL;
6286     }
6287
6288     if (pp_context->indirect_state.bo) {
6289         dri_bo_unreference(pp_context->indirect_state.bo);
6290         pp_context->indirect_state.bo = NULL;
6291     }
6292
6293     if (pp_context->dynamic_state.bo) {
6294         dri_bo_unreference(pp_context->dynamic_state.bo);
6295         pp_context->dynamic_state.bo = NULL;
6296     }
6297
6298     free(pp_context->pp_static_parameter);
6299     free(pp_context->pp_inline_parameter);
6300     pp_context->pp_static_parameter = NULL;
6301     pp_context->pp_inline_parameter = NULL;
6302 }
6303
6304 static void
6305 i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
6306 {
6307     int i;
6308
6309     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
6310     pp_context->surface_state_binding_table.bo = NULL;
6311
6312     dri_bo_unreference(pp_context->curbe.bo);
6313     pp_context->curbe.bo = NULL;
6314
6315     dri_bo_unreference(pp_context->sampler_state_table.bo);
6316     pp_context->sampler_state_table.bo = NULL;
6317
6318     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
6319     pp_context->sampler_state_table.bo_8x8 = NULL;
6320
6321     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
6322     pp_context->sampler_state_table.bo_8x8_uv = NULL;
6323
6324     dri_bo_unreference(pp_context->idrt.bo);
6325     pp_context->idrt.bo = NULL;
6326     pp_context->idrt.num_interface_descriptors = 0;
6327
6328     dri_bo_unreference(pp_context->vfe_state.bo);
6329     pp_context->vfe_state.bo = NULL;
6330
6331     dri_bo_unreference(pp_context->pp_dndi_context.stmm_bo);
6332     pp_context->pp_dndi_context.stmm_bo = NULL;
6333
6334     dri_bo_unreference(pp_context->pp_dn_context.stmm_bo);
6335     pp_context->pp_dn_context.stmm_bo = NULL;
6336
6337     for (i = 0; i < NUM_PP_MODULES; i++) {
6338         struct pp_module *pp_module = &pp_context->pp_modules[i];
6339
6340         dri_bo_unreference(pp_module->kernel.bo);
6341         pp_module->kernel.bo = NULL;
6342     }
6343
6344     free(pp_context->pp_static_parameter);
6345     free(pp_context->pp_inline_parameter);
6346     pp_context->pp_static_parameter = NULL;
6347     pp_context->pp_inline_parameter = NULL;
6348 }
6349
6350 void
6351 i965_post_processing_terminate(VADriverContextP ctx)
6352 {
6353     struct i965_driver_data *i965 = i965_driver_data(ctx);
6354     struct i965_post_processing_context *pp_context = i965->pp_context;
6355
6356     if (pp_context) {
6357         if (IS_GEN8(i965->intel.device_id)) {
6358             gen8_post_processing_context_finalize(pp_context);
6359         } else {
6360             i965_post_processing_context_finalize(pp_context);
6361         }
6362         free(pp_context);
6363     }
6364
6365     i965->pp_context = NULL;
6366 }
6367
6368 #define VPP_CURBE_ALLOCATION_SIZE       32
6369
6370
6371 static void
6372 gen8_post_processing_context_init(VADriverContextP ctx,
6373                                   struct i965_post_processing_context *pp_context,
6374                                   struct intel_batchbuffer *batch)
6375 {
6376     struct i965_driver_data *i965 = i965_driver_data(ctx);
6377     int i, kernel_size;
6378     unsigned int kernel_offset, end_offset;
6379     unsigned char *kernel_ptr;
6380     struct pp_module *pp_module;
6381
6382     {
6383         pp_context->vfe_gpu_state.max_num_threads = 60;
6384         pp_context->vfe_gpu_state.num_urb_entries = 59;
6385         pp_context->vfe_gpu_state.gpgpu_mode = 0;
6386         pp_context->vfe_gpu_state.urb_entry_size = 16 - 1;
6387         pp_context->vfe_gpu_state.curbe_allocation_size = VPP_CURBE_ALLOCATION_SIZE;
6388     }
6389
6390     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen8));
6391
6392     if (IS_GEN8(i965->intel.device_id))
6393         memcpy(pp_context->pp_modules, pp_modules_gen8, sizeof(pp_context->pp_modules));
6394     else {
6395         /* should never get here !!! */
6396         assert(0);
6397     }
6398
6399     kernel_size = 4096 ;
6400
6401     for (i = 0; i < NUM_PP_MODULES; i++) {
6402         pp_module = &pp_context->pp_modules[i];
6403
6404         if (pp_module->kernel.bin && pp_module->kernel.size) {
6405             kernel_size += pp_module->kernel.size;
6406         }
6407     }
6408
6409     pp_context->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
6410                                   "kernel shader",
6411                                   kernel_size,
6412                                   0x1000);
6413     if (pp_context->instruction_state.bo == NULL) {
6414         WARN_ONCE("failure to allocate the buffer space for kernel shader in VPP\n");
6415         return;
6416     }
6417
6418     assert(pp_context->instruction_state.bo);
6419
6420
6421     pp_context->instruction_state.bo_size = kernel_size;
6422     pp_context->instruction_state.end_offset = 0;
6423     end_offset = 0;
6424
6425     dri_bo_map(pp_context->instruction_state.bo, 1);
6426     kernel_ptr = (unsigned char *)(pp_context->instruction_state.bo->virtual);
6427
6428     for (i = 0; i < NUM_PP_MODULES; i++) {
6429         pp_module = &pp_context->pp_modules[i];
6430
6431         kernel_offset = ALIGN(end_offset, 64);
6432         pp_module->kernel.kernel_offset = kernel_offset;
6433
6434         if (pp_module->kernel.bin && pp_module->kernel.size) {
6435
6436             memcpy(kernel_ptr + kernel_offset, pp_module->kernel.bin, pp_module->kernel.size);
6437             end_offset = kernel_offset + pp_module->kernel.size;
6438         }
6439     }
6440
6441     pp_context->instruction_state.end_offset = ALIGN(end_offset, 64);
6442
6443     dri_bo_unmap(pp_context->instruction_state.bo);
6444
6445     /* static & inline parameters */
6446     if (IS_GEN8(i965->intel.device_id)) {
6447         pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
6448         pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
6449     }
6450
6451     pp_context->pp_dndi_context.current_out_surface = VA_INVALID_SURFACE;
6452     pp_context->pp_dndi_context.current_out_obj_surface = NULL;
6453     pp_context->pp_dndi_context.frame_order = -1;
6454     pp_context->batch = batch;
6455
6456     pp_context->idrt_size = 5 * sizeof(struct gen8_interface_descriptor_data);
6457     pp_context->curbe_size = 256;
6458 }
6459
6460 static void
6461 i965_post_processing_context_init(VADriverContextP ctx,
6462                                   struct i965_post_processing_context *pp_context,
6463                                   struct intel_batchbuffer *batch)
6464 {
6465     struct i965_driver_data *i965 = i965_driver_data(ctx);
6466     int i;
6467
6468     if (IS_GEN8(i965->intel.device_id)) {
6469         gen8_post_processing_context_init(ctx, pp_context, batch);
6470         return;
6471     };
6472
6473     if (IS_IRONLAKE(i965->intel.device_id)) {
6474         pp_context->urb.size = URB_SIZE((&i965->intel));
6475         pp_context->urb.num_vfe_entries = 32;
6476         pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
6477         pp_context->urb.num_cs_entries = 1;
6478         pp_context->urb.size_cs_entry = 2;
6479         pp_context->urb.vfe_start = 0;
6480         pp_context->urb.cs_start = pp_context->urb.vfe_start + 
6481             pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
6482         assert(pp_context->urb.cs_start +
6483             pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
6484     } else {
6485         pp_context->vfe_gpu_state.max_num_threads = 60;
6486         pp_context->vfe_gpu_state.num_urb_entries = 59;
6487         pp_context->vfe_gpu_state.gpgpu_mode = 0;
6488         pp_context->vfe_gpu_state.urb_entry_size = 16 - 1;
6489         pp_context->vfe_gpu_state.curbe_allocation_size = VPP_CURBE_ALLOCATION_SIZE;
6490     }
6491     
6492
6493     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
6494     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
6495     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
6496     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen75));
6497
6498     if (IS_HASWELL(i965->intel.device_id))
6499         memcpy(pp_context->pp_modules, pp_modules_gen75, sizeof(pp_context->pp_modules));
6500     else if (IS_GEN7(i965->intel.device_id))
6501         memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
6502     else if (IS_GEN6(i965->intel.device_id))
6503         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
6504     else if (IS_IRONLAKE(i965->intel.device_id))
6505         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
6506
6507     for (i = 0; i < NUM_PP_MODULES; i++) {
6508         struct pp_module *pp_module = &pp_context->pp_modules[i];
6509         dri_bo_unreference(pp_module->kernel.bo);
6510         if (pp_module->kernel.bin && pp_module->kernel.size) {
6511             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
6512                                                 pp_module->kernel.name,
6513                                                 pp_module->kernel.size,
6514                                                 4096);
6515             assert(pp_module->kernel.bo);
6516             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
6517         } else {
6518             pp_module->kernel.bo = NULL;
6519         }
6520     }
6521
6522     /* static & inline parameters */
6523     if (IS_GEN7(i965->intel.device_id) ||
6524         IS_GEN8(i965->intel.device_id)) {
6525         pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
6526         pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
6527     } else {
6528         pp_context->pp_static_parameter = calloc(sizeof(struct pp_static_parameter), 1);
6529         pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
6530     }
6531
6532     pp_context->pp_dndi_context.current_out_surface = VA_INVALID_SURFACE;
6533     pp_context->pp_dndi_context.current_out_obj_surface = NULL;
6534     pp_context->pp_dndi_context.frame_order = -1;
6535     pp_context->batch = batch;
6536 }
6537
6538 bool
6539 i965_post_processing_init(VADriverContextP ctx)
6540 {
6541     struct i965_driver_data *i965 = i965_driver_data(ctx);
6542     struct i965_post_processing_context *pp_context = i965->pp_context;
6543
6544     if (HAS_PP(i965)) {
6545         if (pp_context == NULL) {
6546             pp_context = calloc(1, sizeof(*pp_context));
6547             i965_post_processing_context_init(ctx, pp_context, i965->pp_batch);
6548             i965->pp_context = pp_context;
6549         }
6550     }
6551
6552     return true;
6553 }
6554
6555 static const int procfilter_to_pp_flag[VAProcFilterCount] = {
6556     PP_NULL,    /* VAProcFilterNone */
6557     PP_NV12_DN, /* VAProcFilterNoiseReduction */
6558     PP_NV12_DNDI, /* VAProcFilterDeinterlacing */
6559     PP_NULL,    /* VAProcFilterSharpening */
6560     PP_NULL,    /* VAProcFilterColorBalance */
6561 };
6562
6563 static const int proc_frame_to_pp_frame[3] = {
6564     I965_SURFACE_FLAG_FRAME,
6565     I965_SURFACE_FLAG_TOP_FIELD_FIRST,
6566     I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST
6567 };
6568
6569 #define VA_STATUS_SUCCESS_1                     0xFFFFFFFE
6570
6571 VAStatus 
6572 i965_proc_picture(VADriverContextP ctx, 
6573                   VAProfile profile, 
6574                   union codec_state *codec_state,
6575                   struct hw_context *hw_context)
6576 {
6577     struct i965_driver_data *i965 = i965_driver_data(ctx);
6578     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
6579     struct proc_state *proc_state = &codec_state->proc;
6580     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
6581     struct object_surface *obj_surface;
6582     struct i965_surface src_surface, dst_surface;
6583     VARectangle src_rect, dst_rect;
6584     VAStatus status;
6585     int i;
6586     VASurfaceID tmp_surfaces[VAProcFilterCount + 4];
6587     int num_tmp_surfaces = 0;
6588     unsigned int tiling = 0, swizzle = 0;
6589     int in_width, in_height;
6590
6591     if (pipeline_param->surface == VA_INVALID_ID ||
6592         proc_state->current_render_target == VA_INVALID_ID) {
6593         status = VA_STATUS_ERROR_INVALID_SURFACE;
6594         goto error;
6595     }
6596
6597     obj_surface = SURFACE(pipeline_param->surface);
6598
6599     if (!obj_surface) {
6600         status = VA_STATUS_ERROR_INVALID_SURFACE;
6601         goto error;
6602     }
6603
6604     if (!obj_surface->bo) {
6605         status = VA_STATUS_ERROR_INVALID_VALUE; /* The input surface is created without valid content */
6606         goto error;
6607     }
6608
6609     if (pipeline_param->num_filters && !pipeline_param->filters) {
6610         status = VA_STATUS_ERROR_INVALID_PARAMETER;
6611         goto error;
6612     }
6613
6614     in_width = obj_surface->orig_width;
6615     in_height = obj_surface->orig_height;
6616     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
6617
6618     src_surface.base = (struct object_base *)obj_surface;
6619     src_surface.type = I965_SURFACE_TYPE_SURFACE;
6620     src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
6621
6622     VASurfaceID out_surface_id = VA_INVALID_ID;
6623     if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2')) {
6624         src_surface.base = (struct object_base *)obj_surface;
6625         src_surface.type = I965_SURFACE_TYPE_SURFACE;
6626         src_surface.flags = I965_SURFACE_FLAG_FRAME;
6627         src_rect.x = 0;
6628         src_rect.y = 0;
6629         src_rect.width = in_width;
6630         src_rect.height = in_height;
6631
6632         status = i965_CreateSurfaces(ctx,
6633                                      in_width,
6634                                      in_height,
6635                                      VA_RT_FORMAT_YUV420,
6636                                      1,
6637                                      &out_surface_id);
6638         assert(status == VA_STATUS_SUCCESS);
6639         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
6640         obj_surface = SURFACE(out_surface_id);
6641         assert(obj_surface);
6642         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
6643
6644         dst_surface.base = (struct object_base *)obj_surface;
6645         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6646         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
6647         dst_rect.x = 0;
6648         dst_rect.y = 0;
6649         dst_rect.width = in_width;
6650         dst_rect.height = in_height;
6651
6652         status = i965_image_processing(ctx,
6653                                        &src_surface,
6654                                        &src_rect,
6655                                        &dst_surface,
6656                                        &dst_rect);
6657         assert(status == VA_STATUS_SUCCESS);
6658
6659         src_surface.base = (struct object_base *)obj_surface;
6660         src_surface.type = I965_SURFACE_TYPE_SURFACE;
6661         src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
6662     }
6663
6664     if (pipeline_param->surface_region) {
6665         src_rect.x = pipeline_param->surface_region->x;
6666         src_rect.y = pipeline_param->surface_region->y;
6667         src_rect.width = pipeline_param->surface_region->width;
6668         src_rect.height = pipeline_param->surface_region->height;
6669     } else {
6670         src_rect.x = 0;
6671         src_rect.y = 0;
6672         src_rect.width = in_width;
6673         src_rect.height = in_height;
6674     }
6675
6676     if (pipeline_param->output_region) {
6677         dst_rect.x = pipeline_param->output_region->x;
6678         dst_rect.y = pipeline_param->output_region->y;
6679         dst_rect.width = pipeline_param->output_region->width;
6680         dst_rect.height = pipeline_param->output_region->height;
6681     } else {
6682         dst_rect.x = 0;
6683         dst_rect.y = 0;
6684         dst_rect.width = in_width;
6685         dst_rect.height = in_height;
6686     }
6687
6688     proc_context->pp_context.pipeline_param = pipeline_param;
6689
6690     for (i = 0; i < pipeline_param->num_filters; i++) {
6691         struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]);
6692         VAProcFilterParameterBufferBase *filter_param = NULL;
6693         VAProcFilterType filter_type;
6694         int kernel_index;
6695
6696         if (!obj_buffer ||
6697             !obj_buffer->buffer_store ||
6698             !obj_buffer->buffer_store->buffer) {
6699             status = VA_STATUS_ERROR_INVALID_FILTER_CHAIN;
6700             goto error;
6701         }
6702
6703         out_surface_id = VA_INVALID_ID;
6704         filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer;
6705         filter_type = filter_param->type;
6706         kernel_index = procfilter_to_pp_flag[filter_type];
6707
6708         if (kernel_index != PP_NULL &&
6709             proc_context->pp_context.pp_modules[kernel_index].kernel.bo != NULL) {
6710             status = i965_CreateSurfaces(ctx,
6711                                          in_width,
6712                                          in_height,
6713                                          VA_RT_FORMAT_YUV420,
6714                                          1,
6715                                          &out_surface_id);
6716             assert(status == VA_STATUS_SUCCESS);
6717             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
6718             obj_surface = SURFACE(out_surface_id);
6719             assert(obj_surface);
6720             i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
6721             dst_surface.base = (struct object_base *)obj_surface;
6722             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6723             status = i965_post_processing_internal(ctx, &proc_context->pp_context,
6724                                                    &src_surface,
6725                                                    &src_rect,
6726                                                    &dst_surface,
6727                                                    &src_rect,
6728                                                    kernel_index,
6729                                                    filter_param);
6730
6731             if (status == VA_STATUS_SUCCESS) {
6732                 src_surface.base = dst_surface.base;
6733                 src_surface.type = dst_surface.type;
6734                 src_surface.flags = dst_surface.flags;
6735             }
6736         }
6737     }
6738
6739     proc_context->pp_context.pipeline_param = NULL;
6740     obj_surface = SURFACE(proc_state->current_render_target);
6741     
6742     if (!obj_surface) {
6743         status = VA_STATUS_ERROR_INVALID_SURFACE;
6744         goto error;
6745     }
6746
6747     int csc_needed = 0;
6748     if (obj_surface->fourcc && obj_surface->fourcc !=  VA_FOURCC('N','V','1','2')){
6749         csc_needed = 1;
6750         out_surface_id = VA_INVALID_ID;
6751         status = i965_CreateSurfaces(ctx,
6752                                      obj_surface->orig_width,
6753                                      obj_surface->orig_height,
6754                                      VA_RT_FORMAT_YUV420, 
6755                                      1,
6756                                      &out_surface_id);
6757         assert(status == VA_STATUS_SUCCESS);
6758         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
6759         struct object_surface *csc_surface = SURFACE(out_surface_id);
6760         assert(csc_surface);
6761         i965_check_alloc_surface_bo(ctx, csc_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
6762         dst_surface.base = (struct object_base *)csc_surface;
6763     } else {
6764         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
6765         dst_surface.base = (struct object_base *)obj_surface;
6766     }
6767
6768     dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6769     i965_vpp_clear_surface(ctx, &proc_context->pp_context, obj_surface, pipeline_param->output_background_color); 
6770
6771     // load/save doesn't support different origin offset for src and dst surface
6772     if (src_rect.width == dst_rect.width &&
6773         src_rect.height == dst_rect.height &&
6774         src_rect.x == dst_rect.x &&
6775         src_rect.y == dst_rect.y) {
6776         i965_post_processing_internal(ctx, &proc_context->pp_context,
6777                                       &src_surface,
6778                                       &src_rect,
6779                                       &dst_surface,
6780                                       &dst_rect,
6781                                       PP_NV12_LOAD_SAVE_N12,
6782                                       NULL);
6783     } else {
6784
6785         i965_post_processing_internal(ctx, &proc_context->pp_context,
6786                                       &src_surface,
6787                                       &src_rect,
6788                                       &dst_surface,
6789                                       &dst_rect,
6790                                       (pipeline_param->filter_flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC ?
6791                                       PP_NV12_AVS : PP_NV12_SCALING,
6792                                       NULL);
6793     }
6794
6795     if (csc_needed) {
6796         src_surface.base = dst_surface.base;
6797         src_surface.type = dst_surface.type;
6798         src_surface.flags = dst_surface.flags;
6799         dst_surface.base = (struct object_base *)obj_surface;
6800         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
6801         i965_image_processing(ctx, &src_surface, &dst_rect, &dst_surface, &dst_rect);
6802     }
6803     
6804     if (num_tmp_surfaces)
6805         i965_DestroySurfaces(ctx,
6806                              tmp_surfaces,
6807                              num_tmp_surfaces);
6808
6809     intel_batchbuffer_flush(hw_context->batch);
6810
6811     return VA_STATUS_SUCCESS;
6812
6813 error:
6814     if (num_tmp_surfaces)
6815         i965_DestroySurfaces(ctx,
6816                              tmp_surfaces,
6817                              num_tmp_surfaces);
6818
6819     return status;
6820 }
6821
6822 static void
6823 i965_proc_context_destroy(void *hw_context)
6824 {
6825     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
6826
6827     i965_post_processing_context_finalize(&proc_context->pp_context);
6828     intel_batchbuffer_free(proc_context->base.batch);
6829     free(proc_context);
6830 }
6831
6832 struct hw_context *
6833 i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config)
6834 {
6835     struct intel_driver_data *intel = intel_driver_data(ctx);
6836     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
6837
6838     proc_context->base.destroy = i965_proc_context_destroy;
6839     proc_context->base.run = i965_proc_picture;
6840     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
6841     i965_post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
6842
6843     return (struct hw_context *)proc_context;
6844 }
6845
6846