render: don't deallocate surface storage of displayed frames.
[platform/upstream/libva-intel-driver.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_post_processing.h"
40 #include "i965_render.h"
41 #include "intel_media.h"
42
43 extern VAStatus
44 vpp_surface_convert(VADriverContextP ctx,
45                     struct object_surface *src_obj_surf,
46                     struct object_surface *dst_obj_surf);
47
/* Evaluates to the has_vpp member of the codec_info reachable from ctx. */
#define HAS_VPP(ctx) ((ctx)->codec_info->has_vpp)

/*
 * Padded size of one surface-state slot: the largest of the GEN6, GEN7 and
 * GEN8 padded sizes, so a single layout can be used for all of them.
 */
#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_GEN8,\
                        MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7))

/*
 * Byte offset of surface-state slot `index`.  The argument is parenthesized
 * so that expressions such as SURFACE_STATE_OFFSET(base + 1) scale the whole
 * index rather than only its first operand.
 */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))

/* The binding table starts right after the last of the MAX_PP_SURFACES slots. */
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)

/*
 * Block geometry constants used by the post-processing code.
 * NOTE(review): presumably the pixel block size / x-offset alignment the
 * GPU kernels operate on - confirm against the users of these macros.
 */
#define GPU_ASM_BLOCK_WIDTH         16
#define GPU_ASM_BLOCK_HEIGHT        8
#define GPU_ASM_X_OFFSET_ALIGNMENT  4

/*
 * Driver-private status value.
 * NOTE(review): its meaning is defined by the code that returns/checks it,
 * which is outside this view.
 */
#define VA_STATUS_SUCCESS_1                     0xFFFFFFFE
61
62 static const uint32_t pp_null_gen5[][4] = {
63 #include "shaders/post_processing/gen5_6/null.g4b.gen5"
64 };
65
66 static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
67 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
68 };
69
70 static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
71 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
72 };
73
74 static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
75 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
76 };
77
78 static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
79 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
80 };
81
82 static const uint32_t pp_nv12_scaling_gen5[][4] = {
83 #include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
84 };
85
86 static const uint32_t pp_nv12_avs_gen5[][4] = {
87 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
88 };
89
90 static const uint32_t pp_nv12_dndi_gen5[][4] = {
91 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
92 };
93
94 static const uint32_t pp_nv12_dn_gen5[][4] = {
95 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
96 };
97
98 static const uint32_t pp_nv12_load_save_pa_gen5[][4] = {
99 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5"
100 };
101
102 static const uint32_t pp_pl3_load_save_pa_gen5[][4] = {
103 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5"
104 };
105
106 static const uint32_t pp_pa_load_save_nv12_gen5[][4] = {
107 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5"
108 };
109
110 static const uint32_t pp_pa_load_save_pl3_gen5[][4] = {
111 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5"
112 };
113
114 static const uint32_t pp_pa_load_save_pa_gen5[][4] = {
115 #include "shaders/post_processing/gen5_6/pa_load_save_pa.g4b.gen5"
116 };
117
118 static const uint32_t pp_rgbx_load_save_nv12_gen5[][4] = {
119 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g4b.gen5"
120 };
121
122 static const uint32_t pp_nv12_load_save_rgbx_gen5[][4] = {
123 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g4b.gen5"
124 };
125
126 static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
127                                    const struct i965_surface *src_surface,
128                                    const VARectangle *src_rect,
129                                    struct i965_surface *dst_surface,
130                                    const VARectangle *dst_rect,
131                                    void *filter_param);
132 static VAStatus pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
133                                             const struct i965_surface *src_surface,
134                                             const VARectangle *src_rect,
135                                             struct i965_surface *dst_surface,
136                                             const VARectangle *dst_rect,
137                                             void *filter_param);
138 static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
139                                            const struct i965_surface *src_surface,
140                                            const VARectangle *src_rect,
141                                            struct i965_surface *dst_surface,
142                                            const VARectangle *dst_rect,
143                                            void *filter_param);
144 static VAStatus gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
145                                              const struct i965_surface *src_surface,
146                                              const VARectangle *src_rect,
147                                              struct i965_surface *dst_surface,
148                                              const VARectangle *dst_rect,
149                                              void *filter_param);
150 static VAStatus pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
151                                                 const struct i965_surface *src_surface,
152                                                 const VARectangle *src_rect,
153                                                 struct i965_surface *dst_surface,
154                                                 const VARectangle *dst_rect,
155                                                 void *filter_param);
156 static VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
157                                         const struct i965_surface *src_surface,
158                                         const VARectangle *src_rect,
159                                         struct i965_surface *dst_surface,
160                                         const VARectangle *dst_rect,
161                                         void *filter_param);
162 static VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
163                                       const struct i965_surface *src_surface,
164                                       const VARectangle *src_rect,
165                                       struct i965_surface *dst_surface,
166                                       const VARectangle *dst_rect,
167                                       void *filter_param);
168
169 static struct pp_module pp_modules_gen5[] = {
170     {
171         {
172             "NULL module (for testing)",
173             PP_NULL,
174             pp_null_gen5,
175             sizeof(pp_null_gen5),
176             NULL,
177         },
178
179         pp_null_initialize,
180     },
181
182     {
183         {
184             "NV12_NV12",
185             PP_NV12_LOAD_SAVE_N12,
186             pp_nv12_load_save_nv12_gen5,
187             sizeof(pp_nv12_load_save_nv12_gen5),
188             NULL,
189         },
190
191         pp_plx_load_save_plx_initialize,
192     },
193
194     {
195         {
196             "NV12_PL3",
197             PP_NV12_LOAD_SAVE_PL3,
198             pp_nv12_load_save_pl3_gen5,
199             sizeof(pp_nv12_load_save_pl3_gen5),
200             NULL,
201         },
202
203         pp_plx_load_save_plx_initialize,
204     },
205
206     {
207         {
208             "PL3_NV12",
209             PP_PL3_LOAD_SAVE_N12,
210             pp_pl3_load_save_nv12_gen5,
211             sizeof(pp_pl3_load_save_nv12_gen5),
212             NULL,
213         },
214
215         pp_plx_load_save_plx_initialize,
216     },
217
218     {
219         {
220             "PL3_PL3",
221             PP_PL3_LOAD_SAVE_PL3,
222             pp_pl3_load_save_pl3_gen5,
223             sizeof(pp_pl3_load_save_pl3_gen5),
224             NULL,
225         },
226
227         pp_plx_load_save_plx_initialize
228     },
229
230     {
231         {
232             "NV12 Scaling module",
233             PP_NV12_SCALING,
234             pp_nv12_scaling_gen5,
235             sizeof(pp_nv12_scaling_gen5),
236             NULL,
237         },
238
239         pp_nv12_scaling_initialize,
240     },
241
242     {
243         {
244             "NV12 AVS module",
245             PP_NV12_AVS,
246             pp_nv12_avs_gen5,
247             sizeof(pp_nv12_avs_gen5),
248             NULL,
249         },
250
251         pp_nv12_avs_initialize_nlas,
252     },
253
254     {
255         {
256             "NV12 DNDI module",
257             PP_NV12_DNDI,
258             pp_nv12_dndi_gen5,
259             sizeof(pp_nv12_dndi_gen5),
260             NULL,
261         },
262
263         pp_nv12_dndi_initialize,
264     },
265
266     {
267         {
268             "NV12 DN module",
269             PP_NV12_DN,
270             pp_nv12_dn_gen5,
271             sizeof(pp_nv12_dn_gen5),
272             NULL,
273         },
274
275         pp_nv12_dn_initialize,
276     },
277
278     {
279         {
280             "NV12_PA module",
281             PP_NV12_LOAD_SAVE_PA,
282             pp_nv12_load_save_pa_gen5,
283             sizeof(pp_nv12_load_save_pa_gen5),
284             NULL,
285         },
286     
287         pp_plx_load_save_plx_initialize,
288     },
289
290     {
291         {
292             "PL3_PA module",
293             PP_PL3_LOAD_SAVE_PA,
294             pp_pl3_load_save_pa_gen5,
295             sizeof(pp_pl3_load_save_pa_gen5),
296             NULL,
297         },
298     
299         pp_plx_load_save_plx_initialize,
300     },
301
302     {
303         {
304             "PA_NV12 module",
305             PP_PA_LOAD_SAVE_NV12,
306             pp_pa_load_save_nv12_gen5,
307             sizeof(pp_pa_load_save_nv12_gen5),
308             NULL,
309         },
310     
311         pp_plx_load_save_plx_initialize,
312     },
313
314     {
315         {
316             "PA_PL3 module",
317             PP_PA_LOAD_SAVE_PL3,
318             pp_pa_load_save_pl3_gen5,
319             sizeof(pp_pa_load_save_pl3_gen5),
320             NULL,
321         },
322     
323         pp_plx_load_save_plx_initialize,
324     },
325
326     {
327         {
328             "PA_PA module",
329             PP_PA_LOAD_SAVE_PA,
330             pp_pa_load_save_pa_gen5,
331             sizeof(pp_pa_load_save_pa_gen5),
332             NULL,
333         },
334
335         pp_plx_load_save_plx_initialize,
336     },
337
338     {
339         {
340             "RGBX_NV12 module",
341             PP_RGBX_LOAD_SAVE_NV12,
342             pp_rgbx_load_save_nv12_gen5,
343             sizeof(pp_rgbx_load_save_nv12_gen5),
344             NULL,
345         },
346     
347         pp_plx_load_save_plx_initialize,
348     },
349             
350     {
351         {
352             "NV12_RGBX module",
353             PP_NV12_LOAD_SAVE_RGBX,
354             pp_nv12_load_save_rgbx_gen5,
355             sizeof(pp_nv12_load_save_rgbx_gen5),
356             NULL,
357         },
358     
359         pp_plx_load_save_plx_initialize,
360     },
361 };
362
363 static const uint32_t pp_null_gen6[][4] = {
364 #include "shaders/post_processing/gen5_6/null.g6b"
365 };
366
367 static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
368 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
369 };
370
371 static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
372 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
373 };
374
375 static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
376 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
377 };
378
379 static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
380 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
381 };
382
383 static const uint32_t pp_nv12_scaling_gen6[][4] = {
384 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
385 };
386
387 static const uint32_t pp_nv12_avs_gen6[][4] = {
388 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
389 };
390
391 static const uint32_t pp_nv12_dndi_gen6[][4] = {
392 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
393 };
394
395 static const uint32_t pp_nv12_dn_gen6[][4] = {
396 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
397 };
398
399 static const uint32_t pp_nv12_load_save_pa_gen6[][4] = {
400 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g6b"
401 };
402
403 static const uint32_t pp_pl3_load_save_pa_gen6[][4] = {
404 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g6b"
405 };
406
407 static const uint32_t pp_pa_load_save_nv12_gen6[][4] = {
408 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g6b"
409 };
410
411 static const uint32_t pp_pa_load_save_pl3_gen6[][4] = {
412 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g6b"
413 };
414
415 static const uint32_t pp_pa_load_save_pa_gen6[][4] = {
416 #include "shaders/post_processing/gen5_6/pa_load_save_pa.g6b"
417 };
418
419 static const uint32_t pp_rgbx_load_save_nv12_gen6[][4] = {
420 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g6b"
421 };
422
423 static const uint32_t pp_nv12_load_save_rgbx_gen6[][4] = {
424 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g6b"
425 };
426
427 static struct pp_module pp_modules_gen6[] = {
428     {
429         {
430             "NULL module (for testing)",
431             PP_NULL,
432             pp_null_gen6,
433             sizeof(pp_null_gen6),
434             NULL,
435         },
436
437         pp_null_initialize,
438     },
439
440     {
441         {
442             "NV12_NV12",
443             PP_NV12_LOAD_SAVE_N12,
444             pp_nv12_load_save_nv12_gen6,
445             sizeof(pp_nv12_load_save_nv12_gen6),
446             NULL,
447         },
448
449         pp_plx_load_save_plx_initialize,
450     },
451
452     {
453         {
454             "NV12_PL3",
455             PP_NV12_LOAD_SAVE_PL3,
456             pp_nv12_load_save_pl3_gen6,
457             sizeof(pp_nv12_load_save_pl3_gen6),
458             NULL,
459         },
460         
461         pp_plx_load_save_plx_initialize,
462     },
463
464     {
465         {
466             "PL3_NV12",
467             PP_PL3_LOAD_SAVE_N12,
468             pp_pl3_load_save_nv12_gen6,
469             sizeof(pp_pl3_load_save_nv12_gen6),
470             NULL,
471         },
472
473         pp_plx_load_save_plx_initialize,
474     },
475
476     {
477         {
478             "PL3_PL3",
479             PP_PL3_LOAD_SAVE_PL3,
480             pp_pl3_load_save_pl3_gen6,
481             sizeof(pp_pl3_load_save_pl3_gen6),
482             NULL,
483         },
484
485         pp_plx_load_save_plx_initialize,
486     },
487
488     {
489         {
490             "NV12 Scaling module",
491             PP_NV12_SCALING,
492             pp_nv12_scaling_gen6,
493             sizeof(pp_nv12_scaling_gen6),
494             NULL,
495         },
496
497         gen6_nv12_scaling_initialize,
498     },
499
500     {
501         {
502             "NV12 AVS module",
503             PP_NV12_AVS,
504             pp_nv12_avs_gen6,
505             sizeof(pp_nv12_avs_gen6),
506             NULL,
507         },
508
509         pp_nv12_avs_initialize_nlas,
510     },
511
512     {
513         {
514             "NV12 DNDI module",
515             PP_NV12_DNDI,
516             pp_nv12_dndi_gen6,
517             sizeof(pp_nv12_dndi_gen6),
518             NULL,
519         },
520
521         pp_nv12_dndi_initialize,
522     },
523
524     {
525         {
526             "NV12 DN module",
527             PP_NV12_DN,
528             pp_nv12_dn_gen6,
529             sizeof(pp_nv12_dn_gen6),
530             NULL,
531         },
532
533         pp_nv12_dn_initialize,
534     },
535     {
536         {
537             "NV12_PA module",
538             PP_NV12_LOAD_SAVE_PA,
539             pp_nv12_load_save_pa_gen6,
540             sizeof(pp_nv12_load_save_pa_gen6),
541             NULL,
542         },
543     
544         pp_plx_load_save_plx_initialize,
545     },
546
547     {
548         {
549             "PL3_PA module",
550             PP_PL3_LOAD_SAVE_PA,
551             pp_pl3_load_save_pa_gen6,
552             sizeof(pp_pl3_load_save_pa_gen6),
553             NULL,
554         },
555     
556         pp_plx_load_save_plx_initialize,
557     },
558
559     {
560         {
561             "PA_NV12 module",
562             PP_PA_LOAD_SAVE_NV12,
563             pp_pa_load_save_nv12_gen6,
564             sizeof(pp_pa_load_save_nv12_gen6),
565             NULL,
566         },
567     
568         pp_plx_load_save_plx_initialize,
569     },
570
571     {
572         {
573             "PA_PL3 module",
574             PP_PA_LOAD_SAVE_PL3,
575             pp_pa_load_save_pl3_gen6,
576             sizeof(pp_pa_load_save_pl3_gen6),
577             NULL,
578         },
579     
580         pp_plx_load_save_plx_initialize,
581     },
582
583     {
584         {
585             "PA_PA module",
586             PP_PA_LOAD_SAVE_PA,
587             pp_pa_load_save_pa_gen6,
588             sizeof(pp_pa_load_save_pa_gen6),
589             NULL,
590         },
591
592         pp_plx_load_save_plx_initialize,
593     },
594
595     {
596         {
597             "RGBX_NV12 module",
598             PP_RGBX_LOAD_SAVE_NV12,
599             pp_rgbx_load_save_nv12_gen6,
600             sizeof(pp_rgbx_load_save_nv12_gen6),
601             NULL,
602         },
603     
604         pp_plx_load_save_plx_initialize,
605     },
606
607     {
608         {
609             "NV12_RGBX module",
610             PP_NV12_LOAD_SAVE_RGBX,
611             pp_nv12_load_save_rgbx_gen6,
612             sizeof(pp_nv12_load_save_rgbx_gen6),
613             NULL,
614         },
615     
616         pp_plx_load_save_plx_initialize,
617     },
618 };
619
620 static const uint32_t pp_null_gen7[][4] = {
621 };
622
623 static const uint32_t pp_nv12_load_save_nv12_gen7[][4] = {
624 #include "shaders/post_processing/gen7/pl2_to_pl2.g7b"
625 };
626
627 static const uint32_t pp_nv12_load_save_pl3_gen7[][4] = {
628 #include "shaders/post_processing/gen7/pl2_to_pl3.g7b"
629 };
630
631 static const uint32_t pp_pl3_load_save_nv12_gen7[][4] = {
632 #include "shaders/post_processing/gen7/pl3_to_pl2.g7b"
633 };
634
635 static const uint32_t pp_pl3_load_save_pl3_gen7[][4] = {
636 #include "shaders/post_processing/gen7/pl3_to_pl3.g7b"
637 };
638
639 static const uint32_t pp_nv12_scaling_gen7[][4] = {
640 #include "shaders/post_processing/gen7/avs.g7b"
641 };
642
643 static const uint32_t pp_nv12_avs_gen7[][4] = {
644 #include "shaders/post_processing/gen7/avs.g7b"
645 };
646
647 static const uint32_t pp_nv12_dndi_gen7[][4] = {
648 #include "shaders/post_processing/gen7/dndi.g7b"
649 };
650
651 static const uint32_t pp_nv12_dn_gen7[][4] = {
652 #include "shaders/post_processing/gen7/nv12_dn_nv12.g7b"
653 };
654 static const uint32_t pp_nv12_load_save_pa_gen7[][4] = {
655 #include "shaders/post_processing/gen7/pl2_to_pa.g7b"
656 };
657 static const uint32_t pp_pl3_load_save_pa_gen7[][4] = {
658 #include "shaders/post_processing/gen7/pl3_to_pa.g7b"
659 };
660 static const uint32_t pp_pa_load_save_nv12_gen7[][4] = {
661 #include "shaders/post_processing/gen7/pa_to_pl2.g7b"
662 };
663 static const uint32_t pp_pa_load_save_pl3_gen7[][4] = {
664 #include "shaders/post_processing/gen7/pa_to_pl3.g7b"
665 };
666 static const uint32_t pp_pa_load_save_pa_gen7[][4] = {
667 #include "shaders/post_processing/gen7/pa_to_pa.g7b"
668 };
669 static const uint32_t pp_rgbx_load_save_nv12_gen7[][4] = {
670 #include "shaders/post_processing/gen7/rgbx_to_nv12.g7b"
671 };
672 static const uint32_t pp_nv12_load_save_rgbx_gen7[][4] = {
673 #include "shaders/post_processing/gen7/pl2_to_rgbx.g7b"
674 };
675
676 static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
677                                            const struct i965_surface *src_surface,
678                                            const VARectangle *src_rect,
679                                            struct i965_surface *dst_surface,
680                                            const VARectangle *dst_rect,
681                                            void *filter_param);
682 static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
683                                              const struct i965_surface *src_surface,
684                                              const VARectangle *src_rect,
685                                              struct i965_surface *dst_surface,
686                                              const VARectangle *dst_rect,
687                                              void *filter_param);
688 static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
689                                            const struct i965_surface *src_surface,
690                                            const VARectangle *src_rect,
691                                            struct i965_surface *dst_surface,
692                                            const VARectangle *dst_rect,
693                                            void *filter_param);
694
695 static struct pp_module pp_modules_gen7[] = {
696     {
697         {
698             "NULL module (for testing)",
699             PP_NULL,
700             pp_null_gen7,
701             sizeof(pp_null_gen7),
702             NULL,
703         },
704
705         pp_null_initialize,
706     },
707
708     {
709         {
710             "NV12_NV12",
711             PP_NV12_LOAD_SAVE_N12,
712             pp_nv12_load_save_nv12_gen7,
713             sizeof(pp_nv12_load_save_nv12_gen7),
714             NULL,
715         },
716
717         gen7_pp_plx_avs_initialize,
718     },
719
720     {
721         {
722             "NV12_PL3",
723             PP_NV12_LOAD_SAVE_PL3,
724             pp_nv12_load_save_pl3_gen7,
725             sizeof(pp_nv12_load_save_pl3_gen7),
726             NULL,
727         },
728         
729         gen7_pp_plx_avs_initialize,
730     },
731
732     {
733         {
734             "PL3_NV12",
735             PP_PL3_LOAD_SAVE_N12,
736             pp_pl3_load_save_nv12_gen7,
737             sizeof(pp_pl3_load_save_nv12_gen7),
738             NULL,
739         },
740
741         gen7_pp_plx_avs_initialize,
742     },
743
744     {
745         {
746             "PL3_PL3",
747             PP_PL3_LOAD_SAVE_PL3,
748             pp_pl3_load_save_pl3_gen7,
749             sizeof(pp_pl3_load_save_pl3_gen7),
750             NULL,
751         },
752
753         gen7_pp_plx_avs_initialize,
754     },
755
756     {
757         {
758             "NV12 Scaling module",
759             PP_NV12_SCALING,
760             pp_nv12_scaling_gen7,
761             sizeof(pp_nv12_scaling_gen7),
762             NULL,
763         },
764
765         gen7_pp_plx_avs_initialize,
766     },
767
768     {
769         {
770             "NV12 AVS module",
771             PP_NV12_AVS,
772             pp_nv12_avs_gen7,
773             sizeof(pp_nv12_avs_gen7),
774             NULL,
775         },
776
777         gen7_pp_plx_avs_initialize,
778     },
779
780     {
781         {
782             "NV12 DNDI module",
783             PP_NV12_DNDI,
784             pp_nv12_dndi_gen7,
785             sizeof(pp_nv12_dndi_gen7),
786             NULL,
787         },
788
789         gen7_pp_nv12_dndi_initialize,
790     },
791
792     {
793         {
794             "NV12 DN module",
795             PP_NV12_DN,
796             pp_nv12_dn_gen7,
797             sizeof(pp_nv12_dn_gen7),
798             NULL,
799         },
800
801         gen7_pp_nv12_dn_initialize,
802     },
803     {
804         {
805             "NV12_PA module",
806             PP_NV12_LOAD_SAVE_PA,
807             pp_nv12_load_save_pa_gen7,
808             sizeof(pp_nv12_load_save_pa_gen7),
809             NULL,
810         },
811     
812         gen7_pp_plx_avs_initialize,
813     },
814
815     {
816         {
817             "PL3_PA module",
818             PP_PL3_LOAD_SAVE_PA,
819             pp_pl3_load_save_pa_gen7,
820             sizeof(pp_pl3_load_save_pa_gen7),
821             NULL,
822         },
823     
824         gen7_pp_plx_avs_initialize,
825     },
826
827     {
828         {
829             "PA_NV12 module",
830             PP_PA_LOAD_SAVE_NV12,
831             pp_pa_load_save_nv12_gen7,
832             sizeof(pp_pa_load_save_nv12_gen7),
833             NULL,
834         },
835     
836         gen7_pp_plx_avs_initialize,
837     },
838
839     {
840         {
841             "PA_PL3 module",
842             PP_PA_LOAD_SAVE_PL3,
843             pp_pa_load_save_pl3_gen7,
844             sizeof(pp_pa_load_save_pl3_gen7),
845             NULL,
846         },
847     
848         gen7_pp_plx_avs_initialize,
849     },
850
851     {
852         {
853             "PA_PA module",
854             PP_PA_LOAD_SAVE_PA,
855             pp_pa_load_save_pa_gen7,
856             sizeof(pp_pa_load_save_pa_gen7),
857             NULL,
858         },
859
860         gen7_pp_plx_avs_initialize,
861     },
862
863     {
864         {
865             "RGBX_NV12 module",
866             PP_RGBX_LOAD_SAVE_NV12,
867             pp_rgbx_load_save_nv12_gen7,
868             sizeof(pp_rgbx_load_save_nv12_gen7),
869             NULL,
870         },
871     
872         gen7_pp_plx_avs_initialize,
873     },
874
875     {
876         {
877             "NV12_RGBX module",
878             PP_NV12_LOAD_SAVE_RGBX,
879             pp_nv12_load_save_rgbx_gen7,
880             sizeof(pp_nv12_load_save_rgbx_gen7),
881             NULL,
882         },
883     
884         gen7_pp_plx_avs_initialize,
885     },
886             
887 };
888
889 static const uint32_t pp_null_gen75[][4] = {
890 };
891
892 static const uint32_t pp_nv12_load_save_nv12_gen75[][4] = {
893 #include "shaders/post_processing/gen7/pl2_to_pl2.g75b"
894 };
895
896 static const uint32_t pp_nv12_load_save_pl3_gen75[][4] = {
897 #include "shaders/post_processing/gen7/pl2_to_pl3.g75b"
898 };
899
900 static const uint32_t pp_pl3_load_save_nv12_gen75[][4] = {
901 #include "shaders/post_processing/gen7/pl3_to_pl2.g75b"
902 };
903
904 static const uint32_t pp_pl3_load_save_pl3_gen75[][4] = {
905 #include "shaders/post_processing/gen7/pl3_to_pl3.g75b"
906 };
907
908 static const uint32_t pp_nv12_scaling_gen75[][4] = {
909 #include "shaders/post_processing/gen7/avs.g75b"
910 };
911
912 static const uint32_t pp_nv12_avs_gen75[][4] = {
913 #include "shaders/post_processing/gen7/avs.g75b"
914 };
915
916 static const uint32_t pp_nv12_dndi_gen75[][4] = {
917 // #include "shaders/post_processing/gen7/dndi.g75b"
918 };
919
920 static const uint32_t pp_nv12_dn_gen75[][4] = {
921 // #include "shaders/post_processing/gen7/nv12_dn_nv12.g75b"
922 };
923 static const uint32_t pp_nv12_load_save_pa_gen75[][4] = {
924 #include "shaders/post_processing/gen7/pl2_to_pa.g75b"
925 };
926 static const uint32_t pp_pl3_load_save_pa_gen75[][4] = {
927 #include "shaders/post_processing/gen7/pl3_to_pa.g75b"
928 };
929 static const uint32_t pp_pa_load_save_nv12_gen75[][4] = {
930 #include "shaders/post_processing/gen7/pa_to_pl2.g75b"
931 };
932 static const uint32_t pp_pa_load_save_pl3_gen75[][4] = {
933 #include "shaders/post_processing/gen7/pa_to_pl3.g75b"
934 };
935 static const uint32_t pp_pa_load_save_pa_gen75[][4] = {
936 #include "shaders/post_processing/gen7/pa_to_pa.g75b"
937 };
938 static const uint32_t pp_rgbx_load_save_nv12_gen75[][4] = {
939 #include "shaders/post_processing/gen7/rgbx_to_nv12.g75b"
940 };
941 static const uint32_t pp_nv12_load_save_rgbx_gen75[][4] = {
942 #include "shaders/post_processing/gen7/pl2_to_rgbx.g75b"
943 };
944
945 static struct pp_module pp_modules_gen75[] = {
946     {
947         {
948             "NULL module (for testing)",
949             PP_NULL,
950             pp_null_gen75,
951             sizeof(pp_null_gen75),
952             NULL,
953         },
954
955         pp_null_initialize,
956     },
957
958     {
959         {
960             "NV12_NV12",
961             PP_NV12_LOAD_SAVE_N12,
962             pp_nv12_load_save_nv12_gen75,
963             sizeof(pp_nv12_load_save_nv12_gen75),
964             NULL,
965         },
966
967         gen7_pp_plx_avs_initialize,
968     },
969
970     {
971         {
972             "NV12_PL3",
973             PP_NV12_LOAD_SAVE_PL3,
974             pp_nv12_load_save_pl3_gen75,
975             sizeof(pp_nv12_load_save_pl3_gen75),
976             NULL,
977         },
978         
979         gen7_pp_plx_avs_initialize,
980     },
981
982     {
983         {
984             "PL3_NV12",
985             PP_PL3_LOAD_SAVE_N12,
986             pp_pl3_load_save_nv12_gen75,
987             sizeof(pp_pl3_load_save_nv12_gen75),
988             NULL,
989         },
990
991         gen7_pp_plx_avs_initialize,
992     },
993
994     {
995         {
996             "PL3_PL3",
997             PP_PL3_LOAD_SAVE_PL3,
998             pp_pl3_load_save_pl3_gen75,
999             sizeof(pp_pl3_load_save_pl3_gen75),
1000             NULL,
1001         },
1002
1003         gen7_pp_plx_avs_initialize,
1004     },
1005
1006     {
1007         {
1008             "NV12 Scaling module",
1009             PP_NV12_SCALING,
1010             pp_nv12_scaling_gen75,
1011             sizeof(pp_nv12_scaling_gen75),
1012             NULL,
1013         },
1014
1015         gen7_pp_plx_avs_initialize,
1016     },
1017
1018     {
1019         {
1020             "NV12 AVS module",
1021             PP_NV12_AVS,
1022             pp_nv12_avs_gen75,
1023             sizeof(pp_nv12_avs_gen75),
1024             NULL,
1025         },
1026
1027         gen7_pp_plx_avs_initialize,
1028     },
1029
1030     {
1031         {
1032             "NV12 DNDI module",
1033             PP_NV12_DNDI,
1034             pp_nv12_dndi_gen75,
1035             sizeof(pp_nv12_dndi_gen75),
1036             NULL,
1037         },
1038
1039         gen7_pp_nv12_dn_initialize,
1040     },
1041
1042     {
1043         {
1044             "NV12 DN module",
1045             PP_NV12_DN,
1046             pp_nv12_dn_gen75,
1047             sizeof(pp_nv12_dn_gen75),
1048             NULL,
1049         },
1050
1051         gen7_pp_nv12_dn_initialize,
1052     },
1053
1054     {
1055         {
1056             "NV12_PA module",
1057             PP_NV12_LOAD_SAVE_PA,
1058             pp_nv12_load_save_pa_gen75,
1059             sizeof(pp_nv12_load_save_pa_gen75),
1060             NULL,
1061         },
1062     
1063         gen7_pp_plx_avs_initialize,
1064     },
1065
1066     {
1067         {
1068             "PL3_PA module",
1069             PP_PL3_LOAD_SAVE_PA,
1070             pp_pl3_load_save_pa_gen75,
1071             sizeof(pp_pl3_load_save_pa_gen75),
1072             NULL,
1073         },
1074     
1075         gen7_pp_plx_avs_initialize,
1076     },
1077
1078     {
1079         {
1080             "PA_NV12 module",
1081             PP_PA_LOAD_SAVE_NV12,
1082             pp_pa_load_save_nv12_gen75,
1083             sizeof(pp_pa_load_save_nv12_gen75),
1084             NULL,
1085         },
1086     
1087         gen7_pp_plx_avs_initialize,
1088     },
1089
1090     {
1091         {
1092             "PA_PL3 module",
1093             PP_PA_LOAD_SAVE_PL3,
1094             pp_pa_load_save_pl3_gen75,
1095             sizeof(pp_pa_load_save_pl3_gen75),
1096             NULL,
1097         },
1098     
1099         gen7_pp_plx_avs_initialize,
1100     },
1101
1102     {
1103         {
1104             "PA_PA module",
1105             PP_PA_LOAD_SAVE_PA,
1106             pp_pa_load_save_pa_gen75,
1107             sizeof(pp_pa_load_save_pa_gen75),
1108             NULL,
1109         },
1110
1111         gen7_pp_plx_avs_initialize,
1112     },
1113
1114     {
1115         {
1116             "RGBX_NV12 module",
1117             PP_RGBX_LOAD_SAVE_NV12,
1118             pp_rgbx_load_save_nv12_gen75,
1119             sizeof(pp_rgbx_load_save_nv12_gen75),
1120             NULL,
1121         },
1122     
1123         gen7_pp_plx_avs_initialize,
1124     },
1125
1126     {
1127         {
1128             "NV12_RGBX module",
1129             PP_NV12_LOAD_SAVE_RGBX,
1130             pp_nv12_load_save_rgbx_gen75,
1131             sizeof(pp_nv12_load_save_rgbx_gen75),
1132             NULL,
1133         },
1134     
1135         gen7_pp_plx_avs_initialize,
1136     },
1137             
1138 };
1139
1140 static int
1141 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
1142 {
1143     int fourcc;
1144
1145     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
1146         struct object_image *obj_image = (struct object_image *)surface->base;
1147         fourcc = obj_image->image.format.fourcc;
1148     } else {
1149         struct object_surface *obj_surface = (struct object_surface *)surface->base;
1150         fourcc = obj_surface->fourcc;
1151     }
1152
1153     return fourcc;
1154 }
1155
1156 static void
1157 pp_get_surface_size(VADriverContextP ctx, const struct i965_surface *surface, int *width, int *height)
1158 {
1159     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
1160         struct object_image *obj_image = (struct object_image *)surface->base;
1161
1162         *width = obj_image->image.width;
1163         *height = obj_image->image.height;
1164     } else {
1165         struct object_surface *obj_surface = (struct object_surface *)surface->base;
1166
1167         *width = obj_surface->orig_width;
1168         *height = obj_surface->orig_height;
1169     }
1170 }
1171
1172 static void
1173 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
1174 {
1175     switch (tiling) {
1176     case I915_TILING_NONE:
1177         ss->ss3.tiled_surface = 0;
1178         ss->ss3.tile_walk = 0;
1179         break;
1180     case I915_TILING_X:
1181         ss->ss3.tiled_surface = 1;
1182         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
1183         break;
1184     case I915_TILING_Y:
1185         ss->ss3.tiled_surface = 1;
1186         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
1187         break;
1188     }
1189 }
1190
1191 static void
1192 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
1193 {
1194     switch (tiling) {
1195     case I915_TILING_NONE:
1196         ss->ss2.tiled_surface = 0;
1197         ss->ss2.tile_walk = 0;
1198         break;
1199     case I915_TILING_X:
1200         ss->ss2.tiled_surface = 1;
1201         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1202         break;
1203     case I915_TILING_Y:
1204         ss->ss2.tiled_surface = 1;
1205         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1206         break;
1207     }
1208 }
1209
1210 static void
1211 gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
1212 {
1213     switch (tiling) {
1214     case I915_TILING_NONE:
1215         ss->ss0.tiled_surface = 0;
1216         ss->ss0.tile_walk = 0;
1217         break;
1218     case I915_TILING_X:
1219         ss->ss0.tiled_surface = 1;
1220         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
1221         break;
1222     case I915_TILING_Y:
1223         ss->ss0.tiled_surface = 1;
1224         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
1225         break;
1226     }
1227 }
1228
1229 static void
1230 gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
1231 {
1232     switch (tiling) {
1233     case I915_TILING_NONE:
1234         ss->ss2.tiled_surface = 0;
1235         ss->ss2.tile_walk = 0;
1236         break;
1237     case I915_TILING_X:
1238         ss->ss2.tiled_surface = 1;
1239         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1240         break;
1241     case I915_TILING_Y:
1242         ss->ss2.tiled_surface = 1;
1243         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1244         break;
1245     }
1246 }
1247
1248 static void
1249 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
1250 {
1251     struct i965_interface_descriptor *desc;
1252     dri_bo *bo;
1253     int pp_index = pp_context->current_pp;
1254
1255     bo = pp_context->idrt.bo;
1256     dri_bo_map(bo, 1);
1257     assert(bo->virtual);
1258     desc = bo->virtual;
1259     memset(desc, 0, sizeof(*desc));
1260     desc->desc0.grf_reg_blocks = 10;
1261     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
1262     desc->desc1.const_urb_entry_read_offset = 0;
1263     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
1264     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
1265     desc->desc2.sampler_count = 0;
1266     desc->desc3.binding_table_entry_count = 0;
1267     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
1268
1269     dri_bo_emit_reloc(bo,
1270                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1271                       desc->desc0.grf_reg_blocks,
1272                       offsetof(struct i965_interface_descriptor, desc0),
1273                       pp_context->pp_modules[pp_index].kernel.bo);
1274
1275     dri_bo_emit_reloc(bo,
1276                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1277                       desc->desc2.sampler_count << 2,
1278                       offsetof(struct i965_interface_descriptor, desc2),
1279                       pp_context->sampler_state_table.bo);
1280
1281     dri_bo_unmap(bo);
1282     pp_context->idrt.num_interface_descriptors++;
1283 }
1284
1285 static void
1286 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
1287 {
1288     struct i965_vfe_state *vfe_state;
1289     dri_bo *bo;
1290
1291     bo = pp_context->vfe_state.bo;
1292     dri_bo_map(bo, 1);
1293     assert(bo->virtual);
1294     vfe_state = bo->virtual;
1295     memset(vfe_state, 0, sizeof(*vfe_state));
1296     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
1297     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
1298     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
1299     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
1300     vfe_state->vfe1.children_present = 0;
1301     vfe_state->vfe2.interface_descriptor_base = 
1302         pp_context->idrt.bo->offset >> 4; /* reloc */
1303     dri_bo_emit_reloc(bo,
1304                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1305                       0,
1306                       offsetof(struct i965_vfe_state, vfe2),
1307                       pp_context->idrt.bo);
1308     dri_bo_unmap(bo);
1309 }
1310
1311 static void
1312 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
1313 {
1314     unsigned char *constant_buffer;
1315     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1316
1317     assert(sizeof(*pp_static_parameter) == 128);
1318     dri_bo_map(pp_context->curbe.bo, 1);
1319     assert(pp_context->curbe.bo->virtual);
1320     constant_buffer = pp_context->curbe.bo->virtual;
1321     memcpy(constant_buffer, pp_static_parameter, sizeof(*pp_static_parameter));
1322     dri_bo_unmap(pp_context->curbe.bo);
1323 }
1324
1325 static void
1326 ironlake_pp_states_setup(VADriverContextP ctx,
1327                          struct i965_post_processing_context *pp_context)
1328 {
1329     ironlake_pp_interface_descriptor_table(pp_context);
1330     ironlake_pp_vfe_state(pp_context);
1331     ironlake_pp_upload_constants(pp_context);
1332 }
1333
1334 static void
1335 ironlake_pp_pipeline_select(VADriverContextP ctx,
1336                             struct i965_post_processing_context *pp_context)
1337 {
1338     struct intel_batchbuffer *batch = pp_context->batch;
1339
1340     BEGIN_BATCH(batch, 1);
1341     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
1342     ADVANCE_BATCH(batch);
1343 }
1344
1345 static void
1346 ironlake_pp_urb_layout(VADriverContextP ctx,
1347                        struct i965_post_processing_context *pp_context)
1348 {
1349     struct intel_batchbuffer *batch = pp_context->batch;
1350     unsigned int vfe_fence, cs_fence;
1351
1352     vfe_fence = pp_context->urb.cs_start;
1353     cs_fence = pp_context->urb.size;
1354
1355     BEGIN_BATCH(batch, 3);
1356     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
1357     OUT_BATCH(batch, 0);
1358     OUT_BATCH(batch, 
1359               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
1360               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
1361     ADVANCE_BATCH(batch);
1362 }
1363
1364 static void
1365 ironlake_pp_state_base_address(VADriverContextP ctx,
1366                                struct i965_post_processing_context *pp_context)
1367 {
1368     struct intel_batchbuffer *batch = pp_context->batch;
1369
1370     BEGIN_BATCH(batch, 8);
1371     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
1372     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1373     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
1374     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1375     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1376     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1377     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1378     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1379     ADVANCE_BATCH(batch);
1380 }
1381
1382 static void
1383 ironlake_pp_state_pointers(VADriverContextP ctx,
1384                            struct i965_post_processing_context *pp_context)
1385 {
1386     struct intel_batchbuffer *batch = pp_context->batch;
1387
1388     BEGIN_BATCH(batch, 3);
1389     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
1390     OUT_BATCH(batch, 0);
1391     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1392     ADVANCE_BATCH(batch);
1393 }
1394
1395 static void 
1396 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
1397                           struct i965_post_processing_context *pp_context)
1398 {
1399     struct intel_batchbuffer *batch = pp_context->batch;
1400
1401     BEGIN_BATCH(batch, 2);
1402     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
1403     OUT_BATCH(batch,
1404               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
1405               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
1406     ADVANCE_BATCH(batch);
1407 }
1408
1409 static void
1410 ironlake_pp_constant_buffer(VADriverContextP ctx,
1411                             struct i965_post_processing_context *pp_context)
1412 {
1413     struct intel_batchbuffer *batch = pp_context->batch;
1414
1415     BEGIN_BATCH(batch, 2);
1416     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1417     OUT_RELOC(batch, pp_context->curbe.bo,
1418               I915_GEM_DOMAIN_INSTRUCTION, 0,
1419               pp_context->urb.size_cs_entry - 1);
1420     ADVANCE_BATCH(batch);    
1421 }
1422
1423 static void
1424 ironlake_pp_object_walker(VADriverContextP ctx,
1425                           struct i965_post_processing_context *pp_context)
1426 {
1427     struct intel_batchbuffer *batch = pp_context->batch;
1428     int x, x_steps, y, y_steps;
1429     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1430
1431     x_steps = pp_context->pp_x_steps(pp_context->private_context);
1432     y_steps = pp_context->pp_y_steps(pp_context->private_context);
1433
1434     for (y = 0; y < y_steps; y++) {
1435         for (x = 0; x < x_steps; x++) {
1436             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
1437                 BEGIN_BATCH(batch, 20);
1438                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
1439                 OUT_BATCH(batch, 0);
1440                 OUT_BATCH(batch, 0); /* no indirect data */
1441                 OUT_BATCH(batch, 0);
1442
1443                 /* inline data grf 5-6 */
1444                 assert(sizeof(*pp_inline_parameter) == 64);
1445                 intel_batchbuffer_data(batch, pp_inline_parameter, sizeof(*pp_inline_parameter));
1446
1447                 ADVANCE_BATCH(batch);
1448             }
1449         }
1450     }
1451 }
1452
1453 static void
1454 ironlake_pp_pipeline_setup(VADriverContextP ctx,
1455                            struct i965_post_processing_context *pp_context)
1456 {
1457     struct intel_batchbuffer *batch = pp_context->batch;
1458
1459     intel_batchbuffer_start_atomic(batch, 0x1000);
1460     intel_batchbuffer_emit_mi_flush(batch);
1461     ironlake_pp_pipeline_select(ctx, pp_context);
1462     ironlake_pp_state_base_address(ctx, pp_context);
1463     ironlake_pp_state_pointers(ctx, pp_context);
1464     ironlake_pp_urb_layout(ctx, pp_context);
1465     ironlake_pp_cs_urb_layout(ctx, pp_context);
1466     ironlake_pp_constant_buffer(ctx, pp_context);
1467     ironlake_pp_object_walker(ctx, pp_context);
1468     intel_batchbuffer_end_atomic(batch);
1469 }
1470
1471 // update u/v offset when the surface format are packed yuv
1472 static void i965_update_src_surface_static_parameter(
1473     VADriverContextP    ctx, 
1474     struct i965_post_processing_context *pp_context,
1475     const struct i965_surface *surface)
1476 {
1477     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1478     int fourcc = pp_get_surface_fourcc(ctx, surface);
1479
1480     switch (fourcc) {
1481     case VA_FOURCC_YUY2:
1482         pp_static_parameter->grf1.source_packed_u_offset = 1;
1483         pp_static_parameter->grf1.source_packed_v_offset = 3;
1484         break;
1485     case VA_FOURCC_UYVY:
1486         pp_static_parameter->grf1.source_packed_y_offset = 1;
1487         pp_static_parameter->grf1.source_packed_v_offset = 2;
1488         break;
1489     case VA_FOURCC_BGRX:
1490     case VA_FOURCC_BGRA:
1491         pp_static_parameter->grf1.source_rgb_layout = 0;
1492         break;
1493     case VA_FOURCC_RGBX:
1494     case VA_FOURCC_RGBA:
1495         pp_static_parameter->grf1.source_rgb_layout = 1;
1496         break;
1497     default:
1498         break;
1499     }
1500     
1501 }
1502
1503 static void i965_update_dst_surface_static_parameter(
1504     VADriverContextP    ctx, 
1505     struct i965_post_processing_context *pp_context,
1506     const struct i965_surface *surface)
1507 {
1508     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1509     int fourcc = pp_get_surface_fourcc(ctx, surface);
1510
1511     switch (fourcc) {
1512     case VA_FOURCC_YUY2:
1513         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_u_offset = 1;
1514         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 3;
1515         break;
1516     case VA_FOURCC_UYVY:
1517         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_y_offset = 1;
1518         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 2;
1519         break;
1520     case VA_FOURCC_BGRX:
1521     case VA_FOURCC_BGRA:
1522         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 0;
1523         break;
1524     case VA_FOURCC_RGBX:
1525     case VA_FOURCC_RGBA:
1526         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 1;
1527         break;
1528     default:
1529         break;
1530     }
1531     
1532 }
1533
1534 static void
1535 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1536                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1537                           int width, int height, int pitch, int format, 
1538                           int index, int is_target)
1539 {
1540     struct i965_surface_state *ss;
1541     dri_bo *ss_bo;
1542     unsigned int tiling;
1543     unsigned int swizzle;
1544
1545     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1546     ss_bo = pp_context->surface_state_binding_table.bo;
1547     assert(ss_bo);
1548
1549     dri_bo_map(ss_bo, True);
1550     assert(ss_bo->virtual);
1551     ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1552     memset(ss, 0, sizeof(*ss));
1553     ss->ss0.surface_type = I965_SURFACE_2D;
1554     ss->ss0.surface_format = format;
1555     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1556     ss->ss2.width = width - 1;
1557     ss->ss2.height = height - 1;
1558     ss->ss3.pitch = pitch - 1;
1559     pp_set_surface_tiling(ss, tiling);
1560     dri_bo_emit_reloc(ss_bo,
1561                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1562                       surf_bo_offset,
1563                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
1564                       surf_bo);
1565     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1566     dri_bo_unmap(ss_bo);
1567 }
1568
1569 static void
1570 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1571                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1572                            int width, int height, int wpitch,
1573                            int xoffset, int yoffset,
1574                            int format, int interleave_chroma,
1575                            int index)
1576 {
1577     struct i965_surface_state2 *ss2;
1578     dri_bo *ss2_bo;
1579     unsigned int tiling;
1580     unsigned int swizzle;
1581
1582     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1583     ss2_bo = pp_context->surface_state_binding_table.bo;
1584     assert(ss2_bo);
1585
1586     dri_bo_map(ss2_bo, True);
1587     assert(ss2_bo->virtual);
1588     ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1589     memset(ss2, 0, sizeof(*ss2));
1590     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1591     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1592     ss2->ss1.width = width - 1;
1593     ss2->ss1.height = height - 1;
1594     ss2->ss2.pitch = wpitch - 1;
1595     ss2->ss2.interleave_chroma = interleave_chroma;
1596     ss2->ss2.surface_format = format;
1597     ss2->ss3.x_offset_for_cb = xoffset;
1598     ss2->ss3.y_offset_for_cb = yoffset;
1599     pp_set_surface2_tiling(ss2, tiling);
1600     dri_bo_emit_reloc(ss2_bo,
1601                       I915_GEM_DOMAIN_RENDER, 0,
1602                       surf_bo_offset,
1603                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
1604                       surf_bo);
1605     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1606     dri_bo_unmap(ss2_bo);
1607 }
1608
1609 static void
1610 gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1611                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1612                           int width, int height, int pitch, int format, 
1613                           int index, int is_target)
1614 {
1615     struct i965_driver_data * const i965 = i965_driver_data(ctx);  
1616     struct gen7_surface_state *ss;
1617     dri_bo *ss_bo;
1618     unsigned int tiling;
1619     unsigned int swizzle;
1620
1621     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1622     ss_bo = pp_context->surface_state_binding_table.bo;
1623     assert(ss_bo);
1624
1625     dri_bo_map(ss_bo, True);
1626     assert(ss_bo->virtual);
1627     ss = (struct gen7_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1628     memset(ss, 0, sizeof(*ss));
1629     ss->ss0.surface_type = I965_SURFACE_2D;
1630     ss->ss0.surface_format = format;
1631     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1632     ss->ss2.width = width - 1;
1633     ss->ss2.height = height - 1;
1634     ss->ss3.pitch = pitch - 1;
1635     gen7_pp_set_surface_tiling(ss, tiling);
1636     if (IS_HASWELL(i965->intel.device_info))
1637         gen7_render_set_surface_scs(ss);
1638     dri_bo_emit_reloc(ss_bo,
1639                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1640                       surf_bo_offset,
1641                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
1642                       surf_bo);
1643     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1644     dri_bo_unmap(ss_bo);
1645 }
1646
1647 static void
1648 gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1649                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1650                            int width, int height, int wpitch,
1651                            int xoffset, int yoffset,
1652                            int format, int interleave_chroma,
1653                            int index)
1654 {
1655     struct gen7_surface_state2 *ss2;
1656     dri_bo *ss2_bo;
1657     unsigned int tiling;
1658     unsigned int swizzle;
1659
1660     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1661     ss2_bo = pp_context->surface_state_binding_table.bo;
1662     assert(ss2_bo);
1663
1664     dri_bo_map(ss2_bo, True);
1665     assert(ss2_bo->virtual);
1666     ss2 = (struct gen7_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1667     memset(ss2, 0, sizeof(*ss2));
1668     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1669     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1670     ss2->ss1.width = width - 1;
1671     ss2->ss1.height = height - 1;
1672     ss2->ss2.pitch = wpitch - 1;
1673     ss2->ss2.interleave_chroma = interleave_chroma;
1674     ss2->ss2.surface_format = format;
1675     ss2->ss3.x_offset_for_cb = xoffset;
1676     ss2->ss3.y_offset_for_cb = yoffset;
1677     gen7_pp_set_surface2_tiling(ss2, tiling);
1678     dri_bo_emit_reloc(ss2_bo,
1679                       I915_GEM_DOMAIN_RENDER, 0,
1680                       surf_bo_offset,
1681                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
1682                       surf_bo);
1683     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1684     dri_bo_unmap(ss2_bo);
1685 }
1686
1687 static void 
1688 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1689                                 const struct i965_surface *surface, 
1690                                 int base_index, int is_target,
1691                                 int *width, int *height, int *pitch, int *offset)
1692 {
1693     struct object_surface *obj_surface;
1694     struct object_image *obj_image;
1695     dri_bo *bo;
1696     int fourcc = pp_get_surface_fourcc(ctx, surface);
1697     const int Y = 0;
1698     const int U = ((fourcc == VA_FOURCC_YV12) ||
1699                    (fourcc == VA_FOURCC_YV16))
1700                    ? 2 : 1;
1701     const int V = ((fourcc == VA_FOURCC_YV12) ||
1702                    (fourcc == VA_FOURCC_YV16))
1703                    ? 1 : 2;
1704     const int UV = 1;
1705     int interleaved_uv = fourcc == VA_FOURCC_NV12;
1706     int packed_yuv = (fourcc == VA_FOURCC_YUY2 || fourcc == VA_FOURCC_UYVY);
1707     int full_packed_format = (fourcc == VA_FOURCC_RGBA ||
1708                               fourcc == VA_FOURCC_RGBX ||
1709                               fourcc == VA_FOURCC_BGRA ||
1710                               fourcc == VA_FOURCC_BGRX);
1711     int scale_factor_of_1st_plane_width_in_byte = 1;
1712                               
1713     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1714         obj_surface = (struct object_surface *)surface->base;
1715         bo = obj_surface->bo;
1716         width[0] = obj_surface->orig_width;
1717         height[0] = obj_surface->orig_height;
1718         pitch[0] = obj_surface->width;
1719         offset[0] = 0;
1720
1721         if (full_packed_format) {
1722             scale_factor_of_1st_plane_width_in_byte = 4; 
1723         }
1724         else if (packed_yuv ) {
1725             scale_factor_of_1st_plane_width_in_byte =  2; 
1726         }
1727         else if (interleaved_uv) {
1728             width[1] = obj_surface->orig_width;
1729             height[1] = obj_surface->orig_height / 2;
1730             pitch[1] = obj_surface->width;
1731             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1732         } else {
1733             width[1] = obj_surface->orig_width / 2;
1734             height[1] = obj_surface->orig_height / 2;
1735             pitch[1] = obj_surface->width / 2;
1736             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1737             width[2] = obj_surface->orig_width / 2;
1738             height[2] = obj_surface->orig_height / 2;
1739             pitch[2] = obj_surface->width / 2;
1740             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
1741         }
1742     } else {
1743         obj_image = (struct object_image *)surface->base;
1744         bo = obj_image->bo;
1745         width[0] = obj_image->image.width;
1746         height[0] = obj_image->image.height;
1747         pitch[0] = obj_image->image.pitches[0];
1748         offset[0] = obj_image->image.offsets[0];
1749
1750         if (full_packed_format) {
1751             scale_factor_of_1st_plane_width_in_byte = 4;
1752         }
1753         else if (packed_yuv ) {
1754             scale_factor_of_1st_plane_width_in_byte = 2;
1755         }
1756         else if (interleaved_uv) {
1757             width[1] = obj_image->image.width;
1758             height[1] = obj_image->image.height / 2;
1759             pitch[1] = obj_image->image.pitches[1];
1760             offset[1] = obj_image->image.offsets[1];
1761         } else {
1762             width[1] = obj_image->image.width / 2;
1763             height[1] = obj_image->image.height / 2;
1764             pitch[1] = obj_image->image.pitches[1];
1765             offset[1] = obj_image->image.offsets[1];
1766             width[2] = obj_image->image.width / 2;
1767             height[2] = obj_image->image.height / 2;
1768             pitch[2] = obj_image->image.pitches[2];
1769             offset[2] = obj_image->image.offsets[2];
1770             if (fourcc == VA_FOURCC_YV16) {
1771                 width[1] = obj_image->image.width / 2;
1772                 height[1] = obj_image->image.height;
1773                 width[2] = obj_image->image.width / 2;
1774                 height[2] = obj_image->image.height;
1775             }
1776         }
1777     }
1778
1779     /* Y surface */
1780     i965_pp_set_surface_state(ctx, pp_context,
1781                               bo, offset[Y],
1782                               width[Y] *scale_factor_of_1st_plane_width_in_byte / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
1783                               base_index, is_target);
1784
1785     if (!packed_yuv && !full_packed_format) {
1786         if (interleaved_uv) {
1787             i965_pp_set_surface_state(ctx, pp_context,
1788                                       bo, offset[UV],
1789                                       width[UV] / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
1790                                       base_index + 1, is_target);
1791         } else {
1792             /* U surface */
1793             i965_pp_set_surface_state(ctx, pp_context,
1794                                       bo, offset[U],
1795                                       width[U] / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
1796                                       base_index + 1, is_target);
1797
1798             /* V surface */
1799             i965_pp_set_surface_state(ctx, pp_context,
1800                                       bo, offset[V],
1801                                       width[V] / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
1802                                       base_index + 2, is_target);
1803         }
1804     }
1805
1806 }
1807
1808 static void 
1809 gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1810                                      const struct i965_surface *surface, 
1811                                      int base_index, int is_target,
1812                                      const VARectangle *rect,
1813                                      int *width, int *height, int *pitch, int *offset)
1814 {
1815     struct object_surface *obj_surface;
1816     struct object_image *obj_image;
1817     dri_bo *bo;
1818     int fourcc = pp_get_surface_fourcc(ctx, surface);
1819     const i965_fourcc_info *fourcc_info = get_fourcc_info(fourcc);
1820
1821     if (fourcc_info == NULL)
1822         return;
1823
1824     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1825         obj_surface = (struct object_surface *)surface->base;
1826         bo = obj_surface->bo;
1827         width[0] = MIN(rect->x + rect->width, obj_surface->orig_width);
1828         height[0] = MIN(rect->y + rect->height, obj_surface->orig_height);
1829         pitch[0] = obj_surface->width;
1830         offset[0] = 0;
1831
1832         if (fourcc_info->num_planes == 1 && is_target)
1833             width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */
1834
1835         width[1] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_surface->cb_cr_width);
1836         height[1] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_surface->cb_cr_height);
1837         pitch[1] = obj_surface->cb_cr_pitch;
1838         offset[1] = obj_surface->y_cb_offset * obj_surface->width;
1839
1840         width[2] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_surface->cb_cr_width);
1841         height[2] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_surface->cb_cr_height);
1842         pitch[2] = obj_surface->cb_cr_pitch;
1843         offset[2] = obj_surface->y_cr_offset * obj_surface->width;
1844     } else {
1845         int U = 0, V = 0;
1846
1847         /* FIXME: add support for ARGB/ABGR image */
1848         obj_image = (struct object_image *)surface->base;
1849         bo = obj_image->bo;
1850         width[0] = MIN(rect->x + rect->width, obj_image->image.width);
1851         height[0] = MIN(rect->y + rect->height, obj_image->image.height);
1852         pitch[0] = obj_image->image.pitches[0];
1853         offset[0] = obj_image->image.offsets[0];
1854
1855         if (fourcc_info->num_planes == 1) {
1856             if (is_target)
1857                 width[0] = width[0] * (fourcc_info->bpp[0] / 8); /* surface format is R8 */
1858         } else if (fourcc_info->num_planes == 2) {
1859             U = 1, V = 1;
1860         } else {
1861             assert(fourcc_info->num_components == 3);
1862
1863             U = fourcc_info->components[1].plane;
1864             V = fourcc_info->components[2].plane;
1865             assert((U == 1 && V == 2) ||
1866                    (U == 2 && V == 1));
1867         }
1868
1869         /* Always set width/height although they aren't used for fourcc_info->num_planes == 1 */
1870         width[1] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_image->image.width / fourcc_info->hfactor);
1871         height[1] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_image->image.height / fourcc_info->vfactor);
1872         pitch[1] = obj_image->image.pitches[U];
1873         offset[1] = obj_image->image.offsets[U];
1874
1875         width[2] = MIN(rect->x / fourcc_info->hfactor + rect->width / fourcc_info->hfactor, obj_image->image.width / fourcc_info->hfactor);
1876         height[2] = MIN(rect->y / fourcc_info->vfactor + rect->height / fourcc_info->vfactor, obj_image->image.height / fourcc_info->vfactor);
1877         pitch[2] = obj_image->image.pitches[V];
1878         offset[2] = obj_image->image.offsets[V];
1879     }
1880
1881     if (is_target) {
1882         gen7_pp_set_surface_state(ctx, pp_context,
1883                                   bo, 0,
1884                                   width[0] / 4, height[0], pitch[0],
1885                                   I965_SURFACEFORMAT_R8_UINT,
1886                                   base_index, 1);
1887
1888         if (fourcc_info->num_planes == 2) {
1889             gen7_pp_set_surface_state(ctx, pp_context,
1890                                       bo, offset[1],
1891                                       width[1] / 2, height[1], pitch[1],
1892                                       I965_SURFACEFORMAT_R8G8_SINT,
1893                                       base_index + 1, 1);
1894         } else if (fourcc_info->num_planes == 3) {
1895             gen7_pp_set_surface_state(ctx, pp_context,
1896                                       bo, offset[1],
1897                                       width[1] / 4, height[1], pitch[1],
1898                                       I965_SURFACEFORMAT_R8_SINT,
1899                                       base_index + 1, 1);
1900             gen7_pp_set_surface_state(ctx, pp_context,
1901                                       bo, offset[2],
1902                                       width[2] / 4, height[2], pitch[2],
1903                                       I965_SURFACEFORMAT_R8_SINT,
1904                                       base_index + 2, 1);
1905         }
1906
1907         if (fourcc_info->format == I965_COLOR_RGB) {
1908             struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1909             /* the format is MSB: X-B-G-R */
1910             pp_static_parameter->grf2.save_avs_rgb_swap = 0;
1911             if ((fourcc == VA_FOURCC_BGRA) ||
1912                 (fourcc == VA_FOURCC_BGRX)) {
1913                 /* It is stored as MSB: X-R-G-B */
1914                 pp_static_parameter->grf2.save_avs_rgb_swap = 1;
1915             }
1916         }
1917     } else {
1918         int format0 = SURFACE_FORMAT_Y8_UNORM;
1919
1920         switch (fourcc) {
1921         case VA_FOURCC_YUY2:
1922             format0 = SURFACE_FORMAT_YCRCB_NORMAL;
1923             break;
1924
1925         case VA_FOURCC_UYVY:
1926             format0 = SURFACE_FORMAT_YCRCB_SWAPY;
1927             break;
1928
1929         default:
1930             break;
1931         }
1932
1933         if (fourcc_info->format == I965_COLOR_RGB) {
1934             struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1935             /* Only R8G8B8A8_UNORM is supported for BGRX or RGBX */
1936             format0 = SURFACE_FORMAT_R8G8B8A8_UNORM;
1937             pp_static_parameter->grf2.src_avs_rgb_swap = 0;
1938             if ((fourcc == VA_FOURCC_BGRA) ||
1939                 (fourcc == VA_FOURCC_BGRX)) {
1940                 pp_static_parameter->grf2.src_avs_rgb_swap = 1;
1941             }
1942         }
1943
1944         gen7_pp_set_surface2_state(ctx, pp_context,
1945                                    bo, offset[0],
1946                                    width[0], height[0], pitch[0],
1947                                    0, 0,
1948                                    format0, 0,
1949                                    base_index);
1950
1951         if (fourcc_info->num_planes == 2) {
1952             gen7_pp_set_surface2_state(ctx, pp_context,
1953                                        bo, offset[1],
1954                                        width[1], height[1], pitch[1],
1955                                        0, 0,
1956                                        SURFACE_FORMAT_R8B8_UNORM, 0,
1957                                        base_index + 1);
1958         } else if (fourcc_info->num_planes == 3) {
1959             gen7_pp_set_surface2_state(ctx, pp_context,
1960                                        bo, offset[1],
1961                                        width[1], height[1], pitch[1],
1962                                        0, 0,
1963                                        SURFACE_FORMAT_R8_UNORM, 0,
1964                                        base_index + 1);
1965             gen7_pp_set_surface2_state(ctx, pp_context,
1966                                        bo, offset[2],
1967                                        width[2], height[2], pitch[2],
1968                                        0, 0,
1969                                        SURFACE_FORMAT_R8_UNORM, 0,
1970                                        base_index + 2);
1971         }
1972     }
1973 }
1974
/*
 * Horizontal step count reported by the "null" post-processing module.
 * The private context is not consulted; the answer is always 1.
 */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context; /* unused */

    return 1;
}
1980
/*
 * Vertical step count reported by the "null" post-processing module.
 * The private context is not consulted; the answer is always 1.
 */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context; /* unused */

    return 1;
}
1986
/*
 * Per-block parameter hook of the "null" post-processing module.
 * Nothing is written to pp_context; the function always returns 0.
 */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;
    (void)x;
    (void)y;

    return 0;
}
1992
1993 static VAStatus
1994 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1995                    const struct i965_surface *src_surface,
1996                    const VARectangle *src_rect,
1997                    struct i965_surface *dst_surface,
1998                    const VARectangle *dst_rect,
1999                    void *filter_param)
2000 {
2001     /* private function & data */
2002     pp_context->pp_x_steps = pp_null_x_steps;
2003     pp_context->pp_y_steps = pp_null_y_steps;
2004     pp_context->private_context = NULL;
2005     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
2006
2007     dst_surface->flags = src_surface->flags;
2008
2009     return VA_STATUS_SUCCESS;
2010 }
2011
/*
 * Horizontal step count of the load/save module.
 * The private context is not consulted; the answer is always 1.
 */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context; /* unused */

    return 1;
}
2017
2018 static int
2019 pp_load_save_y_steps(void *private_context)
2020 {
2021     struct pp_load_save_context *pp_load_save_context = private_context;
2022
2023     return pp_load_save_context->dest_h / 8;
2024 }
2025
2026 static int
2027 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2028 {
2029     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2030     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)pp_context->private_context;
2031
2032     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_load_save_context->dest_x;
2033     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_load_save_context->dest_y;
2034
2035     return 0;
2036 }
2037
2038 static void calculate_boundary_block_mask(struct i965_post_processing_context *pp_context, const VARectangle *dst_rect)
2039 {
2040     int i;
2041     /* x offset of dest surface must be dword aligned.
2042      * so we have to extend dst surface on left edge, and mask out pixels not interested
2043      */
2044     if (dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT) {
2045         pp_context->block_horizontal_mask_left = 0;
2046         for (i=dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; i<GPU_ASM_BLOCK_WIDTH; i++)
2047         {
2048             pp_context->block_horizontal_mask_left |= 1<<i;
2049         }
2050     }
2051     else {
2052         pp_context->block_horizontal_mask_left = 0xffff;
2053     }
2054     
2055     int dst_width_adjust = dst_rect->width + dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; 
2056     if (dst_width_adjust%GPU_ASM_BLOCK_WIDTH){
2057         pp_context->block_horizontal_mask_right = (1 << (dst_width_adjust%GPU_ASM_BLOCK_WIDTH)) - 1;
2058     }
2059     else {
2060         pp_context->block_horizontal_mask_right = 0xffff;
2061     }
2062     
2063     if (dst_rect->height%GPU_ASM_BLOCK_HEIGHT){
2064         pp_context->block_vertical_mask_bottom = (1 << (dst_rect->height%GPU_ASM_BLOCK_HEIGHT)) - 1;
2065     }
2066     else {
2067         pp_context->block_vertical_mask_bottom = 0xff;
2068     }
2069
2070 }
2071 static VAStatus
2072 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2073                                 const struct i965_surface *src_surface,
2074                                 const VARectangle *src_rect,
2075                                 struct i965_surface *dst_surface,
2076                                 const VARectangle *dst_rect,
2077                                 void *filter_param)
2078 {
2079     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->pp_load_save_context;
2080     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2081     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2082     int width[3], height[3], pitch[3], offset[3];
2083
2084     /* source surface */
2085     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
2086                                     width, height, pitch, offset);
2087
2088     /* destination surface */
2089     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
2090                                     width, height, pitch, offset);
2091
2092     /* private function & data */
2093     pp_context->pp_x_steps = pp_load_save_x_steps;
2094     pp_context->pp_y_steps = pp_load_save_y_steps;
2095     pp_context->private_context = &pp_context->pp_load_save_context;
2096     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
2097
2098     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;;
2099     pp_load_save_context->dest_x = dst_rect->x - dst_left_edge_extend;
2100     pp_load_save_context->dest_y = dst_rect->y;
2101     pp_load_save_context->dest_h = ALIGN(dst_rect->height, 8);
2102     pp_load_save_context->dest_w = ALIGN(dst_rect->width+dst_left_edge_extend, 16);
2103
2104     pp_inline_parameter->grf5.block_count_x = pp_load_save_context->dest_w / 16;   /* 1 x N */
2105     pp_inline_parameter->grf5.number_blocks = pp_load_save_context->dest_w / 16;
2106
2107     pp_static_parameter->grf3.horizontal_origin_offset = src_rect->x;
2108     pp_static_parameter->grf3.vertical_origin_offset = src_rect->y;
2109
2110     // update u/v offset for packed yuv
2111     i965_update_src_surface_static_parameter (ctx, pp_context, src_surface);
2112     i965_update_dst_surface_static_parameter (ctx, pp_context, dst_surface);
2113
2114     dst_surface->flags = src_surface->flags;
2115
2116     return VA_STATUS_SUCCESS;
2117 }
2118
/*
 * Horizontal step count of the scaling module.
 * The private context is not consulted; the answer is always 1.
 */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context; /* unused */

    return 1;
}
2124
2125 static int
2126 pp_scaling_y_steps(void *private_context)
2127 {
2128     struct pp_scaling_context *pp_scaling_context = private_context;
2129
2130     return pp_scaling_context->dest_h / 8;
2131 }
2132
2133 static int
2134 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2135 {
2136     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)pp_context->private_context;
2137     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2138     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2139     float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2140     float src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2141
2142     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
2143     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
2144     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
2145     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
2146     
2147     return 0;
2148 }
2149
2150 static VAStatus
2151 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2152                            const struct i965_surface *src_surface,
2153                            const VARectangle *src_rect,
2154                            struct i965_surface *dst_surface,
2155                            const VARectangle *dst_rect,
2156                            void *filter_param)
2157 {
2158     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->pp_scaling_context;
2159     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2160     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2161     struct object_surface *obj_surface;
2162     struct i965_sampler_state *sampler_state;
2163     int in_w, in_h, in_wpitch, in_hpitch;
2164     int out_w, out_h, out_wpitch, out_hpitch;
2165
2166     /* source surface */
2167     obj_surface = (struct object_surface *)src_surface->base;
2168     in_w = obj_surface->orig_width;
2169     in_h = obj_surface->orig_height;
2170     in_wpitch = obj_surface->width;
2171     in_hpitch = obj_surface->height;
2172
2173     /* source Y surface index 1 */
2174     i965_pp_set_surface_state(ctx, pp_context,
2175                               obj_surface->bo, 0,
2176                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2177                               1, 0);
2178
2179     /* source UV surface index 2 */
2180     i965_pp_set_surface_state(ctx, pp_context,
2181                               obj_surface->bo, in_wpitch * in_hpitch,
2182                               in_w / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2183                               2, 0);
2184
2185     /* destination surface */
2186     obj_surface = (struct object_surface *)dst_surface->base;
2187     out_w = obj_surface->orig_width;
2188     out_h = obj_surface->orig_height;
2189     out_wpitch = obj_surface->width;
2190     out_hpitch = obj_surface->height;
2191
2192     /* destination Y surface index 7 */
2193     i965_pp_set_surface_state(ctx, pp_context,
2194                               obj_surface->bo, 0,
2195                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2196                               7, 1);
2197
2198     /* destination UV surface index 8 */
2199     i965_pp_set_surface_state(ctx, pp_context,
2200                               obj_surface->bo, out_wpitch * out_hpitch,
2201                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2202                               8, 1);
2203
2204     /* sampler state */
2205     dri_bo_map(pp_context->sampler_state_table.bo, True);
2206     assert(pp_context->sampler_state_table.bo->virtual);
2207     sampler_state = pp_context->sampler_state_table.bo->virtual;
2208
2209     /* SIMD16 Y index 1 */
2210     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
2211     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2212     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2213     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2214     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2215
2216     /* SIMD16 UV index 2 */
2217     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
2218     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2219     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2220     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2221     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2222
2223     dri_bo_unmap(pp_context->sampler_state_table.bo);
2224
2225     /* private function & data */
2226     pp_context->pp_x_steps = pp_scaling_x_steps;
2227     pp_context->pp_y_steps = pp_scaling_y_steps;
2228     pp_context->private_context = &pp_context->pp_scaling_context;
2229     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
2230
2231     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
2232     float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
2233     pp_scaling_context->dest_x = dst_rect->x - dst_left_edge_extend;
2234     pp_scaling_context->dest_y = dst_rect->y;
2235     pp_scaling_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2236     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 8);
2237     pp_scaling_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
2238     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
2239
2240     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2241
2242     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
2243     pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
2244     pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
2245
2246     dst_surface->flags = src_surface->flags;
2247
2248     return VA_STATUS_SUCCESS;
2249 }
2250
2251 static int
2252 pp_avs_x_steps(void *private_context)
2253 {
2254     struct pp_avs_context *pp_avs_context = private_context;
2255
2256     return pp_avs_context->dest_w / 16;
2257 }
2258
/*
 * Vertical step count of the AVS module.
 * The private context is not consulted; the answer is always 1.
 */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context; /* unused */

    return 1;
}
2264
/*
 * Fill in the inline parameters for block (x, y) of the AVS module.
 *
 * The destination block origin and the normalized vertical source origin
 * are always derived directly from (x, y).  The normalized horizontal
 * source origin and the horizontal scaling step depend on the "nlas" flag
 * of the static parameters:
 *
 *   - nlas == 0: the origin is computed directly from x and the scaling
 *     step already stored in the inline parameters;
 *   - nlas != 0: the step and the step delta are rewritten per block, and
 *     for x != 0 the origin is advanced incrementally ("+=") from the value
 *     left in pp_inline_parameter by an earlier call.
 *     NOTE(review): this presumably requires the caller to iterate x from 0
 *     upwards without gaps - confirm against the caller.
 *
 * Always returns 0.
 */
static int
pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context;
    struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
    struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
    float src_x_steping, src_y_steping, video_step_delta;
    /* destination height scaled by the source width/height ratio, rounded up to a multiple of 16 */
    int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);

    if (pp_static_parameter->grf4.r4_2.avs.nlas == 0) {
        /* plain linear scaling: origin is a direct function of x */
        src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
        pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_avs_context->src_normalized_x;
    } else if (tmp_w >= pp_avs_context->dest_w) {
        /* tmp_w is at least the destination width: constant step of 1 / tmp_w, no delta */
        pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
        pp_inline_parameter->grf6.video_step_delta = 0;

        if (x == 0) {
            /* first block: start half of the (tmp_w - dest_w) excess into the source */
            pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
                pp_avs_context->src_normalized_x;
        } else {
            /* advance the stored origin by one block (16 steps; the delta term is 0 here) */
            src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
            video_step_delta = pp_inline_parameter->grf6.video_step_delta;
            pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                16 * 15 * video_step_delta / 2;
        }
    } else {
        /* tmp_w is smaller than the destination width */
        int n0, n1, n2, nls_left, nls_right;
        int factor_a = 5, factor_b = 4;
        float f;

        /* n0 / n1: the (dest_w - tmp_w) surplus, in 16-pixel blocks, split between left and right */
        n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
        n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
        /* n2: 1 / factor_a of tmp_w, in 16-pixel blocks */
        n2 = tmp_w / (16 * factor_a);
        /* number of blocks handled by the left and right non-linear branches below */
        nls_left = n0 + n2;
        nls_right = n1 + n2;
        f = (float) n2 * 16 / tmp_w;

        if (n0 < 5) {
            /* small surplus: constant step of 1 / dest_w over the whole row */
            pp_inline_parameter->grf6.video_step_delta = 0.0;

            if (x == 0) {
                pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
                pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
            } else {
                src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
                video_step_delta = pp_inline_parameter->grf6.video_step_delta;
                pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                    16 * 15 * video_step_delta / 2;
            }
        } else {
            if (x < nls_left) {
                /* left branch: the step starts at a and grows by b per pixel (16 * b per block) */
                /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
                float a = f / (nls_left * 16 * factor_b);
                float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));

                pp_inline_parameter->grf6.video_step_delta = b;

                if (x == 0) {
                    pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
                    pp_inline_parameter->grf5.normalized_video_x_scaling_step = a;
                } else {
                    /* 16 * 15 / 2 is the sum 0 + 1 + ... + 15, presumably the delta accumulated over the previous block */
                    src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
                    video_step_delta = pp_inline_parameter->grf6.video_step_delta;
                    pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                        16 * 15 * video_step_delta / 2;
                    pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
                }
            } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
                /* scale the center linearly */
                /* the origin is advanced with the step/delta of the previous block before they are reset */
                src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
                video_step_delta = pp_inline_parameter->grf6.video_step_delta;
                pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                    16 * 15 * video_step_delta / 2;
                pp_inline_parameter->grf6.video_step_delta = 0.0;
                pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
            } else {
                /* right branch: same a/b formulas as the left branch with nls_right; the delta is negated */
                float a = f / (nls_right * 16 * factor_b);
                float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));

                src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
                video_step_delta = pp_inline_parameter->grf6.video_step_delta;
                pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                    16 * 15 * video_step_delta / 2;
                pp_inline_parameter->grf6.video_step_delta = -b;

                /* first block of the right branch starts from the largest step; later blocks shrink it by 16 * b */
                if (x == (pp_avs_context->dest_w / 16 - nls_right))
                    pp_inline_parameter->grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
                else
                    pp_inline_parameter->grf5.normalized_video_x_scaling_step -= b * 16;
            }
        }
    }

    /* vertical source origin and destination block origin are direct functions of (x, y) */
    src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
    pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
    pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
    pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;

    return 0;
}
2365
2366 static VAStatus
2367 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2368                        const struct i965_surface *src_surface,
2369                        const VARectangle *src_rect,
2370                        struct i965_surface *dst_surface,
2371                        const VARectangle *dst_rect,
2372                        void *filter_param,
2373                        int nlas)
2374 {
2375     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context;
2376     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2377     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2378     struct object_surface *obj_surface;
2379     struct i965_sampler_8x8 *sampler_8x8;
2380     struct i965_sampler_8x8_state *sampler_8x8_state;
2381     int index;
2382     int in_w, in_h, in_wpitch, in_hpitch;
2383     int out_w, out_h, out_wpitch, out_hpitch;
2384     int i;
2385
2386     /* surface */
2387     obj_surface = (struct object_surface *)src_surface->base;
2388     in_w = obj_surface->orig_width;
2389     in_h = obj_surface->orig_height;
2390     in_wpitch = obj_surface->width;
2391     in_hpitch = obj_surface->height;
2392
2393     /* source Y surface index 1 */
2394     i965_pp_set_surface2_state(ctx, pp_context,
2395                                obj_surface->bo, 0,
2396                                in_w, in_h, in_wpitch,
2397                                0, 0,
2398                                SURFACE_FORMAT_Y8_UNORM, 0,
2399                                1);
2400
2401     /* source UV surface index 2 */
2402     i965_pp_set_surface2_state(ctx, pp_context,
2403                                obj_surface->bo, in_wpitch * in_hpitch,
2404                                in_w / 2, in_h / 2, in_wpitch,
2405                                0, 0,
2406                                SURFACE_FORMAT_R8B8_UNORM, 0,
2407                                2);
2408
2409     /* destination surface */
2410     obj_surface = (struct object_surface *)dst_surface->base;
2411     out_w = obj_surface->orig_width;
2412     out_h = obj_surface->orig_height;
2413     out_wpitch = obj_surface->width;
2414     out_hpitch = obj_surface->height;
2415     assert(out_w <= out_wpitch && out_h <= out_hpitch);
2416
2417     /* destination Y surface index 7 */
2418     i965_pp_set_surface_state(ctx, pp_context,
2419                               obj_surface->bo, 0,
2420                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2421                               7, 1);
2422
2423     /* destination UV surface index 8 */
2424     i965_pp_set_surface_state(ctx, pp_context,
2425                               obj_surface->bo, out_wpitch * out_hpitch,
2426                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2427                               8, 1);
2428
2429     /* sampler 8x8 state */
2430     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2431     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2432     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2433     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2434     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2435
2436     for (i = 0; i < 17; i++) {
2437         /* for Y channel, currently ignore */
2438         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x00;
2439         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x00;
2440         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x08;
2441         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x18;
2442         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x18;
2443         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x08;
2444         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x00;
2445         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x00;
2446         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x00;
2447         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x00;
2448         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x10;
2449         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x10;
2450         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x10;
2451         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x10;
2452         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x00;
2453         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x00;
2454         /* for U/V channel, 0.25 */
2455         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2456         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2457         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
2458         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
2459         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
2460         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
2461         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2462         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2463         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2464         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2465         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
2466         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
2467         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
2468         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
2469         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2470         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2471     }
2472
2473     sampler_8x8_state->dw136.default_sharpness_level = 0;
2474     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
2475     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2476     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2477     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2478
2479     /* sampler 8x8 */
2480     dri_bo_map(pp_context->sampler_state_table.bo, True);
2481     assert(pp_context->sampler_state_table.bo->virtual);
2482     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
2483     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2484
2485     /* sample_8x8 Y index 1 */
2486     index = 1;
2487     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2488     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2489     sampler_8x8[index].dw0.ief_bypass = 1;
2490     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2491     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2492     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2493     sampler_8x8[index].dw2.global_noise_estimation = 22;
2494     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2495     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2496     sampler_8x8[index].dw3.strong_edge_weight = 7;
2497     sampler_8x8[index].dw3.regular_weight = 2;
2498     sampler_8x8[index].dw3.non_edge_weight = 0;
2499     sampler_8x8[index].dw3.gain_factor = 40;
2500     sampler_8x8[index].dw4.steepness_boost = 0;
2501     sampler_8x8[index].dw4.steepness_threshold = 0;
2502     sampler_8x8[index].dw4.mr_boost = 0;
2503     sampler_8x8[index].dw4.mr_threshold = 5;
2504     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2505     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2506     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2507     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2508     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2509     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2510     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2511     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2512     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2513     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2514     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2515     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2516     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2517     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2518     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2519     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2520     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2521     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2522     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2523     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2524     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2525     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2526     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2527     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2528     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2529     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2530     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2531     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2532     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2533     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2534     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2535     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2536     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2537     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2538     sampler_8x8[index].dw13.limiter_boost = 0;
2539     sampler_8x8[index].dw13.minimum_limiter = 10;
2540     sampler_8x8[index].dw13.maximum_limiter = 11;
2541     sampler_8x8[index].dw14.clip_limiter = 130;
2542     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2543                       I915_GEM_DOMAIN_RENDER, 
2544                       0,
2545                       0,
2546                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2547                       pp_context->sampler_state_table.bo_8x8);
2548
2549     /* sample_8x8 UV index 2 */
2550     index = 2;
2551     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2552     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2553     sampler_8x8[index].dw0.ief_bypass = 1;
2554     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2555     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2556     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2557     sampler_8x8[index].dw2.global_noise_estimation = 22;
2558     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2559     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2560     sampler_8x8[index].dw3.strong_edge_weight = 7;
2561     sampler_8x8[index].dw3.regular_weight = 2;
2562     sampler_8x8[index].dw3.non_edge_weight = 0;
2563     sampler_8x8[index].dw3.gain_factor = 40;
2564     sampler_8x8[index].dw4.steepness_boost = 0;
2565     sampler_8x8[index].dw4.steepness_threshold = 0;
2566     sampler_8x8[index].dw4.mr_boost = 0;
2567     sampler_8x8[index].dw4.mr_threshold = 5;
2568     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2569     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2570     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2571     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2572     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2573     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2574     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2575     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2576     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2577     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2578     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2579     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2580     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2581     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2582     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2583     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2584     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2585     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2586     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2587     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2588     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2589     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2590     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2591     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2592     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2593     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2594     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2595     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2596     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2597     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2598     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2599     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2600     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2601     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2602     sampler_8x8[index].dw13.limiter_boost = 0;
2603     sampler_8x8[index].dw13.minimum_limiter = 10;
2604     sampler_8x8[index].dw13.maximum_limiter = 11;
2605     sampler_8x8[index].dw14.clip_limiter = 130;
2606     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2607                       I915_GEM_DOMAIN_RENDER, 
2608                       0,
2609                       0,
2610                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2611                       pp_context->sampler_state_table.bo_8x8);
2612
2613     dri_bo_unmap(pp_context->sampler_state_table.bo);
2614
2615     /* private function & data */
2616     pp_context->pp_x_steps = pp_avs_x_steps;
2617     pp_context->pp_y_steps = pp_avs_y_steps;
2618     pp_context->private_context = &pp_context->pp_avs_context;
2619     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
2620
2621     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
2622     float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
2623     pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
2624     pp_avs_context->dest_y = dst_rect->y;
2625     pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2626     pp_avs_context->dest_h = ALIGN(dst_rect->height, 8);
2627     pp_avs_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
2628     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
2629     pp_avs_context->src_w = src_rect->width + src_left_edge_extend;
2630     pp_avs_context->src_h = src_rect->height;
2631
2632     pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
2633     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2634
2635     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
2636     pp_inline_parameter->grf5.block_count_x = 1;        /* M x 1 */
2637     pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
2638     pp_inline_parameter->grf6.video_step_delta = 0.0;
2639
2640     dst_surface->flags = src_surface->flags;
2641
2642     return VA_STATUS_SUCCESS;
2643 }
2644
2645 static VAStatus
2646 pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2647                             const struct i965_surface *src_surface,
2648                             const VARectangle *src_rect,
2649                             struct i965_surface *dst_surface,
2650                             const VARectangle *dst_rect,
2651                             void *filter_param)
2652 {
2653     return pp_nv12_avs_initialize(ctx, pp_context,
2654                                   src_surface,
2655                                   src_rect,
2656                                   dst_surface,
2657                                   dst_rect,
2658                                   filter_param,
2659                                   1);
2660 }
2661
2662 static VAStatus
2663 gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2664                              const struct i965_surface *src_surface,
2665                              const VARectangle *src_rect,
2666                              struct i965_surface *dst_surface,
2667                              const VARectangle *dst_rect,
2668                              void *filter_param)
2669 {
2670     return pp_nv12_avs_initialize(ctx, pp_context,
2671                                   src_surface,
2672                                   src_rect,
2673                                   dst_surface,
2674                                   dst_rect,
2675                                   filter_param,
2676                                   0);    
2677 }
2678
2679 static int
2680 gen7_pp_avs_x_steps(void *private_context)
2681 {
2682     struct pp_avs_context *pp_avs_context = private_context;
2683
2684     return pp_avs_context->dest_w / 16;
2685 }
2686
2687 static int
2688 gen7_pp_avs_y_steps(void *private_context)
2689 {
2690     struct pp_avs_context *pp_avs_context = private_context;
2691
2692     return pp_avs_context->dest_h / 16;
2693 }
2694
2695 static int
2696 gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2697 {
2698     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context;
2699     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2700
2701     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2702     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
2703     pp_inline_parameter->grf7.constant_0 = 0xffffffff;
2704     pp_inline_parameter->grf7.sampler_load_main_video_x_scaling_step = pp_avs_context->horiz_range / pp_avs_context->src_w;
2705
2706     return 0;
2707 }
2708
2709 static void gen7_update_src_surface_uv_offset(VADriverContextP    ctx, 
2710                                               struct i965_post_processing_context *pp_context,
2711                                               const struct i965_surface *surface)
2712 {
2713     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2714     int fourcc = pp_get_surface_fourcc(ctx, surface);
2715     
2716     if (fourcc == VA_FOURCC_YUY2) {
2717         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
2718         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
2719         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
2720     } else if (fourcc == VA_FOURCC_UYVY) {
2721         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 1;
2722         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 0;
2723         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 2;
2724     }
2725 }
2726
2727 static VAStatus
2728 gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2729                            const struct i965_surface *src_surface,
2730                            const VARectangle *src_rect,
2731                            struct i965_surface *dst_surface,
2732                            const VARectangle *dst_rect,
2733                            void *filter_param)
2734 {
2735     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context;
2736     struct i965_driver_data *i965 = i965_driver_data(ctx);
2737     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2738     struct gen7_sampler_8x8 *sampler_8x8;
2739     struct i965_sampler_8x8_state *sampler_8x8_state;
2740     int index, i;
2741     int width[3], height[3], pitch[3], offset[3];
2742     int src_width, src_height;
2743
2744     /* source surface */
2745     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
2746                                          src_rect,
2747                                          width, height, pitch, offset);
2748     src_width = width[0];
2749     src_height = height[0];
2750
2751     /* destination surface */
2752     gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
2753                                          dst_rect,
2754                                          width, height, pitch, offset);
2755
2756     /* sampler 8x8 state */
2757     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2758     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2759     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2760     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2761     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2762
2763     for (i = 0; i < 17; i++) {
2764         float coff;
2765         coff = i;
2766         coff = coff / 16;
2767         /* for Y channel, currently ignore */
2768         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0;
2769         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0;
2770         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0;
2771         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = intel_format_convert(1 - coff, 1, 6,0);
2772         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = intel_format_convert(coff, 1, 6, 0);
2773         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0;
2774         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0;
2775         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0;
2776         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0;
2777         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0;
2778         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0;
2779         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
2780         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = intel_format_convert(coff, 1, 6, 0);
2781         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0;
2782         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0;
2783         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0;
2784         /* for U/V channel, 0.25 */
2785         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2786         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2787         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x0;
2788         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
2789         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = intel_format_convert(coff, 1, 6, 0);
2790         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0;
2791         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2792         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2793         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2794         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2795         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x0;
2796         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
2797         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = intel_format_convert(coff, 1, 6, 0);
2798         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x0;
2799         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2800         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2801     }
2802
2803     sampler_8x8_state->dw136.default_sharpness_level = 0;
2804     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
2805     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2806     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2807     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2808
2809     /* sampler 8x8 */
2810     dri_bo_map(pp_context->sampler_state_table.bo, True);
2811     assert(pp_context->sampler_state_table.bo->virtual);
2812     assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
2813     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2814
2815     /* sample_8x8 Y index 4 */
2816     index = 4;
2817     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2818     sampler_8x8[index].dw0.global_noise_estimation = 255;
2819     sampler_8x8[index].dw0.ief_bypass = 1;
2820
2821     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2822
2823     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2824     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2825     sampler_8x8[index].dw2.r5x_coefficient = 9;
2826     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2827     sampler_8x8[index].dw2.r5c_coefficient = 3;
2828
2829     sampler_8x8[index].dw3.r3x_coefficient = 27;
2830     sampler_8x8[index].dw3.r3c_coefficient = 5;
2831     sampler_8x8[index].dw3.gain_factor = 40;
2832     sampler_8x8[index].dw3.non_edge_weight = 1;
2833     sampler_8x8[index].dw3.regular_weight = 2;
2834     sampler_8x8[index].dw3.strong_edge_weight = 7;
2835     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2836
2837     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2838                       I915_GEM_DOMAIN_RENDER, 
2839                       0,
2840                       0,
2841                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2842                       pp_context->sampler_state_table.bo_8x8);
2843
2844     /* sample_8x8 UV index 8 */
2845     index = 8;
2846     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2847     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2848     sampler_8x8[index].dw0.global_noise_estimation = 255;
2849     sampler_8x8[index].dw0.ief_bypass = 1;
2850     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2851     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2852     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2853     sampler_8x8[index].dw2.r5x_coefficient = 9;
2854     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2855     sampler_8x8[index].dw2.r5c_coefficient = 3;
2856     sampler_8x8[index].dw3.r3x_coefficient = 27;
2857     sampler_8x8[index].dw3.r3c_coefficient = 5;
2858     sampler_8x8[index].dw3.gain_factor = 40;
2859     sampler_8x8[index].dw3.non_edge_weight = 1;
2860     sampler_8x8[index].dw3.regular_weight = 2;
2861     sampler_8x8[index].dw3.strong_edge_weight = 7;
2862     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2863
2864     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2865                       I915_GEM_DOMAIN_RENDER, 
2866                       0,
2867                       0,
2868                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2869                       pp_context->sampler_state_table.bo_8x8);
2870
2871     /* sampler_8x8 V, index 12 */
2872     index = 12;
2873     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2874     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2875     sampler_8x8[index].dw0.global_noise_estimation = 255;
2876     sampler_8x8[index].dw0.ief_bypass = 1;
2877     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2878     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2879     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2880     sampler_8x8[index].dw2.r5x_coefficient = 9;
2881     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2882     sampler_8x8[index].dw2.r5c_coefficient = 3;
2883     sampler_8x8[index].dw3.r3x_coefficient = 27;
2884     sampler_8x8[index].dw3.r3c_coefficient = 5;
2885     sampler_8x8[index].dw3.gain_factor = 40;
2886     sampler_8x8[index].dw3.non_edge_weight = 1;
2887     sampler_8x8[index].dw3.regular_weight = 2;
2888     sampler_8x8[index].dw3.strong_edge_weight = 7;
2889     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2890
2891     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2892                       I915_GEM_DOMAIN_RENDER, 
2893                       0,
2894                       0,
2895                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2896                       pp_context->sampler_state_table.bo_8x8);
2897
2898     dri_bo_unmap(pp_context->sampler_state_table.bo);
2899
2900     /* private function & data */
2901     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
2902     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
2903     pp_context->private_context = &pp_context->pp_avs_context;
2904     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
2905
2906     pp_avs_context->dest_x = dst_rect->x;
2907     pp_avs_context->dest_y = dst_rect->y;
2908     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
2909     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
2910     pp_avs_context->src_w = src_rect->width;
2911     pp_avs_context->src_h = src_rect->height;
2912     pp_avs_context->horiz_range = (float)src_rect->width / src_width;
2913
2914     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
2915     dw = MAX(dw, dst_rect->width);
2916
2917     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
2918     pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */
2919     if (IS_HASWELL(i965->intel.device_info))
2920         pp_static_parameter->grf2.avs_wa_enable = 0; /* HSW don't use the WA */
2921
2922     if (pp_static_parameter->grf2.avs_wa_enable) {
2923         int src_fourcc = pp_get_surface_fourcc(ctx, src_surface);
2924         if ((src_fourcc == VA_FOURCC_RGBA) ||
2925             (src_fourcc == VA_FOURCC_RGBX) ||
2926             (src_fourcc == VA_FOURCC_BGRA) ||
2927             (src_fourcc == VA_FOURCC_BGRX)) {
2928             pp_static_parameter->grf2.avs_wa_enable = 0;
2929         }
2930     }
2931         
2932     pp_static_parameter->grf2.avs_wa_width = src_width;
2933     pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * src_width);
2934     pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * src_width);
2935     pp_static_parameter->grf2.alpha = 255;
2936
2937     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
2938     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height;
2939     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height -
2940         (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step;
2941     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width -
2942         (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw;
2943
2944     gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
2945
2946     dst_surface->flags = src_surface->flags;
2947
2948     return VA_STATUS_SUCCESS;
2949 }
2950
/*
 * Horizontal step count for the DNDI path: always a single step.
 * private_context is accepted for callback compatibility and is not read.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    enum { DNDI_X_STEPS = 1 };

    (void)private_context;

    return DNDI_X_STEPS;
}
2956
2957 static int
2958 pp_dndi_y_steps(void *private_context)
2959 {
2960     struct pp_dndi_context *pp_dndi_context = private_context;
2961
2962     return pp_dndi_context->dest_h / 4;
2963 }
2964
2965 static int
2966 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2967 {
2968     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2969
2970     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2971     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
2972
2973     return 0;
2974 }
2975
2976 static VAStatus
2977 pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2978                         const struct i965_surface *src_surface,
2979                         const VARectangle *src_rect,
2980                         struct i965_surface *dst_surface,
2981                         const VARectangle *dst_rect,
2982                         void *filter_param)
2983 {
2984     struct i965_driver_data *i965 = i965_driver_data(ctx);
2985     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->pp_dndi_context;
2986     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2987     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2988     struct object_surface *previous_in_obj_surface, *current_in_obj_surface, *previous_out_obj_surface, *current_out_obj_surface;
2989     struct i965_sampler_dndi *sampler_dndi;
2990     int index;
2991     int w, h;
2992     int orig_w, orig_h;
2993     int dndi_top_first = 1;
2994     VAProcFilterParameterBufferDeinterlacing *di_filter_param = (VAProcFilterParameterBufferDeinterlacing *)filter_param;
2995     int is_first_frame = (pp_dndi_context->frame_order == -1);
2996
2997     if (di_filter_param->flags & VA_DEINTERLACING_BOTTOM_FIELD)
2998         dndi_top_first = 0;
2999     else
3000         dndi_top_first = 1;
3001
3002     /* surface */
3003     current_in_obj_surface = (struct object_surface *)src_surface->base;
3004
3005     if (di_filter_param->algorithm == VAProcDeinterlacingBob) {
3006         previous_in_obj_surface = current_in_obj_surface;
3007         is_first_frame = 1;
3008     } else if (di_filter_param->algorithm == VAProcDeinterlacingMotionAdaptive) {
3009         if (pp_dndi_context->frame_order == 0) {
3010             VAProcPipelineParameterBuffer *pipeline_param = pp_context->pipeline_param;
3011             if (!pipeline_param ||
3012                 !pipeline_param->num_forward_references ||
3013                 pipeline_param->forward_references[0] == VA_INVALID_ID) {
3014                 WARN_ONCE("A forward temporal reference is needed for Motion adaptive deinterlacing !!!\n");
3015
3016                 return VA_STATUS_ERROR_INVALID_PARAMETER;
3017             } else {
3018                 previous_in_obj_surface = SURFACE(pipeline_param->forward_references[0]);
3019                 assert(previous_in_obj_surface && previous_in_obj_surface->bo);
3020
3021                 is_first_frame = 0;
3022             }
3023         } else if (pp_dndi_context->frame_order == 1) {
3024             vpp_surface_convert(ctx,
3025                                 pp_dndi_context->current_out_obj_surface,
3026                                 (struct object_surface *)dst_surface->base);
3027             pp_dndi_context->frame_order = (pp_dndi_context->frame_order + 1) % 2;
3028             is_first_frame = 0;
3029
3030             return VA_STATUS_SUCCESS_1;
3031         } else {
3032             previous_in_obj_surface = current_in_obj_surface;
3033             is_first_frame = 1;
3034         }
3035     } else {
3036         return VA_STATUS_ERROR_UNIMPLEMENTED;
3037     }
3038
3039     /* source (temporal reference) YUV surface index 5 */
3040     orig_w = previous_in_obj_surface->orig_width;
3041     orig_h = previous_in_obj_surface->orig_height;
3042     w = previous_in_obj_surface->width;
3043     h = previous_in_obj_surface->height;
3044     i965_pp_set_surface2_state(ctx, pp_context,
3045                                previous_in_obj_surface->bo, 0,
3046                                orig_w, orig_h, w,
3047                                0, h,
3048                                SURFACE_FORMAT_PLANAR_420_8, 1,
3049                                5);
3050
3051     /* source surface */
3052     orig_w = current_in_obj_surface->orig_width;
3053     orig_h = current_in_obj_surface->orig_height;
3054     w = current_in_obj_surface->width;
3055     h = current_in_obj_surface->height;
3056
3057     /* source UV surface index 2 */
3058     i965_pp_set_surface_state(ctx, pp_context,
3059                               current_in_obj_surface->bo, w * h,
3060                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3061                               2, 0);
3062
3063     /* source YUV surface index 4 */
3064     i965_pp_set_surface2_state(ctx, pp_context,
3065                                current_in_obj_surface->bo, 0,
3066                                orig_w, orig_h, w,
3067                                0, h,
3068                                SURFACE_FORMAT_PLANAR_420_8, 1,
3069                                4);
3070
3071     /* source STMM surface index 6 */
3072     if (pp_dndi_context->stmm_bo == NULL) {
3073         pp_dndi_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr,
3074                                                 "STMM surface",
3075                                                 w * h,
3076                                                 4096);
3077         assert(pp_dndi_context->stmm_bo);
3078     }
3079
3080     i965_pp_set_surface_state(ctx, pp_context,
3081                               pp_dndi_context->stmm_bo, 0,
3082                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3083                               6, 0);
3084
3085     /* destination (Previous frame) */
3086     previous_out_obj_surface = (struct object_surface *)dst_surface->base;
3087     orig_w = previous_out_obj_surface->orig_width;
3088     orig_h = previous_out_obj_surface->orig_height;
3089     w = previous_out_obj_surface->width;
3090     h = previous_out_obj_surface->height;
3091
3092     if (is_first_frame) {
3093         current_out_obj_surface = previous_out_obj_surface;
3094     } else {
3095         VAStatus va_status;
3096
3097         if (pp_dndi_context->current_out_surface == VA_INVALID_SURFACE) {
3098             unsigned int tiling = 0, swizzle = 0;
3099             dri_bo_get_tiling(previous_out_obj_surface->bo, &tiling, &swizzle);
3100
3101             va_status = i965_CreateSurfaces(ctx,
3102                                             orig_w,
3103                                             orig_h,
3104                                             VA_RT_FORMAT_YUV420,
3105                                             1,
3106                                             &pp_dndi_context->current_out_surface);
3107             assert(va_status == VA_STATUS_SUCCESS);
3108             pp_dndi_context->current_out_obj_surface = SURFACE(pp_dndi_context->current_out_surface);
3109             assert(pp_dndi_context->current_out_obj_surface);
3110             i965_check_alloc_surface_bo(ctx,
3111                                         pp_dndi_context->current_out_obj_surface,
3112                                         tiling != I915_TILING_NONE,
3113                                         VA_FOURCC_NV12,
3114                                         SUBSAMPLE_YUV420);
3115         }
3116
3117         current_out_obj_surface = pp_dndi_context->current_out_obj_surface;
3118     }
3119
3120     /* destination (Previous frame) Y surface index 7 */
3121     i965_pp_set_surface_state(ctx, pp_context,
3122                               previous_out_obj_surface->bo, 0,
3123                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3124                               7, 1);
3125
3126     /* destination (Previous frame) UV surface index 8 */
3127     i965_pp_set_surface_state(ctx, pp_context,
3128                               previous_out_obj_surface->bo, w * h,
3129                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3130                               8, 1);
3131
3132     /* destination(Current frame) */
3133     orig_w = current_out_obj_surface->orig_width;
3134     orig_h = current_out_obj_surface->orig_height;
3135     w = current_out_obj_surface->width;
3136     h = current_out_obj_surface->height;
3137
3138     /* destination (Current frame) Y surface index xxx */
3139     i965_pp_set_surface_state(ctx, pp_context,
3140                               current_out_obj_surface->bo, 0,
3141                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3142                               10, 1);
3143
3144     /* destination (Current frame) UV surface index xxx */
3145     i965_pp_set_surface_state(ctx, pp_context,
3146                               current_out_obj_surface->bo, w * h,
3147                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3148                               11, 1);
3149
3150     /* STMM output surface, index 20 */
3151     i965_pp_set_surface_state(ctx, pp_context,
3152                               pp_dndi_context->stmm_bo, 0,
3153                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3154                               20, 1);
3155
3156     /* sampler dndi */
3157     dri_bo_map(pp_context->sampler_state_table.bo, True);
3158     assert(pp_context->sampler_state_table.bo->virtual);
3159     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3160     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3161
3162     /* sample dndi index 1 */
3163     index = 0;
3164     sampler_dndi[index].dw0.denoise_asd_threshold = 38;
3165     sampler_dndi[index].dw0.denoise_history_delta = 7;          // 0-15, default is 8
3166     sampler_dndi[index].dw0.denoise_maximum_history = 192;      // 128-240
3167     sampler_dndi[index].dw0.denoise_stad_threshold = 140;
3168
3169     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 38;
3170     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 1;
3171     sampler_dndi[index].dw1.stmm_c2 = 1;
3172     sampler_dndi[index].dw1.low_temporal_difference_threshold = 0;
3173     sampler_dndi[index].dw1.temporal_difference_threshold = 0;
3174
3175     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 20;   // 0-31
3176     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 1;    // 0-15
3177     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
3178     sampler_dndi[index].dw2.good_neighbor_threshold = 12;                // 0-63
3179
3180     sampler_dndi[index].dw3.maximum_stmm = 150;
3181     sampler_dndi[index].dw3.multipler_for_vecm = 30;
3182     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 125;
3183     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3184     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3185
3186     sampler_dndi[index].dw4.sdi_delta = 5;
3187     sampler_dndi[index].dw4.sdi_threshold = 100;
3188     sampler_dndi[index].dw4.stmm_output_shift = 5;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3189     sampler_dndi[index].dw4.stmm_shift_up = 1;
3190     sampler_dndi[index].dw4.stmm_shift_down = 0;
3191     sampler_dndi[index].dw4.minimum_stmm = 118;
3192
3193     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 175;
3194     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 37;
3195     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 100;
3196     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 50;
3197
3198     sampler_dndi[index].dw6.dn_enable = 1;
3199     sampler_dndi[index].dw6.di_enable = 1;
3200     sampler_dndi[index].dw6.di_partial = 0;
3201     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3202     sampler_dndi[index].dw6.dndi_stream_id = 0;
3203     sampler_dndi[index].dw6.dndi_first_frame = is_first_frame;
3204     sampler_dndi[index].dw6.progressive_dn = 0;
3205     sampler_dndi[index].dw6.fmd_tear_threshold = 2;
3206     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100;
3207     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 16;
3208
3209     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3210     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3211     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3212     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3213
3214     dri_bo_unmap(pp_context->sampler_state_table.bo);
3215
3216     /* private function & data */
3217     pp_context->pp_x_steps = pp_dndi_x_steps;
3218     pp_context->pp_y_steps = pp_dndi_y_steps;
3219     pp_context->private_context = &pp_context->pp_dndi_context;
3220     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
3221
3222     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
3223     pp_static_parameter->grf1.r1_6.di.top_field_first = dndi_top_first;
3224     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 0;
3225     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 0;
3226
3227     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
3228     pp_inline_parameter->grf5.number_blocks = w / 16;
3229     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3230     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3231
3232     pp_dndi_context->dest_w = w;
3233     pp_dndi_context->dest_h = h;
3234
3235     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3236
3237     pp_dndi_context->frame_order = (pp_dndi_context->frame_order + 1) % 2;
3238
3239     return VA_STATUS_SUCCESS;
3240 }
3241
/*
 * Horizontal step count callback for the denoise pass.
 * Always reports a single step; the private context is not consulted.
 */
static int
pp_dn_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
3247
3248 static int
3249 pp_dn_y_steps(void *private_context)
3250 {
3251     struct pp_dn_context *pp_dn_context = private_context;
3252
3253     return pp_dn_context->dest_h / 8;
3254 }
3255
3256 static int
3257 pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3258 {
3259     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3260
3261     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3262     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
3263
3264     return 0;
3265 }
3266
3267 static VAStatus
3268 pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3269                       const struct i965_surface *src_surface,
3270                       const VARectangle *src_rect,
3271                       struct i965_surface *dst_surface,
3272                       const VARectangle *dst_rect,
3273                       void *filter_param)
3274 {
3275     struct i965_driver_data *i965 = i965_driver_data(ctx);
3276     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->pp_dn_context;
3277     struct object_surface *obj_surface;
3278     struct i965_sampler_dndi *sampler_dndi;
3279     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3280     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3281     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3282     int index;
3283     int w, h;
3284     int orig_w, orig_h;
3285     int dn_strength = 15;
3286     int dndi_top_first = 1;
3287     int dn_progressive = 0;
3288
3289     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3290         dndi_top_first = 1;
3291         dn_progressive = 1;
3292     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3293         dndi_top_first = 1;
3294         dn_progressive = 0;
3295     } else {
3296         dndi_top_first = 0;
3297         dn_progressive = 0;
3298     }
3299
3300     if (dn_filter_param) {
3301         float value = dn_filter_param->value;
3302         
3303         if (value > 1.0)
3304             value = 1.0;
3305         
3306         if (value < 0.0)
3307             value = 0.0;
3308
3309         dn_strength = (int)(value * 31.0F);
3310     }
3311
3312     /* surface */
3313     obj_surface = (struct object_surface *)src_surface->base;
3314     orig_w = obj_surface->orig_width;
3315     orig_h = obj_surface->orig_height;
3316     w = obj_surface->width;
3317     h = obj_surface->height;
3318
3319     if (pp_dn_context->stmm_bo == NULL) {
3320         pp_dn_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr,
3321                                               "STMM surface",
3322                                               w * h,
3323                                               4096);
3324         assert(pp_dn_context->stmm_bo);
3325     }
3326
3327     /* source UV surface index 2 */
3328     i965_pp_set_surface_state(ctx, pp_context,
3329                               obj_surface->bo, w * h,
3330                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3331                               2, 0);
3332
3333     /* source YUV surface index 4 */
3334     i965_pp_set_surface2_state(ctx, pp_context,
3335                                obj_surface->bo, 0,
3336                                orig_w, orig_h, w,
3337                                0, h,
3338                                SURFACE_FORMAT_PLANAR_420_8, 1,
3339                                4);
3340
3341     /* source STMM surface index 20 */
3342     i965_pp_set_surface_state(ctx, pp_context,
3343                               pp_dn_context->stmm_bo, 0,
3344                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3345                               20, 1);
3346
3347     /* destination surface */
3348     obj_surface = (struct object_surface *)dst_surface->base;
3349     orig_w = obj_surface->orig_width;
3350     orig_h = obj_surface->orig_height;
3351     w = obj_surface->width;
3352     h = obj_surface->height;
3353
3354     /* destination Y surface index 7 */
3355     i965_pp_set_surface_state(ctx, pp_context,
3356                               obj_surface->bo, 0,
3357                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3358                               7, 1);
3359
3360     /* destination UV surface index 8 */
3361     i965_pp_set_surface_state(ctx, pp_context,
3362                               obj_surface->bo, w * h,
3363                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3364                               8, 1);
3365     /* sampler dn */
3366     dri_bo_map(pp_context->sampler_state_table.bo, True);
3367     assert(pp_context->sampler_state_table.bo->virtual);
3368     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3369     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3370
3371     /* sample dndi index 1 */
3372     index = 0;
3373     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
3374     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
3375     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
3376     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
3377
3378     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3379     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
3380     sampler_dndi[index].dw1.stmm_c2 = 0;
3381     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
3382     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
3383
3384     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
3385     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
3386     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
3387     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
3388
3389     sampler_dndi[index].dw3.maximum_stmm = 128;
3390     sampler_dndi[index].dw3.multipler_for_vecm = 2;
3391     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3392     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3393     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3394
3395     sampler_dndi[index].dw4.sdi_delta = 8;
3396     sampler_dndi[index].dw4.sdi_threshold = 128;
3397     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3398     sampler_dndi[index].dw4.stmm_shift_up = 0;
3399     sampler_dndi[index].dw4.stmm_shift_down = 0;
3400     sampler_dndi[index].dw4.minimum_stmm = 0;
3401
3402     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
3403     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
3404     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3405     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3406
3407     sampler_dndi[index].dw6.dn_enable = 1;
3408     sampler_dndi[index].dw6.di_enable = 0;
3409     sampler_dndi[index].dw6.di_partial = 0;
3410     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3411     sampler_dndi[index].dw6.dndi_stream_id = 1;
3412     sampler_dndi[index].dw6.dndi_first_frame = 1;
3413     sampler_dndi[index].dw6.progressive_dn = dn_progressive;
3414     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
3415     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
3416     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
3417
3418     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
3419     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
3420     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3421     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3422
3423     dri_bo_unmap(pp_context->sampler_state_table.bo);
3424
3425     /* private function & data */
3426     pp_context->pp_x_steps = pp_dn_x_steps;
3427     pp_context->pp_y_steps = pp_dn_y_steps;
3428     pp_context->private_context = &pp_context->pp_dn_context;
3429     pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
3430
3431     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
3432     pp_static_parameter->grf1.r1_6.di.top_field_first = 0;
3433     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 64;
3434     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 192;
3435
3436     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
3437     pp_inline_parameter->grf5.number_blocks = w / 16;
3438     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3439     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3440
3441     pp_dn_context->dest_w = w;
3442     pp_dn_context->dest_h = h;
3443
3444     dst_surface->flags = src_surface->flags;
3445     
3446     return VA_STATUS_SUCCESS;
3447 }
3448
3449 static int
3450 gen7_pp_dndi_x_steps(void *private_context)
3451 {
3452     struct pp_dndi_context *pp_dndi_context = private_context;
3453
3454     return pp_dndi_context->dest_w / 16;
3455 }
3456
3457 static int
3458 gen7_pp_dndi_y_steps(void *private_context)
3459 {
3460     struct pp_dndi_context *pp_dndi_context = private_context;
3461
3462     return pp_dndi_context->dest_h / 4;
3463 }
3464
3465 static int
3466 gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3467 {
3468     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3469
3470     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16;
3471     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 4;
3472
3473     return 0;
3474 }
3475
3476 static VAStatus
3477 gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3478                              const struct i965_surface *src_surface,
3479                              const VARectangle *src_rect,
3480                              struct i965_surface *dst_surface,
3481                              const VARectangle *dst_rect,
3482                              void *filter_param)
3483 {
3484     struct i965_driver_data *i965 = i965_driver_data(ctx);
3485     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->pp_dndi_context;
3486     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3487     struct object_surface *previous_in_obj_surface, *current_in_obj_surface, *previous_out_obj_surface, *current_out_obj_surface;
3488     struct gen7_sampler_dndi *sampler_dndi;
3489     int index;
3490     int w, h;
3491     int orig_w, orig_h;
3492     int dndi_top_first = 1;
3493     VAProcFilterParameterBufferDeinterlacing *di_filter_param = (VAProcFilterParameterBufferDeinterlacing *)filter_param;
3494     int is_first_frame = (pp_dndi_context->frame_order == -1);
3495
3496     if (di_filter_param->flags & VA_DEINTERLACING_BOTTOM_FIELD)
3497         dndi_top_first = 0;
3498     else
3499         dndi_top_first = 1;
3500
3501     /* surface */
3502     current_in_obj_surface = (struct object_surface *)src_surface->base;
3503
3504     if (di_filter_param->algorithm == VAProcDeinterlacingBob) {
3505         previous_in_obj_surface = current_in_obj_surface;
3506         is_first_frame = 1;
3507     } else if (di_filter_param->algorithm == VAProcDeinterlacingMotionAdaptive) {
3508         if (pp_dndi_context->frame_order == 0) {
3509             VAProcPipelineParameterBuffer *pipeline_param = pp_context->pipeline_param;
3510             if (!pipeline_param ||
3511                 !pipeline_param->num_forward_references ||
3512                 pipeline_param->forward_references[0] == VA_INVALID_ID) {
3513                 WARN_ONCE("A forward temporal reference is needed for Motion adaptive deinterlacing !!!\n");
3514
3515                 return VA_STATUS_ERROR_INVALID_PARAMETER;
3516             } else {
3517                 previous_in_obj_surface = SURFACE(pipeline_param->forward_references[0]);
3518                 assert(previous_in_obj_surface && previous_in_obj_surface->bo);
3519
3520                 is_first_frame = 0;
3521             }
3522         } else if (pp_dndi_context->frame_order == 1) {
3523             vpp_surface_convert(ctx,
3524                                 pp_dndi_context->current_out_obj_surface,
3525                                 (struct object_surface *)dst_surface->base);
3526             pp_dndi_context->frame_order = (pp_dndi_context->frame_order + 1) % 2;
3527             is_first_frame = 0;
3528
3529             return VA_STATUS_SUCCESS_1;
3530         } else {
3531             previous_in_obj_surface = current_in_obj_surface;
3532             is_first_frame = 1;
3533         }
3534     } else {
3535         return VA_STATUS_ERROR_UNIMPLEMENTED;
3536     }
3537
3538     /* source (temporal reference) YUV surface index 4 */
3539     orig_w = previous_in_obj_surface->orig_width;
3540     orig_h = previous_in_obj_surface->orig_height;
3541     w = previous_in_obj_surface->width;
3542     h = previous_in_obj_surface->height;
3543     gen7_pp_set_surface2_state(ctx, pp_context,
3544                                previous_in_obj_surface->bo, 0,
3545                                orig_w, orig_h, w,
3546                                0, h,
3547                                SURFACE_FORMAT_PLANAR_420_8, 1,
3548                                4);
3549
3550     /* source surface */
3551     orig_w = current_in_obj_surface->orig_width;
3552     orig_h = current_in_obj_surface->orig_height;
3553     w = current_in_obj_surface->width;
3554     h = current_in_obj_surface->height;
3555
3556     /* source UV surface index 1 */
3557     gen7_pp_set_surface_state(ctx, pp_context,
3558                               current_in_obj_surface->bo, w * h,
3559                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3560                               1, 0);
3561
3562     /* source YUV surface index 3 */
3563     gen7_pp_set_surface2_state(ctx, pp_context,
3564                                current_in_obj_surface->bo, 0,
3565                                orig_w, orig_h, w,
3566                                0, h,
3567                                SURFACE_FORMAT_PLANAR_420_8, 1,
3568                                3);
3569
3570     /* STMM / History Statistics input surface, index 5 */
3571     if (pp_dndi_context->stmm_bo == NULL) {
3572         pp_dndi_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr,
3573                                                 "STMM surface",
3574                                                 w * h,
3575                                                 4096);
3576         assert(pp_dndi_context->stmm_bo);
3577     }
3578
3579     gen7_pp_set_surface_state(ctx, pp_context,
3580                               pp_dndi_context->stmm_bo, 0,
3581                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3582                               5, 1);
3583
3584     /* destination surface */
3585     previous_out_obj_surface = (struct object_surface *)dst_surface->base;
3586     orig_w = previous_out_obj_surface->orig_width;
3587     orig_h = previous_out_obj_surface->orig_height;
3588     w = previous_out_obj_surface->width;
3589     h = previous_out_obj_surface->height;
3590
3591     if (is_first_frame) {
3592         current_out_obj_surface = previous_out_obj_surface;
3593     } else {
3594         VAStatus va_status;
3595
3596         if (pp_dndi_context->current_out_surface == VA_INVALID_SURFACE) {
3597             unsigned int tiling = 0, swizzle = 0;
3598             dri_bo_get_tiling(previous_out_obj_surface->bo, &tiling, &swizzle);
3599
3600             va_status = i965_CreateSurfaces(ctx,
3601                                             orig_w,
3602                                             orig_h,
3603                                             VA_RT_FORMAT_YUV420,
3604                                             1,
3605                                             &pp_dndi_context->current_out_surface);
3606             assert(va_status == VA_STATUS_SUCCESS);
3607             pp_dndi_context->current_out_obj_surface = SURFACE(pp_dndi_context->current_out_surface);
3608             assert(pp_dndi_context->current_out_obj_surface);
3609             i965_check_alloc_surface_bo(ctx,
3610                                         pp_dndi_context->current_out_obj_surface,
3611                                         tiling != I915_TILING_NONE,
3612                                         VA_FOURCC_NV12,
3613                                         SUBSAMPLE_YUV420);
3614         }
3615
3616         current_out_obj_surface = pp_dndi_context->current_out_obj_surface;
3617     }
3618
3619     /* destination(Previous frame) Y surface index 27 */
3620     gen7_pp_set_surface_state(ctx, pp_context,
3621                               previous_out_obj_surface->bo, 0,
3622                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3623                               27, 1);
3624
3625     /* destination(Previous frame) UV surface index 28 */
3626     gen7_pp_set_surface_state(ctx, pp_context,
3627                               previous_out_obj_surface->bo, w * h,
3628                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3629                               28, 1);
3630
3631     /* destination(Current frame) Y surface index 30 */
3632     gen7_pp_set_surface_state(ctx, pp_context,
3633                               current_out_obj_surface->bo, 0,
3634                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3635                               30, 1);
3636
3637     /* destination(Current frame) UV surface index 31 */
3638     orig_w = current_out_obj_surface->orig_width;
3639     orig_h = current_out_obj_surface->orig_height;
3640     w = current_out_obj_surface->width;
3641     h = current_out_obj_surface->height;
3642
3643     gen7_pp_set_surface_state(ctx, pp_context,
3644                               current_out_obj_surface->bo, w * h,
3645                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3646                               31, 1);
3647
3648     /* STMM output surface, index 33 */
3649     gen7_pp_set_surface_state(ctx, pp_context,
3650                               pp_dndi_context->stmm_bo, 0,
3651                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3652                               33, 1);
3653
3654
3655     /* sampler dndi */
3656     dri_bo_map(pp_context->sampler_state_table.bo, True);
3657     assert(pp_context->sampler_state_table.bo->virtual);
3658     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3659     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3660
3661     /* sample dndi index 0 */
3662     index = 0;
3663     sampler_dndi[index].dw0.denoise_asd_threshold = 38;
3664     sampler_dndi[index].dw0.dnmh_delt = 7;
3665     sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
3666     sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
3667     sampler_dndi[index].dw0.denoise_maximum_history = 192;      // 128-240
3668     sampler_dndi[index].dw0.denoise_stad_threshold = 140;
3669
3670     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 38;
3671     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 1;
3672     sampler_dndi[index].dw1.stmm_c2 = 2;
3673     sampler_dndi[index].dw1.low_temporal_difference_threshold = 0;
3674     sampler_dndi[index].dw1.temporal_difference_threshold = 0;
3675
3676     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 20;   // 0-31
3677     sampler_dndi[index].dw2.bne_edge_th = 1;
3678     sampler_dndi[index].dw2.smooth_mv_th = 0;
3679     sampler_dndi[index].dw2.sad_tight_th = 5;
3680     sampler_dndi[index].dw2.cat_slope_minus1 = 9;
3681     sampler_dndi[index].dw2.good_neighbor_th = 12;
3682
3683     sampler_dndi[index].dw3.maximum_stmm = 150;
3684     sampler_dndi[index].dw3.multipler_for_vecm = 30;
3685     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 125;
3686     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3687     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3688
3689     sampler_dndi[index].dw4.sdi_delta = 5;
3690     sampler_dndi[index].dw4.sdi_threshold = 100;
3691     sampler_dndi[index].dw4.stmm_output_shift = 5;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3692     sampler_dndi[index].dw4.stmm_shift_up = 1;
3693     sampler_dndi[index].dw4.stmm_shift_down = 0;
3694     sampler_dndi[index].dw4.minimum_stmm = 118;
3695
3696     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 175;
3697     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 37;
3698     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 100;
3699     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 50;
3700     sampler_dndi[index].dw6.dn_enable = 0;
3701     sampler_dndi[index].dw6.di_enable = 1;
3702     sampler_dndi[index].dw6.di_partial = 0;
3703     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3704     sampler_dndi[index].dw6.dndi_stream_id = 1;
3705     sampler_dndi[index].dw6.dndi_first_frame = is_first_frame;
3706     sampler_dndi[index].dw6.progressive_dn = 0;
3707     sampler_dndi[index].dw6.mcdi_enable = 0;
3708     sampler_dndi[index].dw6.fmd_tear_threshold = 2;
3709     sampler_dndi[index].dw6.cat_th1 = 0;
3710     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100;
3711     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 16;
3712
3713     sampler_dndi[index].dw7.sad_tha = 5;
3714     sampler_dndi[index].dw7.sad_thb = 10;
3715     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3716     sampler_dndi[index].dw7.mc_pixel_consistency_th = 25;
3717     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3718     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3719     sampler_dndi[index].dw7.neighborpixel_th = 10;
3720     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3721
3722     dri_bo_unmap(pp_context->sampler_state_table.bo);
3723
3724     /* private function & data */
3725     pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
3726     pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
3727     pp_context->private_context = &pp_context->pp_dndi_context;
3728     pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
3729
3730     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3731     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3732     pp_static_parameter->grf1.di_top_field_first = 0;
3733     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3734
3735     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3736     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3737     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3738
3739     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3740     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3741
3742     pp_dndi_context->dest_w = w;
3743     pp_dndi_context->dest_h = h;
3744
3745     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3746
3747     pp_dndi_context->frame_order = (pp_dndi_context->frame_order + 1) % 2;
3748
3749     return VA_STATUS_SUCCESS;
3750 }
3751
3752 static int
3753 gen7_pp_dn_x_steps(void *private_context)
3754 {
3755     struct pp_dn_context *pp_dn_context = private_context;
3756
3757     return pp_dn_context->dest_w / 16;
3758 }
3759
3760 static int
3761 gen7_pp_dn_y_steps(void *private_context)
3762 {
3763     struct pp_dn_context *pp_dn_context = private_context;
3764
3765     return pp_dn_context->dest_h / 4;
3766 }
3767
3768 static int
3769 gen7_pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3770 {
3771     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3772
3773     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3774     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
3775
3776     return 0;
3777 }
3778
3779 static VAStatus
3780 gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3781                            const struct i965_surface *src_surface,
3782                            const VARectangle *src_rect,
3783                            struct i965_surface *dst_surface,
3784                            const VARectangle *dst_rect,
3785                            void *filter_param)
3786 {
3787     struct i965_driver_data *i965 = i965_driver_data(ctx);
3788     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->pp_dn_context;
3789     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3790     struct object_surface *obj_surface;
3791     struct gen7_sampler_dndi *sampler_dn;
3792     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3793     int index;
3794     int w, h;
3795     int orig_w, orig_h;
3796     int dn_strength = 15;
3797     int dndi_top_first = 1;
3798     int dn_progressive = 0;
3799
3800     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3801         dndi_top_first = 1;
3802         dn_progressive = 1;
3803     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3804         dndi_top_first = 1;
3805         dn_progressive = 0;
3806     } else {
3807         dndi_top_first = 0;
3808         dn_progressive = 0;
3809     }
3810
3811     if (dn_filter_param) {
3812         float value = dn_filter_param->value;
3813         
3814         if (value > 1.0)
3815             value = 1.0;
3816         
3817         if (value < 0.0)
3818             value = 0.0;
3819
3820         dn_strength = (int)(value * 31.0F);
3821     }
3822
3823     /* surface */
3824     obj_surface = (struct object_surface *)src_surface->base;
3825     orig_w = obj_surface->orig_width;
3826     orig_h = obj_surface->orig_height;
3827     w = obj_surface->width;
3828     h = obj_surface->height;
3829
3830     if (pp_dn_context->stmm_bo == NULL) {
3831         pp_dn_context->stmm_bo= dri_bo_alloc(i965->intel.bufmgr,
3832                                              "STMM surface",
3833                                              w * h,
3834                                              4096);
3835         assert(pp_dn_context->stmm_bo);
3836     }
3837
3838     /* source UV surface index 1 */
3839     gen7_pp_set_surface_state(ctx, pp_context,
3840                               obj_surface->bo, w * h,
3841                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3842                               1, 0);
3843
3844     /* source YUV surface index 3 */
3845     gen7_pp_set_surface2_state(ctx, pp_context,
3846                                obj_surface->bo, 0,
3847                                orig_w, orig_h, w,
3848                                0, h,
3849                                SURFACE_FORMAT_PLANAR_420_8, 1,
3850                                3);
3851
3852     /* source (temporal reference) YUV surface index 4 */
3853     gen7_pp_set_surface2_state(ctx, pp_context,
3854                                obj_surface->bo, 0,
3855                                orig_w, orig_h, w,
3856                                0, h,
3857                                SURFACE_FORMAT_PLANAR_420_8, 1,
3858                                4);
3859
3860     /* STMM / History Statistics input surface, index 5 */
3861     gen7_pp_set_surface_state(ctx, pp_context,
3862                               pp_dn_context->stmm_bo, 0,
3863                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3864                               33, 1);
3865
3866     /* destination surface */
3867     obj_surface = (struct object_surface *)dst_surface->base;
3868     orig_w = obj_surface->orig_width;
3869     orig_h = obj_surface->orig_height;
3870     w = obj_surface->width;
3871     h = obj_surface->height;
3872
3873     /* destination Y surface index 24 */
3874     gen7_pp_set_surface_state(ctx, pp_context,
3875                               obj_surface->bo, 0,
3876                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3877                               24, 1);
3878
3879     /* destination UV surface index 25 */
3880     gen7_pp_set_surface_state(ctx, pp_context,
3881                               obj_surface->bo, w * h,
3882                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3883                               25, 1);
3884
3885     /* sampler dn */
3886     dri_bo_map(pp_context->sampler_state_table.bo, True);
3887     assert(pp_context->sampler_state_table.bo->virtual);
3888     assert(sizeof(*sampler_dn) == sizeof(int) * 8);
3889     sampler_dn = pp_context->sampler_state_table.bo->virtual;
3890
3891     /* sample dn index 1 */
3892     index = 0;
3893     sampler_dn[index].dw0.denoise_asd_threshold = 0;
3894     sampler_dn[index].dw0.dnmh_delt = 8;
3895     sampler_dn[index].dw0.vdi_walker_y_stride = 0;
3896     sampler_dn[index].dw0.vdi_walker_frame_sharing_enable = 0;
3897     sampler_dn[index].dw0.denoise_maximum_history = 128;      // 128-240
3898     sampler_dn[index].dw0.denoise_stad_threshold = 0;
3899
3900     sampler_dn[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3901     sampler_dn[index].dw1.denoise_moving_pixel_threshold = 0;
3902     sampler_dn[index].dw1.stmm_c2 = 0;
3903     sampler_dn[index].dw1.low_temporal_difference_threshold = 8;
3904     sampler_dn[index].dw1.temporal_difference_threshold = 16;
3905
3906     sampler_dn[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
3907     sampler_dn[index].dw2.bne_edge_th = 1;
3908     sampler_dn[index].dw2.smooth_mv_th = 0;
3909     sampler_dn[index].dw2.sad_tight_th = 5;
3910     sampler_dn[index].dw2.cat_slope_minus1 = 9;
3911     sampler_dn[index].dw2.good_neighbor_th = 4;
3912
3913     sampler_dn[index].dw3.maximum_stmm = 128;
3914     sampler_dn[index].dw3.multipler_for_vecm = 2;
3915     sampler_dn[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3916     sampler_dn[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3917     sampler_dn[index].dw3.stmm_blending_constant_select = 0;
3918
3919     sampler_dn[index].dw4.sdi_delta = 8;
3920     sampler_dn[index].dw4.sdi_threshold = 128;
3921     sampler_dn[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3922     sampler_dn[index].dw4.stmm_shift_up = 0;
3923     sampler_dn[index].dw4.stmm_shift_down = 0;
3924     sampler_dn[index].dw4.minimum_stmm = 0;
3925
3926     sampler_dn[index].dw5.fmd_temporal_difference_threshold = 0;
3927     sampler_dn[index].dw5.sdi_fallback_mode_2_constant = 0;
3928     sampler_dn[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3929     sampler_dn[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3930
3931     sampler_dn[index].dw6.dn_enable = 1;
3932     sampler_dn[index].dw6.di_enable = 0;
3933     sampler_dn[index].dw6.di_partial = 0;
3934     sampler_dn[index].dw6.dndi_top_first = dndi_top_first;
3935     sampler_dn[index].dw6.dndi_stream_id = 1;
3936     sampler_dn[index].dw6.dndi_first_frame = 1;
3937     sampler_dn[index].dw6.progressive_dn = dn_progressive;
3938     sampler_dn[index].dw6.mcdi_enable = 0;
3939     sampler_dn[index].dw6.fmd_tear_threshold = 32;
3940     sampler_dn[index].dw6.cat_th1 = 0;
3941     sampler_dn[index].dw6.fmd2_vertical_difference_threshold = 32;
3942     sampler_dn[index].dw6.fmd1_vertical_difference_threshold = 32;
3943
3944     sampler_dn[index].dw7.sad_tha = 5;
3945     sampler_dn[index].dw7.sad_thb = 10;
3946     sampler_dn[index].dw7.fmd_for_1st_field_of_current_frame = 2;
3947     sampler_dn[index].dw7.mc_pixel_consistency_th = 25;
3948     sampler_dn[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
3949     sampler_dn[index].dw7.vdi_walker_enable = 0;
3950     sampler_dn[index].dw7.neighborpixel_th = 10;
3951     sampler_dn[index].dw7.column_width_minus1 = w / 16;
3952
3953     dri_bo_unmap(pp_context->sampler_state_table.bo);
3954
3955     /* private function & data */
3956     pp_context->pp_x_steps = gen7_pp_dn_x_steps;
3957     pp_context->pp_y_steps = gen7_pp_dn_y_steps;
3958     pp_context->private_context = &pp_context->pp_dn_context;
3959     pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;
3960
3961     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3962     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3963     pp_static_parameter->grf1.di_top_field_first = 0;
3964     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3965
3966     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3967     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3968     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3969
3970     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3971     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3972
3973     pp_dn_context->dest_w = w;
3974     pp_dn_context->dest_h = h;
3975
3976     dst_surface->flags = src_surface->flags;
3977
3978     return VA_STATUS_SUCCESS;
3979 }
3980
3981 static VAStatus
3982 ironlake_pp_initialize(
3983     VADriverContextP ctx,
3984     struct i965_post_processing_context *pp_context,
3985     const struct i965_surface *src_surface,
3986     const VARectangle *src_rect,
3987     struct i965_surface *dst_surface,
3988     const VARectangle *dst_rect,
3989     int pp_index,
3990     void *filter_param
3991 )
3992 {
3993     VAStatus va_status;
3994     struct i965_driver_data *i965 = i965_driver_data(ctx);
3995     struct pp_module *pp_module;
3996     dri_bo *bo;
3997     int static_param_size, inline_param_size;
3998
3999     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4000     bo = dri_bo_alloc(i965->intel.bufmgr,
4001                       "surface state & binding table",
4002                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
4003                       4096);
4004     assert(bo);
4005     pp_context->surface_state_binding_table.bo = bo;
4006
4007     dri_bo_unreference(pp_context->curbe.bo);
4008     bo = dri_bo_alloc(i965->intel.bufmgr,
4009                       "constant buffer",
4010                       4096, 
4011                       4096);
4012     assert(bo);
4013     pp_context->curbe.bo = bo;
4014
4015     dri_bo_unreference(pp_context->idrt.bo);
4016     bo = dri_bo_alloc(i965->intel.bufmgr, 
4017                       "interface discriptor", 
4018                       sizeof(struct i965_interface_descriptor), 
4019                       4096);
4020     assert(bo);
4021     pp_context->idrt.bo = bo;
4022     pp_context->idrt.num_interface_descriptors = 0;
4023
4024     dri_bo_unreference(pp_context->sampler_state_table.bo);
4025     bo = dri_bo_alloc(i965->intel.bufmgr, 
4026                       "sampler state table", 
4027                       4096,
4028                       4096);
4029     assert(bo);
4030     dri_bo_map(bo, True);
4031     memset(bo->virtual, 0, bo->size);
4032     dri_bo_unmap(bo);
4033     pp_context->sampler_state_table.bo = bo;
4034
4035     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4036     bo = dri_bo_alloc(i965->intel.bufmgr, 
4037                       "sampler 8x8 state ",
4038                       4096,
4039                       4096);
4040     assert(bo);
4041     pp_context->sampler_state_table.bo_8x8 = bo;
4042
4043     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4044     bo = dri_bo_alloc(i965->intel.bufmgr, 
4045                       "sampler 8x8 state ",
4046                       4096,
4047                       4096);
4048     assert(bo);
4049     pp_context->sampler_state_table.bo_8x8_uv = bo;
4050
4051     dri_bo_unreference(pp_context->vfe_state.bo);
4052     bo = dri_bo_alloc(i965->intel.bufmgr, 
4053                       "vfe state", 
4054                       sizeof(struct i965_vfe_state), 
4055                       4096);
4056     assert(bo);
4057     pp_context->vfe_state.bo = bo;
4058
4059     static_param_size = sizeof(struct pp_static_parameter);
4060     inline_param_size = sizeof(struct pp_inline_parameter);
4061
4062     memset(pp_context->pp_static_parameter, 0, static_param_size);
4063     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
4064     
4065     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
4066     pp_context->current_pp = pp_index;
4067     pp_module = &pp_context->pp_modules[pp_index];
4068     
4069     if (pp_module->initialize)
4070         va_status = pp_module->initialize(ctx, pp_context,
4071                                           src_surface,
4072                                           src_rect,
4073                                           dst_surface,
4074                                           dst_rect,
4075                                           filter_param);
4076     else
4077         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4078
4079     return va_status;
4080 }
4081
4082 static VAStatus
4083 ironlake_post_processing(
4084     VADriverContextP   ctx,
4085     struct i965_post_processing_context *pp_context,
4086     const struct i965_surface *src_surface,
4087     const VARectangle *src_rect,
4088     struct i965_surface *dst_surface,
4089     const VARectangle *dst_rect,
4090     int                pp_index,
4091     void *filter_param
4092 )
4093 {
4094     VAStatus va_status;
4095
4096     va_status = ironlake_pp_initialize(ctx, pp_context,
4097                                        src_surface,
4098                                        src_rect,
4099                                        dst_surface,
4100                                        dst_rect,
4101                                        pp_index,
4102                                        filter_param);
4103
4104     if (va_status == VA_STATUS_SUCCESS) {
4105         ironlake_pp_states_setup(ctx, pp_context);
4106         ironlake_pp_pipeline_setup(ctx, pp_context);
4107     }
4108
4109     return va_status;
4110 }
4111
4112 static VAStatus
4113 gen6_pp_initialize(
4114     VADriverContextP ctx,
4115     struct i965_post_processing_context *pp_context,
4116     const struct i965_surface *src_surface,
4117     const VARectangle *src_rect,
4118     struct i965_surface *dst_surface,
4119     const VARectangle *dst_rect,
4120     int pp_index,
4121     void *filter_param
4122 )
4123 {
4124     VAStatus va_status;
4125     struct i965_driver_data *i965 = i965_driver_data(ctx);
4126     struct pp_module *pp_module;
4127     dri_bo *bo;
4128     int static_param_size, inline_param_size;
4129
4130     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4131     bo = dri_bo_alloc(i965->intel.bufmgr,
4132                       "surface state & binding table",
4133                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
4134                       4096);
4135     assert(bo);
4136     pp_context->surface_state_binding_table.bo = bo;
4137
4138     dri_bo_unreference(pp_context->curbe.bo);
4139     bo = dri_bo_alloc(i965->intel.bufmgr,
4140                       "constant buffer",
4141                       4096, 
4142                       4096);
4143     assert(bo);
4144     pp_context->curbe.bo = bo;
4145
4146     dri_bo_unreference(pp_context->idrt.bo);
4147     bo = dri_bo_alloc(i965->intel.bufmgr, 
4148                       "interface discriptor", 
4149                       sizeof(struct gen6_interface_descriptor_data), 
4150                       4096);
4151     assert(bo);
4152     pp_context->idrt.bo = bo;
4153     pp_context->idrt.num_interface_descriptors = 0;
4154
4155     dri_bo_unreference(pp_context->sampler_state_table.bo);
4156     bo = dri_bo_alloc(i965->intel.bufmgr, 
4157                       "sampler state table", 
4158                       4096,
4159                       4096);
4160     assert(bo);
4161     dri_bo_map(bo, True);
4162     memset(bo->virtual, 0, bo->size);
4163     dri_bo_unmap(bo);
4164     pp_context->sampler_state_table.bo = bo;
4165
4166     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4167     bo = dri_bo_alloc(i965->intel.bufmgr, 
4168                       "sampler 8x8 state ",
4169                       4096,
4170                       4096);
4171     assert(bo);
4172     pp_context->sampler_state_table.bo_8x8 = bo;
4173
4174     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4175     bo = dri_bo_alloc(i965->intel.bufmgr, 
4176                       "sampler 8x8 state ",
4177                       4096,
4178                       4096);
4179     assert(bo);
4180     pp_context->sampler_state_table.bo_8x8_uv = bo;
4181
4182     dri_bo_unreference(pp_context->vfe_state.bo);
4183     bo = dri_bo_alloc(i965->intel.bufmgr, 
4184                       "vfe state", 
4185                       sizeof(struct i965_vfe_state), 
4186                       4096);
4187     assert(bo);
4188     pp_context->vfe_state.bo = bo;
4189     
4190     if (IS_GEN7(i965->intel.device_info)) {
4191         static_param_size = sizeof(struct gen7_pp_static_parameter);
4192         inline_param_size = sizeof(struct gen7_pp_inline_parameter);
4193     } else {
4194         static_param_size = sizeof(struct pp_static_parameter);
4195         inline_param_size = sizeof(struct pp_inline_parameter);
4196     }
4197
4198     memset(pp_context->pp_static_parameter, 0, static_param_size);
4199     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
4200
4201     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
4202     pp_context->current_pp = pp_index;
4203     pp_module = &pp_context->pp_modules[pp_index];
4204     
4205     if (pp_module->initialize)
4206         va_status = pp_module->initialize(ctx, pp_context,
4207                                           src_surface,
4208                                           src_rect,
4209                                           dst_surface,
4210                                           dst_rect,
4211                                           filter_param);
4212     else
4213         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4214
4215     calculate_boundary_block_mask(pp_context, dst_rect);
4216
4217     return va_status;
4218 }
4219
4220
4221 static void
4222 gen6_pp_interface_descriptor_table(VADriverContextP   ctx,
4223                                    struct i965_post_processing_context *pp_context)
4224 {
4225     struct i965_driver_data *i965 = i965_driver_data(ctx);
4226     struct gen6_interface_descriptor_data *desc;
4227     dri_bo *bo;
4228     int pp_index = pp_context->current_pp;
4229
4230     bo = pp_context->idrt.bo;
4231     dri_bo_map(bo, True);
4232     assert(bo->virtual);
4233     desc = bo->virtual;
4234     memset(desc, 0, sizeof(*desc));
4235     desc->desc0.kernel_start_pointer = 
4236         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
4237     desc->desc1.single_program_flow = 1;
4238     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
4239     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
4240     desc->desc2.sampler_state_pointer = 
4241         pp_context->sampler_state_table.bo->offset >> 5;
4242     desc->desc3.binding_table_entry_count = 0;
4243     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
4244     desc->desc4.constant_urb_entry_read_offset = 0;
4245
4246     if (IS_GEN7(i965->intel.device_info))
4247         desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */
4248     else
4249         desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
4250
4251     dri_bo_emit_reloc(bo,
4252                       I915_GEM_DOMAIN_INSTRUCTION, 0,
4253                       0,
4254                       offsetof(struct gen6_interface_descriptor_data, desc0),
4255                       pp_context->pp_modules[pp_index].kernel.bo);
4256
4257     dri_bo_emit_reloc(bo,
4258                       I915_GEM_DOMAIN_INSTRUCTION, 0,
4259                       desc->desc2.sampler_count << 2,
4260                       offsetof(struct gen6_interface_descriptor_data, desc2),
4261                       pp_context->sampler_state_table.bo);
4262
4263     dri_bo_unmap(bo);
4264     pp_context->idrt.num_interface_descriptors++;
4265 }
4266
4267 static void
4268 gen6_pp_upload_constants(VADriverContextP ctx,
4269                          struct i965_post_processing_context *pp_context)
4270 {
4271     struct i965_driver_data *i965 = i965_driver_data(ctx);
4272     unsigned char *constant_buffer;
4273     int param_size;
4274
4275     assert(sizeof(struct pp_static_parameter) == 128);
4276     assert(sizeof(struct gen7_pp_static_parameter) == 192);
4277
4278     if (IS_GEN7(i965->intel.device_info))
4279         param_size = sizeof(struct gen7_pp_static_parameter);
4280     else
4281         param_size = sizeof(struct pp_static_parameter);
4282
4283     dri_bo_map(pp_context->curbe.bo, 1);
4284     assert(pp_context->curbe.bo->virtual);
4285     constant_buffer = pp_context->curbe.bo->virtual;
4286     memcpy(constant_buffer, pp_context->pp_static_parameter, param_size);
4287     dri_bo_unmap(pp_context->curbe.bo);
4288 }
4289
4290 static void
4291 gen6_pp_states_setup(VADriverContextP ctx,
4292                      struct i965_post_processing_context *pp_context)
4293 {
4294     gen6_pp_interface_descriptor_table(ctx, pp_context);
4295     gen6_pp_upload_constants(ctx, pp_context);
4296 }
4297
4298 static void
4299 gen6_pp_pipeline_select(VADriverContextP ctx,
4300                         struct i965_post_processing_context *pp_context)
4301 {
4302     struct intel_batchbuffer *batch = pp_context->batch;
4303
4304     BEGIN_BATCH(batch, 1);
4305     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
4306     ADVANCE_BATCH(batch);
4307 }
4308
4309 static void
4310 gen6_pp_state_base_address(VADriverContextP ctx,
4311                            struct i965_post_processing_context *pp_context)
4312 {
4313     struct intel_batchbuffer *batch = pp_context->batch;
4314
4315     BEGIN_BATCH(batch, 10);
4316     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
4317     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4318     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
4319     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4320     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4321     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4322     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4323     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4324     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4325     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4326     ADVANCE_BATCH(batch);
4327 }
4328
4329 static void
4330 gen6_pp_vfe_state(VADriverContextP ctx,
4331                   struct i965_post_processing_context *pp_context)
4332 {
4333     struct intel_batchbuffer *batch = pp_context->batch;
4334
4335     BEGIN_BATCH(batch, 8);
4336     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
4337     OUT_BATCH(batch, 0);
4338     OUT_BATCH(batch,
4339               (pp_context->vfe_gpu_state.max_num_threads - 1) << 16 |
4340               pp_context->vfe_gpu_state.num_urb_entries << 8);
4341     OUT_BATCH(batch, 0);
4342     OUT_BATCH(batch,
4343               (pp_context->vfe_gpu_state.urb_entry_size) << 16 |  
4344                 /* URB Entry Allocation Size, in 256 bits unit */
4345               (pp_context->vfe_gpu_state.curbe_allocation_size));
4346                 /* CURBE Allocation Size, in 256 bits unit */
4347     OUT_BATCH(batch, 0);
4348     OUT_BATCH(batch, 0);
4349     OUT_BATCH(batch, 0);
4350     ADVANCE_BATCH(batch);
4351 }
4352
4353 static void
4354 gen6_pp_curbe_load(VADriverContextP ctx,
4355                    struct i965_post_processing_context *pp_context)
4356 {
4357     struct intel_batchbuffer *batch = pp_context->batch;
4358     struct i965_driver_data *i965 = i965_driver_data(ctx);
4359     int param_size;
4360
4361     if (IS_GEN7(i965->intel.device_info))
4362         param_size = sizeof(struct gen7_pp_static_parameter);
4363     else
4364         param_size = sizeof(struct pp_static_parameter);
4365
4366     BEGIN_BATCH(batch, 4);
4367     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
4368     OUT_BATCH(batch, 0);
4369     OUT_BATCH(batch,
4370               param_size);
4371     OUT_RELOC(batch, 
4372               pp_context->curbe.bo,
4373               I915_GEM_DOMAIN_INSTRUCTION, 0,
4374               0);
4375     ADVANCE_BATCH(batch);
4376 }
4377
4378 static void
4379 gen6_interface_descriptor_load(VADriverContextP ctx,
4380                                struct i965_post_processing_context *pp_context)
4381 {
4382     struct intel_batchbuffer *batch = pp_context->batch;
4383
4384     BEGIN_BATCH(batch, 4);
4385     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
4386     OUT_BATCH(batch, 0);
4387     OUT_BATCH(batch,
4388               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
4389     OUT_RELOC(batch, 
4390               pp_context->idrt.bo,
4391               I915_GEM_DOMAIN_INSTRUCTION, 0,
4392               0);
4393     ADVANCE_BATCH(batch);
4394 }
4395
4396 static void update_block_mask_parameter(struct i965_post_processing_context *pp_context, int x, int y, int x_steps, int y_steps) 
4397 {
4398     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
4399
4400     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
4401     pp_inline_parameter->grf6.block_vertical_mask_bottom = pp_context->block_vertical_mask_bottom;
4402     // for the first block, it always on the left edge. the second block will reload horizontal_mask from grf6.block_horizontal_mask_middle
4403     pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_left;
4404     pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
4405     pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_right;
4406
4407     /* 1 x N */
4408     if (x_steps == 1) {
4409         if (y == y_steps-1) {
4410             pp_inline_parameter->grf5.block_vertical_mask = pp_context->block_vertical_mask_bottom;
4411         }
4412         else {
4413             pp_inline_parameter->grf6.block_vertical_mask_bottom = 0xff;
4414         }
4415     }
4416
4417     /* M x 1 */
4418     if (y_steps == 1) {
4419         if (x == 0) { // all blocks in this group are on the left edge
4420             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_left;
4421             pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_left; 
4422         }
4423         else if (x == x_steps-1) {
4424             pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_right;
4425             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_right;
4426         }
4427         else {
4428             pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
4429             pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
4430             pp_inline_parameter->grf6.block_horizontal_mask_right = 0xffff;
4431         }
4432     }
4433
4434 }
4435
4436 static void
4437 gen6_pp_object_walker(VADriverContextP ctx,
4438                       struct i965_post_processing_context *pp_context)
4439 {
4440     struct i965_driver_data *i965 = i965_driver_data(ctx);
4441     struct intel_batchbuffer *batch = pp_context->batch;
4442     int x, x_steps, y, y_steps;
4443     int param_size, command_length_in_dws;
4444     dri_bo *command_buffer;
4445     unsigned int *command_ptr;
4446
4447     if (IS_GEN7(i965->intel.device_info))
4448         param_size = sizeof(struct gen7_pp_inline_parameter);
4449     else
4450         param_size = sizeof(struct pp_inline_parameter);
4451
4452     x_steps = pp_context->pp_x_steps(pp_context->private_context);
4453     y_steps = pp_context->pp_y_steps(pp_context->private_context);
4454     command_length_in_dws = 6 + (param_size >> 2);
4455     command_buffer = dri_bo_alloc(i965->intel.bufmgr,
4456                                   "command objects buffer",
4457                                   command_length_in_dws * 4 * x_steps * y_steps + 8,
4458                                   4096);
4459
4460     dri_bo_map(command_buffer, 1);
4461     command_ptr = command_buffer->virtual;
4462
4463     for (y = 0; y < y_steps; y++) {
4464         for (x = 0; x < x_steps; x++) {
4465             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
4466                 // some common block parameter update goes here, apply to all pp functions
4467                 if (IS_GEN6(i965->intel.device_info))
4468                     update_block_mask_parameter (pp_context, x, y, x_steps, y_steps);
4469                 
4470                 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
4471                 *command_ptr++ = 0;
4472                 *command_ptr++ = 0;
4473                 *command_ptr++ = 0;
4474                 *command_ptr++ = 0;
4475                 *command_ptr++ = 0;
4476                 memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
4477                 command_ptr += (param_size >> 2);
4478             }
4479         }
4480     }
4481
4482     if (command_length_in_dws * x_steps * y_steps % 2 == 0)
4483         *command_ptr++ = 0;
4484
4485     *command_ptr = MI_BATCH_BUFFER_END;
4486
4487     dri_bo_unmap(command_buffer);
4488
4489     BEGIN_BATCH(batch, 2);
4490     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
4491     OUT_RELOC(batch, command_buffer,
4492               I915_GEM_DOMAIN_COMMAND, 0,
4493               0);
4494     ADVANCE_BATCH(batch);
4495
4496     dri_bo_unreference(command_buffer);
4497
4498     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
4499      * will cause control to pass back to ring buffer 
4500      */
4501     intel_batchbuffer_end_atomic(batch);
4502     intel_batchbuffer_flush(batch);
4503     intel_batchbuffer_start_atomic(batch, 0x1000);
4504 }
4505
4506 static void
4507 gen6_pp_pipeline_setup(VADriverContextP ctx,
4508                        struct i965_post_processing_context *pp_context)
4509 {
4510     struct intel_batchbuffer *batch = pp_context->batch;
4511
4512     intel_batchbuffer_start_atomic(batch, 0x1000);
4513     intel_batchbuffer_emit_mi_flush(batch);
4514     gen6_pp_pipeline_select(ctx, pp_context);
4515     gen6_pp_state_base_address(ctx, pp_context);
4516     gen6_pp_vfe_state(ctx, pp_context);
4517     gen6_pp_curbe_load(ctx, pp_context);
4518     gen6_interface_descriptor_load(ctx, pp_context);
4519     gen6_pp_object_walker(ctx, pp_context);
4520     intel_batchbuffer_end_atomic(batch);
4521 }
4522
4523 static VAStatus
4524 gen6_post_processing(
4525     VADriverContextP ctx,
4526     struct i965_post_processing_context *pp_context,
4527     const struct i965_surface *src_surface,
4528     const VARectangle *src_rect,
4529     struct i965_surface *dst_surface,
4530     const VARectangle *dst_rect,
4531     int pp_index,
4532     void *filter_param
4533 )
4534 {
4535     VAStatus va_status;
4536     
4537     va_status = gen6_pp_initialize(ctx, pp_context,
4538                                    src_surface,
4539                                    src_rect,
4540                                    dst_surface,
4541                                    dst_rect,
4542                                    pp_index,
4543                                    filter_param);
4544
4545     if (va_status == VA_STATUS_SUCCESS) {
4546         gen6_pp_states_setup(ctx, pp_context);
4547         gen6_pp_pipeline_setup(ctx, pp_context);
4548     }
4549
4550     if (va_status == VA_STATUS_SUCCESS_1)
4551         va_status = VA_STATUS_SUCCESS;
4552
4553     return va_status;
4554 }
4555
4556 static VAStatus
4557 i965_post_processing_internal(
4558     VADriverContextP   ctx,
4559     struct i965_post_processing_context *pp_context,
4560     const struct i965_surface *src_surface,
4561     const VARectangle *src_rect,
4562     struct i965_surface *dst_surface,
4563     const VARectangle *dst_rect,
4564     int                pp_index,
4565     void *filter_param
4566 )
4567 {
4568     VAStatus va_status;
4569     struct i965_driver_data *i965 = i965_driver_data(ctx);
4570
4571     if (pp_context && pp_context->intel_post_processing) {
4572         va_status = (pp_context->intel_post_processing)(ctx, pp_context,
4573                           src_surface, src_rect,
4574                           dst_surface, dst_rect,
4575                           pp_index, filter_param);
4576     } else {
4577         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4578     }
4579
4580     return va_status;
4581 }
4582
/*
 * Convert a packed 0xAARRGGBB color into 8-bit Y, U, V and alpha values.
 *
 * The conversion uses integer arithmetic with coefficients scaled by 1000
 * (they look like the BT.601 studio-range matrix); the division truncates
 * toward zero before the +16 / +128 offsets are applied.
 *
 * argb: packed color, alpha in bits 31..24, red 23..16, green 15..8,
 *       blue 7..0.
 * y, u, v, a: outputs, written in the order y, v, u, a.
 */
static void
rgb_to_yuv(unsigned int argb,
           unsigned char *y,
           unsigned char *u,
           unsigned char *v,
           unsigned char *a)
{
    const int red   = (int)((argb >> 16) & 0xff);
    const int green = (int)((argb >> 8) & 0xff);
    const int blue  = (int)(argb & 0xff);

    /* Weighted sums, still scaled by 1000. */
    const int luma_x1000 = 257 * red + 504 * green + 98 * blue;
    const int cr_x1000   = 439 * red - 368 * green - 71 * blue;
    const int cb_x1000   = -148 * red - 291 * green + 439 * blue;

    *y = (unsigned char)(luma_x1000 / 1000 + 16);
    *v = (unsigned char)(cr_x1000 / 1000 + 128);
    *u = (unsigned char)(cb_x1000 / 1000 + 128);
    *a = (unsigned char)((argb >> 24) & 0xff);
}
4599
4600 static void 
4601 i965_vpp_clear_surface(VADriverContextP ctx,
4602                        struct i965_post_processing_context *pp_context,
4603                        struct object_surface *obj_surface,
4604                        unsigned int color)
4605 {
4606     struct i965_driver_data *i965 = i965_driver_data(ctx);
4607     struct intel_batchbuffer *batch = pp_context->batch;
4608     unsigned int blt_cmd, br13;
4609     unsigned int tiling = 0, swizzle = 0;
4610     int pitch;
4611     unsigned char y, u, v, a = 0;
4612     int region_width, region_height;
4613
4614     /* Currently only support NV12 surface */
4615     if (!obj_surface || obj_surface->fourcc != VA_FOURCC_NV12)
4616         return;
4617
4618     rgb_to_yuv(color, &y, &u, &v, &a);
4619
4620     if (a == 0)
4621         return;
4622
4623     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
4624     blt_cmd = XY_COLOR_BLT_CMD;
4625     pitch = obj_surface->width;
4626
4627     if (tiling != I915_TILING_NONE) {
4628         assert(tiling == I915_TILING_Y);
4629         // blt_cmd |= XY_COLOR_BLT_DST_TILED;
4630         // pitch >>= 2;
4631     }
4632
4633     br13 = 0xf0 << 16;
4634     br13 |= BR13_8;
4635     br13 |= pitch;
4636
4637     if (IS_IRONLAKE(i965->intel.device_info)) {
4638         intel_batchbuffer_start_atomic(batch, 48);
4639         BEGIN_BATCH(batch, 12);
4640     } else {
4641         /* Will double-check the command if the new chipset is added */
4642         intel_batchbuffer_start_atomic_blt(batch, 48);
4643         BEGIN_BLT_BATCH(batch, 12);
4644     }
4645
4646     region_width = obj_surface->width;
4647     region_height = obj_surface->height;
4648
4649     OUT_BATCH(batch, blt_cmd);
4650     OUT_BATCH(batch, br13);
4651     OUT_BATCH(batch,
4652               0 << 16 |
4653               0);
4654     OUT_BATCH(batch,
4655               region_height << 16 |
4656               region_width);
4657     OUT_RELOC(batch, obj_surface->bo, 
4658               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4659               0);
4660     OUT_BATCH(batch, y);
4661
4662     br13 = 0xf0 << 16;
4663     br13 |= BR13_565;
4664     br13 |= pitch;
4665
4666     region_width = obj_surface->width / 2;
4667     region_height = obj_surface->height / 2;
4668
4669     if (tiling == I915_TILING_Y) {
4670         region_height = ALIGN(obj_surface->height / 2, 32);
4671     }
4672
4673     OUT_BATCH(batch, blt_cmd);
4674     OUT_BATCH(batch, br13);
4675     OUT_BATCH(batch,
4676               0 << 16 |
4677               0);
4678     OUT_BATCH(batch,
4679               region_height << 16 |
4680               region_width);
4681     OUT_RELOC(batch, obj_surface->bo, 
4682               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4683               obj_surface->width * obj_surface->y_cb_offset);
4684     OUT_BATCH(batch, v << 8 | u);
4685
4686     ADVANCE_BATCH(batch);
4687     intel_batchbuffer_end_atomic(batch);
4688 }
4689
4690 VAStatus
4691 i965_scaling_processing(
4692     VADriverContextP   ctx,
4693     struct object_surface *src_surface_obj,
4694     const VARectangle *src_rect,
4695     struct object_surface *dst_surface_obj,
4696     const VARectangle *dst_rect,
4697     unsigned int       flags)
4698 {
4699     VAStatus va_status = VA_STATUS_SUCCESS;
4700     struct i965_driver_data *i965 = i965_driver_data(ctx);
4701  
4702     assert(src_surface_obj->fourcc == VA_FOURCC_NV12);
4703     assert(dst_surface_obj->fourcc == VA_FOURCC_NV12);
4704
4705     if (HAS_VPP(i965) && (flags & I965_PP_FLAG_AVS)) {
4706         struct i965_surface src_surface;
4707         struct i965_surface dst_surface;
4708
4709          _i965LockMutex(&i965->pp_mutex);
4710
4711          src_surface.base = (struct object_base *)src_surface_obj;
4712          src_surface.type = I965_SURFACE_TYPE_SURFACE;
4713          src_surface.flags = I965_SURFACE_FLAG_FRAME;
4714          dst_surface.base = (struct object_base *)dst_surface_obj;
4715          dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4716          dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4717
4718          va_status = i965_post_processing_internal(ctx, i965->pp_context,
4719                                                    &src_surface,
4720                                                    src_rect,
4721                                                    &dst_surface,
4722                                                    dst_rect,
4723                                                    PP_NV12_AVS,
4724                                                    NULL);
4725
4726          _i965UnlockMutex(&i965->pp_mutex);
4727     }
4728
4729     return va_status;
4730 }
4731
4732 VASurfaceID
4733 i965_post_processing(
4734     VADriverContextP   ctx,
4735     struct object_surface *obj_surface,
4736     const VARectangle *src_rect,
4737     const VARectangle *dst_rect,
4738     unsigned int       flags,
4739     int               *has_done_scaling  
4740 )
4741 {
4742     struct i965_driver_data *i965 = i965_driver_data(ctx);
4743     VASurfaceID out_surface_id = VA_INVALID_ID;
4744     VASurfaceID tmp_id = VA_INVALID_ID;
4745     
4746     *has_done_scaling = 0;
4747
4748     if (HAS_VPP(i965)) {
4749         VAStatus status;
4750         struct i965_surface src_surface;
4751         struct i965_surface dst_surface;
4752
4753         /* Currently only support post processing for NV12 surface */
4754         if (obj_surface->fourcc != VA_FOURCC_NV12)
4755             return out_surface_id;
4756
4757         _i965LockMutex(&i965->pp_mutex);
4758
4759         if (flags & I965_PP_FLAG_MCDI) {
4760             src_surface.base = (struct object_base *)obj_surface;
4761             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4762             src_surface.flags = (flags & I965_PP_FLAG_TOP_FIELD) ? 
4763                 I965_SURFACE_FLAG_TOP_FIELD_FIRST : I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST;
4764
4765             status = i965_CreateSurfaces(ctx,
4766                                          obj_surface->orig_width,
4767                                          obj_surface->orig_height,
4768                                          VA_RT_FORMAT_YUV420,
4769                                          1,
4770                                          &out_surface_id);
4771             assert(status == VA_STATUS_SUCCESS);
4772             obj_surface = SURFACE(out_surface_id);
4773             assert(obj_surface);
4774             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
4775             i965_vpp_clear_surface(ctx, i965->pp_context, obj_surface, 0); 
4776
4777             dst_surface.base = (struct object_base *)obj_surface;
4778             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4779             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4780
4781             i965_post_processing_internal(ctx, i965->pp_context,
4782                                           &src_surface,
4783                                           src_rect,
4784                                           &dst_surface,
4785                                           dst_rect,
4786                                           PP_NV12_DNDI,
4787                                           NULL);
4788         }
4789
4790         if (flags & I965_PP_FLAG_AVS) {
4791             struct i965_render_state *render_state = &i965->render_state;
4792             struct intel_region *dest_region = render_state->draw_region;
4793
4794             if (out_surface_id != VA_INVALID_ID)
4795                 tmp_id = out_surface_id;
4796
4797             src_surface.base = (struct object_base *)obj_surface;
4798             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4799             src_surface.flags = I965_SURFACE_FLAG_FRAME;
4800
4801             status = i965_CreateSurfaces(ctx,
4802                                          dest_region->width,
4803                                          dest_region->height,
4804                                          VA_RT_FORMAT_YUV420,
4805                                          1,
4806                                          &out_surface_id);
4807             assert(status == VA_STATUS_SUCCESS);
4808             obj_surface = SURFACE(out_surface_id);
4809             assert(obj_surface);
4810             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
4811             i965_vpp_clear_surface(ctx, i965->pp_context, obj_surface, 0); 
4812
4813             dst_surface.base = (struct object_base *)obj_surface;
4814             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4815             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4816
4817             i965_post_processing_internal(ctx, i965->pp_context,
4818                                           &src_surface,
4819                                           src_rect,
4820                                           &dst_surface,
4821                                           dst_rect,
4822                                           PP_NV12_AVS,
4823                                           NULL);
4824
4825             if (tmp_id != VA_INVALID_ID)
4826                 i965_DestroySurfaces(ctx, &tmp_id, 1);
4827                 
4828             *has_done_scaling = 1;
4829         }
4830
4831         _i965UnlockMutex(&i965->pp_mutex);
4832     }
4833
4834     return out_surface_id;
4835 }       
4836
4837 static VAStatus
4838 i965_image_pl2_processing(VADriverContextP ctx,
4839                           const struct i965_surface *src_surface,
4840                           const VARectangle *src_rect,
4841                           struct i965_surface *dst_surface,
4842                           const VARectangle *dst_rect);
4843
4844 static VAStatus
4845 i965_image_plx_nv12_plx_processing(VADriverContextP ctx,
4846                                    VAStatus (*i965_image_plx_nv12_processing)(
4847                                        VADriverContextP,
4848                                        const struct i965_surface *,
4849                                        const VARectangle *,
4850                                        struct i965_surface *,
4851                                        const VARectangle *),
4852                                    const struct i965_surface *src_surface,
4853                                    const VARectangle *src_rect,
4854                                    struct i965_surface *dst_surface,
4855                                    const VARectangle *dst_rect)
4856 {
4857     struct i965_driver_data *i965 = i965_driver_data(ctx);
4858     VAStatus status;
4859     VASurfaceID tmp_surface_id = VA_INVALID_SURFACE;
4860     struct object_surface *obj_surface = NULL;
4861     struct i965_surface tmp_surface;
4862     int width, height;
4863
4864     pp_get_surface_size(ctx, dst_surface, &width, &height);
4865     status = i965_CreateSurfaces(ctx,
4866                                  width,
4867                                  height,
4868                                  VA_RT_FORMAT_YUV420,
4869                                  1,
4870                                  &tmp_surface_id);
4871     assert(status == VA_STATUS_SUCCESS);
4872     obj_surface = SURFACE(tmp_surface_id);
4873     assert(obj_surface);
4874     i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
4875
4876     tmp_surface.base = (struct object_base *)obj_surface;
4877     tmp_surface.type = I965_SURFACE_TYPE_SURFACE;
4878     tmp_surface.flags = I965_SURFACE_FLAG_FRAME;
4879
4880     status = i965_image_plx_nv12_processing(ctx,
4881                                             src_surface,
4882                                             src_rect,
4883                                             &tmp_surface,
4884                                             dst_rect);
4885
4886     if (status == VA_STATUS_SUCCESS)
4887         status = i965_image_pl2_processing(ctx,
4888                                            &tmp_surface,
4889                                            dst_rect,
4890                                            dst_surface,
4891                                            dst_rect);
4892
4893     i965_DestroySurfaces(ctx,
4894                          &tmp_surface_id,
4895                          1);
4896
4897     return status;
4898 }
4899
4900
4901 static VAStatus
4902 i965_image_pl1_rgbx_processing(VADriverContextP ctx,
4903                                const struct i965_surface *src_surface,
4904                                const VARectangle *src_rect,
4905                                struct i965_surface *dst_surface,
4906                                const VARectangle *dst_rect)
4907 {
4908     struct i965_driver_data *i965 = i965_driver_data(ctx);
4909     struct i965_post_processing_context *pp_context = i965->pp_context;
4910     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4911     VAStatus vaStatus;
4912
4913     switch (fourcc) {
4914     case VA_FOURCC_NV12:
4915         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4916                                                  src_surface,
4917                                                  src_rect,
4918                                                  dst_surface,
4919                                                  dst_rect,
4920                                                  PP_RGBX_LOAD_SAVE_NV12,
4921                                                  NULL);
4922         intel_batchbuffer_flush(pp_context->batch);
4923         break;
4924
4925     default:
4926         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
4927                                                       i965_image_pl1_rgbx_processing,
4928                                                       src_surface,
4929                                                       src_rect,
4930                                                       dst_surface,
4931                                                       dst_rect);
4932         break;
4933     }
4934
4935     return vaStatus;
4936 }
4937
4938 static VAStatus
4939 i965_image_pl3_processing(VADriverContextP ctx,
4940                           const struct i965_surface *src_surface,
4941                           const VARectangle *src_rect,
4942                           struct i965_surface *dst_surface,
4943                           const VARectangle *dst_rect)
4944 {
4945     struct i965_driver_data *i965 = i965_driver_data(ctx);
4946     struct i965_post_processing_context *pp_context = i965->pp_context;
4947     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4948     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
4949
4950     switch (fourcc) {
4951     case VA_FOURCC_NV12:
4952         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4953                                                  src_surface,
4954                                                  src_rect,
4955                                                  dst_surface,
4956                                                  dst_rect,
4957                                                  PP_PL3_LOAD_SAVE_N12,
4958                                                  NULL);
4959         intel_batchbuffer_flush(pp_context->batch);
4960         break;
4961
4962     case VA_FOURCC_IMC1:
4963     case VA_FOURCC_IMC3:
4964     case VA_FOURCC_YV12:
4965     case VA_FOURCC_I420:
4966         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4967                                                  src_surface,
4968                                                  src_rect,
4969                                                  dst_surface,
4970                                                  dst_rect,
4971                                                  PP_PL3_LOAD_SAVE_PL3,
4972                                                  NULL);
4973         intel_batchbuffer_flush(pp_context->batch);
4974         break;
4975
4976     case VA_FOURCC_YUY2:
4977     case VA_FOURCC_UYVY:
4978         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4979                                                  src_surface,
4980                                                  src_rect,
4981                                                  dst_surface,
4982                                                  dst_rect,
4983                                                  PP_PL3_LOAD_SAVE_PA,
4984                                                  NULL);
4985         intel_batchbuffer_flush(pp_context->batch);
4986         break;
4987
4988     default:
4989         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
4990                                                       i965_image_pl3_processing,
4991                                                       src_surface,
4992                                                       src_rect,
4993                                                       dst_surface,
4994                                                       dst_rect);
4995         break;
4996     }
4997
4998     return vaStatus;
4999 }
5000
5001 static VAStatus
5002 i965_image_pl2_processing(VADriverContextP ctx,
5003                           const struct i965_surface *src_surface,
5004                           const VARectangle *src_rect,
5005                           struct i965_surface *dst_surface,
5006                           const VARectangle *dst_rect)
5007 {
5008     struct i965_driver_data *i965 = i965_driver_data(ctx);
5009     struct i965_post_processing_context *pp_context = i965->pp_context;
5010     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5011     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
5012
5013     switch (fourcc) {
5014     case VA_FOURCC_NV12:
5015         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5016                                                  src_surface,
5017                                                  src_rect,
5018                                                  dst_surface,
5019                                                  dst_rect,
5020                                                  PP_NV12_LOAD_SAVE_N12,
5021                                                  NULL);
5022         break;
5023
5024     case VA_FOURCC_IMC1:
5025     case VA_FOURCC_IMC3:
5026     case VA_FOURCC_YV12:
5027     case VA_FOURCC_I420:
5028         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5029                                                  src_surface,
5030                                                  src_rect,
5031                                                  dst_surface,
5032                                                  dst_rect,
5033                                                  PP_NV12_LOAD_SAVE_PL3,
5034                                                  NULL);
5035         break;
5036
5037     case VA_FOURCC_YUY2:
5038     case VA_FOURCC_UYVY:
5039         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5040                                                  src_surface,
5041                                                  src_rect,
5042                                                  dst_surface,
5043                                                  dst_rect,
5044                                                  PP_NV12_LOAD_SAVE_PA,
5045                                                  NULL);
5046         break;
5047
5048     case VA_FOURCC_BGRX:
5049     case VA_FOURCC_BGRA:
5050     case VA_FOURCC_RGBX:
5051     case VA_FOURCC_RGBA:
5052         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5053                                                  src_surface,
5054                                                  src_rect,
5055                                                  dst_surface,
5056                                                  dst_rect,
5057                                                  PP_NV12_LOAD_SAVE_RGBX,
5058                                                  NULL);
5059         break;
5060
5061     default:
5062         return VA_STATUS_ERROR_UNIMPLEMENTED;
5063     }
5064
5065     intel_batchbuffer_flush(pp_context->batch);
5066
5067     return vaStatus;
5068 }
5069
5070 static VAStatus
5071 i965_image_pl1_processing(VADriverContextP ctx,
5072                           const struct i965_surface *src_surface,
5073                           const VARectangle *src_rect,
5074                           struct i965_surface *dst_surface,
5075                           const VARectangle *dst_rect)
5076 {
5077     struct i965_driver_data *i965 = i965_driver_data(ctx);
5078     struct i965_post_processing_context *pp_context = i965->pp_context;
5079     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5080     VAStatus vaStatus;
5081
5082     switch (fourcc) {
5083     case VA_FOURCC_NV12:
5084         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5085                                                  src_surface,
5086                                                  src_rect,
5087                                                  dst_surface,
5088                                                  dst_rect,
5089                                                  PP_PA_LOAD_SAVE_NV12,
5090                                                  NULL);
5091         intel_batchbuffer_flush(pp_context->batch);
5092         break;
5093
5094     case VA_FOURCC_YV12:
5095         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5096                                                  src_surface,
5097                                                  src_rect,
5098                                                  dst_surface,
5099                                                  dst_rect,
5100                                                  PP_PA_LOAD_SAVE_PL3,
5101                                                  NULL);
5102         intel_batchbuffer_flush(pp_context->batch);
5103         break;
5104
5105     case VA_FOURCC_YUY2:
5106     case VA_FOURCC_UYVY:
5107         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5108                                                  src_surface,
5109                                                  src_rect,
5110                                                  dst_surface,
5111                                                  dst_rect,
5112                                                  PP_PA_LOAD_SAVE_PA,
5113                                                  NULL);
5114         intel_batchbuffer_flush(pp_context->batch);
5115         break;
5116
5117     default:
5118         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
5119                                                       i965_image_pl1_processing,
5120                                                       src_surface,
5121                                                       src_rect,
5122                                                       dst_surface,
5123                                                       dst_rect);
5124         break;
5125     }
5126
5127     return vaStatus;
5128 }
5129
5130 VAStatus
5131 i965_image_processing(VADriverContextP ctx,
5132                       const struct i965_surface *src_surface,
5133                       const VARectangle *src_rect,
5134                       struct i965_surface *dst_surface,
5135                       const VARectangle *dst_rect)
5136 {
5137     struct i965_driver_data *i965 = i965_driver_data(ctx);
5138     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
5139
5140     if (HAS_VPP(i965)) {
5141         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
5142
5143         _i965LockMutex(&i965->pp_mutex);
5144
5145         switch (fourcc) {
5146         case VA_FOURCC_YV12:
5147         case VA_FOURCC_I420:
5148         case VA_FOURCC_IMC1:
5149         case VA_FOURCC_IMC3:
5150         case VA_FOURCC_422H:
5151         case VA_FOURCC_422V:
5152         case VA_FOURCC_411P:
5153         case VA_FOURCC_444P:
5154         case VA_FOURCC_YV16:
5155             status = i965_image_pl3_processing(ctx,
5156                                                src_surface,
5157                                                src_rect,
5158                                                dst_surface,
5159                                                dst_rect);
5160             break;
5161
5162         case  VA_FOURCC_NV12:
5163             status = i965_image_pl2_processing(ctx,
5164                                                src_surface,
5165                                                src_rect,
5166                                                dst_surface,
5167                                                dst_rect);
5168             break;
5169         case VA_FOURCC_YUY2:
5170         case VA_FOURCC_UYVY:
5171             status = i965_image_pl1_processing(ctx,
5172                                                src_surface,
5173                                                src_rect,
5174                                                dst_surface,
5175                                                dst_rect);
5176             break;
5177         case VA_FOURCC_BGRA:
5178         case VA_FOURCC_BGRX:
5179         case VA_FOURCC_RGBA:
5180         case VA_FOURCC_RGBX:
5181             status = i965_image_pl1_rgbx_processing(ctx,
5182                                                src_surface,
5183                                                src_rect,
5184                                                dst_surface,
5185                                                dst_rect);
5186             break;
5187         default:
5188             status = VA_STATUS_ERROR_UNIMPLEMENTED;
5189             break;
5190         }
5191         
5192         _i965UnlockMutex(&i965->pp_mutex);
5193     }
5194
5195     return status;
5196 }       
5197
5198 static void
5199 i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
5200 {
5201     int i;
5202
5203     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
5204     pp_context->surface_state_binding_table.bo = NULL;
5205
5206     dri_bo_unreference(pp_context->curbe.bo);
5207     pp_context->curbe.bo = NULL;
5208
5209     dri_bo_unreference(pp_context->sampler_state_table.bo);
5210     pp_context->sampler_state_table.bo = NULL;
5211
5212     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
5213     pp_context->sampler_state_table.bo_8x8 = NULL;
5214
5215     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
5216     pp_context->sampler_state_table.bo_8x8_uv = NULL;
5217
5218     dri_bo_unreference(pp_context->idrt.bo);
5219     pp_context->idrt.bo = NULL;
5220     pp_context->idrt.num_interface_descriptors = 0;
5221
5222     dri_bo_unreference(pp_context->vfe_state.bo);
5223     pp_context->vfe_state.bo = NULL;
5224
5225     dri_bo_unreference(pp_context->pp_dndi_context.stmm_bo);
5226     pp_context->pp_dndi_context.stmm_bo = NULL;
5227
5228     dri_bo_unreference(pp_context->pp_dn_context.stmm_bo);
5229     pp_context->pp_dn_context.stmm_bo = NULL;
5230
5231     for (i = 0; i < NUM_PP_MODULES; i++) {
5232         struct pp_module *pp_module = &pp_context->pp_modules[i];
5233
5234         dri_bo_unreference(pp_module->kernel.bo);
5235         pp_module->kernel.bo = NULL;
5236     }
5237
5238     free(pp_context->pp_static_parameter);
5239     free(pp_context->pp_inline_parameter);
5240     pp_context->pp_static_parameter = NULL;
5241     pp_context->pp_inline_parameter = NULL;
5242 }
5243
5244 void
5245 i965_post_processing_terminate(VADriverContextP ctx)
5246 {
5247     struct i965_driver_data *i965 = i965_driver_data(ctx);
5248     struct i965_post_processing_context *pp_context = i965->pp_context;
5249
5250     if (pp_context) {
5251         pp_context->finalize(pp_context);
5252         free(pp_context);
5253     }
5254
5255     i965->pp_context = NULL;
5256 }
5257
5258 #define VPP_CURBE_ALLOCATION_SIZE       32
5259
5260 void
5261 i965_post_processing_context_init(VADriverContextP ctx,
5262                                   void *data,
5263                                   struct intel_batchbuffer *batch)
5264 {
5265     struct i965_driver_data *i965 = i965_driver_data(ctx);
5266     int i;
5267     struct i965_post_processing_context *pp_context = data;
5268
5269     if (IS_IRONLAKE(i965->intel.device_info)) {
5270         pp_context->urb.size = i965->intel.device_info->urb_size;
5271         pp_context->urb.num_vfe_entries = 32;
5272         pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
5273         pp_context->urb.num_cs_entries = 1;
5274         pp_context->urb.size_cs_entry = 2;
5275         pp_context->urb.vfe_start = 0;
5276         pp_context->urb.cs_start = pp_context->urb.vfe_start + 
5277             pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
5278         assert(pp_context->urb.cs_start +
5279            pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= i965->intel.device_info->urb_size);
5280         pp_context->intel_post_processing = ironlake_post_processing;
5281     } else {
5282         pp_context->vfe_gpu_state.max_num_threads = 60;
5283         pp_context->vfe_gpu_state.num_urb_entries = 59;
5284         pp_context->vfe_gpu_state.gpgpu_mode = 0;
5285         pp_context->vfe_gpu_state.urb_entry_size = 16 - 1;
5286         pp_context->vfe_gpu_state.curbe_allocation_size = VPP_CURBE_ALLOCATION_SIZE;
5287         pp_context->intel_post_processing = gen6_post_processing;
5288     }
5289
5290     pp_context->finalize = i965_post_processing_context_finalize;
5291
5292     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
5293     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
5294     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
5295     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen75));
5296
5297     if (IS_HASWELL(i965->intel.device_info))
5298         memcpy(pp_context->pp_modules, pp_modules_gen75, sizeof(pp_context->pp_modules));
5299     else if (IS_GEN7(i965->intel.device_info))
5300         memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
5301     else if (IS_GEN6(i965->intel.device_info))
5302         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
5303     else if (IS_IRONLAKE(i965->intel.device_info))
5304         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
5305
5306     for (i = 0; i < NUM_PP_MODULES; i++) {
5307         struct pp_module *pp_module = &pp_context->pp_modules[i];
5308         dri_bo_unreference(pp_module->kernel.bo);
5309         if (pp_module->kernel.bin && pp_module->kernel.size) {
5310             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
5311                                                 pp_module->kernel.name,
5312                                                 pp_module->kernel.size,
5313                                                 4096);
5314             assert(pp_module->kernel.bo);
5315             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
5316         } else {
5317             pp_module->kernel.bo = NULL;
5318         }
5319     }
5320
5321     /* static & inline parameters */
5322     if (IS_GEN7(i965->intel.device_info)) {
5323         pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
5324         pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
5325     } else {
5326         pp_context->pp_static_parameter = calloc(sizeof(struct pp_static_parameter), 1);
5327         pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
5328     }
5329
5330     pp_context->pp_dndi_context.current_out_surface = VA_INVALID_SURFACE;
5331     pp_context->pp_dndi_context.current_out_obj_surface = NULL;
5332     pp_context->pp_dndi_context.frame_order = -1;
5333     pp_context->batch = batch;
5334 }
5335
5336 bool
5337 i965_post_processing_init(VADriverContextP ctx)
5338 {
5339     struct i965_driver_data *i965 = i965_driver_data(ctx);
5340     struct i965_post_processing_context *pp_context = i965->pp_context;
5341
5342     if (HAS_VPP(i965)) {
5343         if (pp_context == NULL) {
5344             pp_context = calloc(1, sizeof(*pp_context));
5345             i965->codec_info->post_processing_context_init(ctx, pp_context, i965->pp_batch);
5346             i965->pp_context = pp_context;
5347         }
5348     }
5349
5350     return true;
5351 }
5352
5353 static const int procfilter_to_pp_flag[VAProcFilterCount] = {
5354     PP_NULL,    /* VAProcFilterNone */
5355     PP_NV12_DN, /* VAProcFilterNoiseReduction */
5356     PP_NV12_DNDI, /* VAProcFilterDeinterlacing */
5357     PP_NULL,    /* VAProcFilterSharpening */
5358     PP_NULL,    /* VAProcFilterColorBalance */
5359 };
5360
5361 static const int proc_frame_to_pp_frame[3] = {
5362     I965_SURFACE_FLAG_FRAME,
5363     I965_SURFACE_FLAG_TOP_FIELD_FIRST,
5364     I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST
5365 };
5366
5367 VAStatus 
5368 i965_proc_picture(VADriverContextP ctx, 
5369                   VAProfile profile, 
5370                   union codec_state *codec_state,
5371                   struct hw_context *hw_context)
5372 {
5373     struct i965_driver_data *i965 = i965_driver_data(ctx);
5374     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
5375     struct proc_state *proc_state = &codec_state->proc;
5376     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
5377     struct object_surface *obj_surface;
5378     struct i965_surface src_surface, dst_surface;
5379     VARectangle src_rect, dst_rect;
5380     VAStatus status;
5381     int i;
5382     VASurfaceID tmp_surfaces[VAProcFilterCount + 4];
5383     int num_tmp_surfaces = 0;
5384     unsigned int tiling = 0, swizzle = 0;
5385     int in_width, in_height;
5386
5387     if (pipeline_param->surface == VA_INVALID_ID ||
5388         proc_state->current_render_target == VA_INVALID_ID) {
5389         status = VA_STATUS_ERROR_INVALID_SURFACE;
5390         goto error;
5391     }
5392
5393     obj_surface = SURFACE(pipeline_param->surface);
5394
5395     if (!obj_surface) {
5396         status = VA_STATUS_ERROR_INVALID_SURFACE;
5397         goto error;
5398     }
5399
5400     if (!obj_surface->bo) {
5401         status = VA_STATUS_ERROR_INVALID_VALUE; /* The input surface is created without valid content */
5402         goto error;
5403     }
5404
5405     if (pipeline_param->num_filters && !pipeline_param->filters) {
5406         status = VA_STATUS_ERROR_INVALID_PARAMETER;
5407         goto error;
5408     }
5409
5410     in_width = obj_surface->orig_width;
5411     in_height = obj_surface->orig_height;
5412     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
5413
5414     src_surface.base = (struct object_base *)obj_surface;
5415     src_surface.type = I965_SURFACE_TYPE_SURFACE;
5416     src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
5417
5418     VASurfaceID out_surface_id = VA_INVALID_ID;
5419     if (obj_surface->fourcc != VA_FOURCC_NV12) {
5420         src_surface.base = (struct object_base *)obj_surface;
5421         src_surface.type = I965_SURFACE_TYPE_SURFACE;
5422         src_surface.flags = I965_SURFACE_FLAG_FRAME;
5423         src_rect.x = 0;
5424         src_rect.y = 0;
5425         src_rect.width = in_width;
5426         src_rect.height = in_height;
5427
5428         status = i965_CreateSurfaces(ctx,
5429                                      in_width,
5430                                      in_height,
5431                                      VA_RT_FORMAT_YUV420,
5432                                      1,
5433                                      &out_surface_id);
5434         assert(status == VA_STATUS_SUCCESS);
5435         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
5436         obj_surface = SURFACE(out_surface_id);
5437         assert(obj_surface);
5438         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5439
5440         dst_surface.base = (struct object_base *)obj_surface;
5441         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5442         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
5443         dst_rect.x = 0;
5444         dst_rect.y = 0;
5445         dst_rect.width = in_width;
5446         dst_rect.height = in_height;
5447
5448         status = i965_image_processing(ctx,
5449                                        &src_surface,
5450                                        &src_rect,
5451                                        &dst_surface,
5452                                        &dst_rect);
5453         assert(status == VA_STATUS_SUCCESS);
5454
5455         src_surface.base = (struct object_base *)obj_surface;
5456         src_surface.type = I965_SURFACE_TYPE_SURFACE;
5457         src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
5458     }
5459
5460     if (pipeline_param->surface_region) {
5461         src_rect.x = pipeline_param->surface_region->x;
5462         src_rect.y = pipeline_param->surface_region->y;
5463         src_rect.width = pipeline_param->surface_region->width;
5464         src_rect.height = pipeline_param->surface_region->height;
5465     } else {
5466         src_rect.x = 0;
5467         src_rect.y = 0;
5468         src_rect.width = in_width;
5469         src_rect.height = in_height;
5470     }
5471
5472     if (pipeline_param->output_region) {
5473         dst_rect.x = pipeline_param->output_region->x;
5474         dst_rect.y = pipeline_param->output_region->y;
5475         dst_rect.width = pipeline_param->output_region->width;
5476         dst_rect.height = pipeline_param->output_region->height;
5477     } else {
5478         dst_rect.x = 0;
5479         dst_rect.y = 0;
5480         dst_rect.width = in_width;
5481         dst_rect.height = in_height;
5482     }
5483
5484     proc_context->pp_context.pipeline_param = pipeline_param;
5485
5486     for (i = 0; i < pipeline_param->num_filters; i++) {
5487         struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]);
5488         VAProcFilterParameterBufferBase *filter_param = NULL;
5489         VAProcFilterType filter_type;
5490         int kernel_index;
5491
5492         if (!obj_buffer ||
5493             !obj_buffer->buffer_store ||
5494             !obj_buffer->buffer_store->buffer) {
5495             status = VA_STATUS_ERROR_INVALID_FILTER_CHAIN;
5496             goto error;
5497         }
5498
5499         out_surface_id = VA_INVALID_ID;
5500         filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer;
5501         filter_type = filter_param->type;
5502         kernel_index = procfilter_to_pp_flag[filter_type];
5503
5504         if (kernel_index != PP_NULL &&
5505             proc_context->pp_context.pp_modules[kernel_index].kernel.bo != NULL) {
5506             status = i965_CreateSurfaces(ctx,
5507                                          in_width,
5508                                          in_height,
5509                                          VA_RT_FORMAT_YUV420,
5510                                          1,
5511                                          &out_surface_id);
5512             assert(status == VA_STATUS_SUCCESS);
5513             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
5514             obj_surface = SURFACE(out_surface_id);
5515             assert(obj_surface);
5516             i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5517             dst_surface.base = (struct object_base *)obj_surface;
5518             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5519             status = i965_post_processing_internal(ctx, &proc_context->pp_context,
5520                                                    &src_surface,
5521                                                    &src_rect,
5522                                                    &dst_surface,
5523                                                    &src_rect,
5524                                                    kernel_index,
5525                                                    filter_param);
5526
5527             if (status == VA_STATUS_SUCCESS) {
5528                 src_surface.base = dst_surface.base;
5529                 src_surface.type = dst_surface.type;
5530                 src_surface.flags = dst_surface.flags;
5531             }
5532         }
5533     }
5534
5535     proc_context->pp_context.pipeline_param = NULL;
5536     obj_surface = SURFACE(proc_state->current_render_target);
5537     
5538     if (!obj_surface) {
5539         status = VA_STATUS_ERROR_INVALID_SURFACE;
5540         goto error;
5541     }
5542
5543     int csc_needed = 0;
5544     if (obj_surface->fourcc && obj_surface->fourcc !=  VA_FOURCC_NV12){
5545         csc_needed = 1;
5546         out_surface_id = VA_INVALID_ID;
5547         status = i965_CreateSurfaces(ctx,
5548                                      obj_surface->orig_width,
5549                                      obj_surface->orig_height,
5550                                      VA_RT_FORMAT_YUV420, 
5551                                      1,
5552                                      &out_surface_id);
5553         assert(status == VA_STATUS_SUCCESS);
5554         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
5555         struct object_surface *csc_surface = SURFACE(out_surface_id);
5556         assert(csc_surface);
5557         i965_check_alloc_surface_bo(ctx, csc_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5558         dst_surface.base = (struct object_base *)csc_surface;
5559     } else {
5560         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5561         dst_surface.base = (struct object_base *)obj_surface;
5562     }
5563
5564     dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5565     i965_vpp_clear_surface(ctx, &proc_context->pp_context, obj_surface, pipeline_param->output_background_color); 
5566
5567     // load/save doesn't support different origin offset for src and dst surface
5568     if (src_rect.width == dst_rect.width &&
5569         src_rect.height == dst_rect.height &&
5570         src_rect.x == dst_rect.x &&
5571         src_rect.y == dst_rect.y) {
5572         i965_post_processing_internal(ctx, &proc_context->pp_context,
5573                                       &src_surface,
5574                                       &src_rect,
5575                                       &dst_surface,
5576                                       &dst_rect,
5577                                       PP_NV12_LOAD_SAVE_N12,
5578                                       NULL);
5579     } else {
5580
5581         i965_post_processing_internal(ctx, &proc_context->pp_context,
5582                                       &src_surface,
5583                                       &src_rect,
5584                                       &dst_surface,
5585                                       &dst_rect,
5586                                       (pipeline_param->filter_flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC ?
5587                                       PP_NV12_AVS : PP_NV12_SCALING,
5588                                       NULL);
5589     }
5590
5591     if (csc_needed) {
5592         src_surface.base = dst_surface.base;
5593         src_surface.type = dst_surface.type;
5594         src_surface.flags = dst_surface.flags;
5595         dst_surface.base = (struct object_base *)obj_surface;
5596         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5597         i965_image_processing(ctx, &src_surface, &dst_rect, &dst_surface, &dst_rect);
5598     }
5599     
5600     if (num_tmp_surfaces)
5601         i965_DestroySurfaces(ctx,
5602                              tmp_surfaces,
5603                              num_tmp_surfaces);
5604
5605     intel_batchbuffer_flush(hw_context->batch);
5606
5607     return VA_STATUS_SUCCESS;
5608
5609 error:
5610     if (num_tmp_surfaces)
5611         i965_DestroySurfaces(ctx,
5612                              tmp_surfaces,
5613                              num_tmp_surfaces);
5614
5615     return status;
5616 }
5617
5618 static void
5619 i965_proc_context_destroy(void *hw_context)
5620 {
5621     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
5622
5623     i965_post_processing_context_finalize(&proc_context->pp_context);
5624     intel_batchbuffer_free(proc_context->base.batch);
5625     free(proc_context);
5626 }
5627
5628 struct hw_context *
5629 i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config)
5630 {
5631     struct i965_driver_data *i965 = i965_driver_data(ctx);
5632     struct intel_driver_data *intel = intel_driver_data(ctx);
5633     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
5634
5635     proc_context->base.destroy = i965_proc_context_destroy;
5636     proc_context->base.run = i965_proc_picture;
5637     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
5638     i965->codec_info->post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
5639
5640     return (struct hw_context *)proc_context;
5641 }
5642
5643