Work around the hardware limitation (dword alignment) of the horizontal offset
[profile/ivi/vaapi-intel-driver.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_post_processing.h"
40 #include "i965_render.h"
41
42 #define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) ||     \
43                      IS_GEN6((ctx)->intel.device_id) ||         \
44                      IS_GEN7((ctx)->intel.device_id))
45
46 #define SURFACE_STATE_PADDED_SIZE_0_I965        ALIGN(sizeof(struct i965_surface_state), 32)
47 #define SURFACE_STATE_PADDED_SIZE_1_I965        ALIGN(sizeof(struct i965_surface_state2), 32)
48 #define SURFACE_STATE_PADDED_SIZE_I965          MAX(SURFACE_STATE_PADDED_SIZE_0_I965, SURFACE_STATE_PADDED_SIZE_1_I965)
49
50 #define SURFACE_STATE_PADDED_SIZE_0_GEN7        ALIGN(sizeof(struct gen7_surface_state), 32)
51 #define SURFACE_STATE_PADDED_SIZE_1_GEN7        ALIGN(sizeof(struct gen7_surface_state2), 32)
52 #define SURFACE_STATE_PADDED_SIZE_GEN7          MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)
53
54 #define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
/* Byte offset of surface-state slot `index`; the argument is parenthesized so
 * that expressions such as SURFACE_STATE_OFFSET(base + 1) scale correctly. */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
56 #define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)
57
58 #define GPU_ASM_BLOCK_WIDTH         16
59 #define GPU_ASM_BLOCK_HEIGHT        8
60 #define GPU_ASM_X_OFFSET_ALIGNMENT  4
61
62 static const uint32_t pp_null_gen5[][4] = {
63 #include "shaders/post_processing/gen5_6/null.g4b.gen5"
64 };
65
66 static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
67 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
68 };
69
70 static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
71 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
72 };
73
74 static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
75 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
76 };
77
78 static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
79 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
80 };
81
82 static const uint32_t pp_nv12_scaling_gen5[][4] = {
83 #include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
84 };
85
86 static const uint32_t pp_nv12_avs_gen5[][4] = {
87 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
88 };
89
90 static const uint32_t pp_nv12_dndi_gen5[][4] = {
91 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
92 };
93
94 static const uint32_t pp_nv12_dn_gen5[][4] = {
95 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
96 };
97
98 static const uint32_t pp_nv12_load_save_pa_gen5[][4] = {
99 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5"
100 };
101
102 static const uint32_t pp_pl3_load_save_pa_gen5[][4] = {
103 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5"
104 };
105
106 static const uint32_t pp_pa_load_save_nv12_gen5[][4] = {
107 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5"
108 };
109
110 static const uint32_t pp_pa_load_save_pl3_gen5[][4] = {
111 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5"
112 };
113
114 static const uint32_t pp_rgbx_load_save_nv12_gen5[][4] = {
115 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g4b.gen5"
116 };
117
118 static const uint32_t pp_nv12_load_save_rgbx_gen5[][4] = {
119 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g4b.gen5"
120 };
121
122 static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
123                                    const struct i965_surface *src_surface,
124                                    const VARectangle *src_rect,
125                                    struct i965_surface *dst_surface,
126                                    const VARectangle *dst_rect,
127                                    void *filter_param);
128 static VAStatus pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
129                                             const struct i965_surface *src_surface,
130                                             const VARectangle *src_rect,
131                                             struct i965_surface *dst_surface,
132                                             const VARectangle *dst_rect,
133                                             void *filter_param);
134 static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
135                                            const struct i965_surface *src_surface,
136                                            const VARectangle *src_rect,
137                                            struct i965_surface *dst_surface,
138                                            const VARectangle *dst_rect,
139                                            void *filter_param);
140 static VAStatus gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
141                                              const struct i965_surface *src_surface,
142                                              const VARectangle *src_rect,
143                                              struct i965_surface *dst_surface,
144                                              const VARectangle *dst_rect,
145                                              void *filter_param);
146 static VAStatus pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
147                                                 const struct i965_surface *src_surface,
148                                                 const VARectangle *src_rect,
149                                                 struct i965_surface *dst_surface,
150                                                 const VARectangle *dst_rect,
151                                                 void *filter_param);
152 static VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
153                                         const struct i965_surface *src_surface,
154                                         const VARectangle *src_rect,
155                                         struct i965_surface *dst_surface,
156                                         const VARectangle *dst_rect,
157                                         void *filter_param);
158 static VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
159                                       const struct i965_surface *src_surface,
160                                       const VARectangle *src_rect,
161                                       struct i965_surface *dst_surface,
162                                       const VARectangle *dst_rect,
163                                       void *filter_param);
164
165 static struct pp_module pp_modules_gen5[] = {
166     {
167         {
168             "NULL module (for testing)",
169             PP_NULL,
170             pp_null_gen5,
171             sizeof(pp_null_gen5),
172             NULL,
173         },
174
175         pp_null_initialize,
176     },
177
178     {
179         {
180             "NV12_NV12",
181             PP_NV12_LOAD_SAVE_N12,
182             pp_nv12_load_save_nv12_gen5,
183             sizeof(pp_nv12_load_save_nv12_gen5),
184             NULL,
185         },
186
187         pp_plx_load_save_plx_initialize,
188     },
189
190     {
191         {
192             "NV12_PL3",
193             PP_NV12_LOAD_SAVE_PL3,
194             pp_nv12_load_save_pl3_gen5,
195             sizeof(pp_nv12_load_save_pl3_gen5),
196             NULL,
197         },
198
199         pp_plx_load_save_plx_initialize,
200     },
201
202     {
203         {
204             "PL3_NV12",
205             PP_PL3_LOAD_SAVE_N12,
206             pp_pl3_load_save_nv12_gen5,
207             sizeof(pp_pl3_load_save_nv12_gen5),
208             NULL,
209         },
210
211         pp_plx_load_save_plx_initialize,
212     },
213
214     {
215         {
216             "PL3_PL3",
217             PP_PL3_LOAD_SAVE_N12,
218             pp_pl3_load_save_pl3_gen5,
219             sizeof(pp_pl3_load_save_pl3_gen5),
220             NULL,
221         },
222
223         pp_plx_load_save_plx_initialize
224     },
225
226     {
227         {
228             "NV12 Scaling module",
229             PP_NV12_SCALING,
230             pp_nv12_scaling_gen5,
231             sizeof(pp_nv12_scaling_gen5),
232             NULL,
233         },
234
235         pp_nv12_scaling_initialize,
236     },
237
238     {
239         {
240             "NV12 AVS module",
241             PP_NV12_AVS,
242             pp_nv12_avs_gen5,
243             sizeof(pp_nv12_avs_gen5),
244             NULL,
245         },
246
247         pp_nv12_avs_initialize_nlas,
248     },
249
250     {
251         {
252             "NV12 DNDI module",
253             PP_NV12_DNDI,
254             pp_nv12_dndi_gen5,
255             sizeof(pp_nv12_dndi_gen5),
256             NULL,
257         },
258
259         pp_nv12_dndi_initialize,
260     },
261
262     {
263         {
264             "NV12 DN module",
265             PP_NV12_DN,
266             pp_nv12_dn_gen5,
267             sizeof(pp_nv12_dn_gen5),
268             NULL,
269         },
270
271         pp_nv12_dn_initialize,
272     },
273
274     {
275         {
276             "NV12_PA module",
277             PP_NV12_LOAD_SAVE_PA,
278             pp_nv12_load_save_pa_gen5,
279             sizeof(pp_nv12_load_save_pa_gen5),
280             NULL,
281         },
282     
283         pp_plx_load_save_plx_initialize,
284     },
285
286     {
287         {
288             "PL3_PA module",
289             PP_PL3_LOAD_SAVE_PA,
290             pp_pl3_load_save_pa_gen5,
291             sizeof(pp_pl3_load_save_pa_gen5),
292             NULL,
293         },
294     
295         pp_plx_load_save_plx_initialize,
296     },
297
298     {
299         {
300             "PA_NV12 module",
301             PP_PA_LOAD_SAVE_NV12,
302             pp_pa_load_save_nv12_gen5,
303             sizeof(pp_pa_load_save_nv12_gen5),
304             NULL,
305         },
306     
307         pp_plx_load_save_plx_initialize,
308     },
309
310     {
311         {
312             "PA_PL3 module",
313             PP_PA_LOAD_SAVE_PL3,
314             pp_pa_load_save_pl3_gen5,
315             sizeof(pp_pa_load_save_pl3_gen5),
316             NULL,
317         },
318     
319         pp_plx_load_save_plx_initialize,
320     },
321
322     {
323         {
324             "RGBX_NV12 module",
325             PP_RGBX_LOAD_SAVE_NV12,
326             pp_rgbx_load_save_nv12_gen5,
327             sizeof(pp_rgbx_load_save_nv12_gen5),
328             NULL,
329         },
330     
331         pp_plx_load_save_plx_initialize,
332     },
333             
334     {
335         {
336             "NV12_RGBX module",
337             PP_NV12_LOAD_SAVE_RGBX,
338             pp_nv12_load_save_rgbx_gen5,
339             sizeof(pp_nv12_load_save_rgbx_gen5),
340             NULL,
341         },
342     
343         pp_plx_load_save_plx_initialize,
344     },
345                     
346 };
347
348 static const uint32_t pp_null_gen6[][4] = {
349 #include "shaders/post_processing/gen5_6/null.g6b"
350 };
351
352 static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
353 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
354 };
355
356 static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
357 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
358 };
359
360 static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
361 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
362 };
363
364 static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
365 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
366 };
367
368 static const uint32_t pp_nv12_scaling_gen6[][4] = {
369 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
370 };
371
372 static const uint32_t pp_nv12_avs_gen6[][4] = {
373 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
374 };
375
376 static const uint32_t pp_nv12_dndi_gen6[][4] = {
377 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
378 };
379
380 static const uint32_t pp_nv12_dn_gen6[][4] = {
381 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
382 };
383
384 static const uint32_t pp_nv12_load_save_pa_gen6[][4] = {
385 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g6b"
386 };
387
388 static const uint32_t pp_pl3_load_save_pa_gen6[][4] = {
389 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g6b"
390 };
391
392 static const uint32_t pp_pa_load_save_nv12_gen6[][4] = {
393 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g6b"
394 };
395
396 static const uint32_t pp_pa_load_save_pl3_gen6[][4] = {
397 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g6b"
398 };
399
400 static const uint32_t pp_rgbx_load_save_nv12_gen6[][4] = {
401 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g6b"
402 };
403
404 static const uint32_t pp_nv12_load_save_rgbx_gen6[][4] = {
405 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g6b"
406 };
407
408 static struct pp_module pp_modules_gen6[] = {
409     {
410         {
411             "NULL module (for testing)",
412             PP_NULL,
413             pp_null_gen6,
414             sizeof(pp_null_gen6),
415             NULL,
416         },
417
418         pp_null_initialize,
419     },
420
421     {
422         {
423             "NV12_NV12",
424             PP_NV12_LOAD_SAVE_N12,
425             pp_nv12_load_save_nv12_gen6,
426             sizeof(pp_nv12_load_save_nv12_gen6),
427             NULL,
428         },
429
430         pp_plx_load_save_plx_initialize,
431     },
432
433     {
434         {
435             "NV12_PL3",
436             PP_NV12_LOAD_SAVE_PL3,
437             pp_nv12_load_save_pl3_gen6,
438             sizeof(pp_nv12_load_save_pl3_gen6),
439             NULL,
440         },
441         
442         pp_plx_load_save_plx_initialize,
443     },
444
445     {
446         {
447             "PL3_NV12",
448             PP_PL3_LOAD_SAVE_N12,
449             pp_pl3_load_save_nv12_gen6,
450             sizeof(pp_pl3_load_save_nv12_gen6),
451             NULL,
452         },
453
454         pp_plx_load_save_plx_initialize,
455     },
456
457     {
458         {
459             "PL3_PL3",
460             PP_PL3_LOAD_SAVE_N12,
461             pp_pl3_load_save_pl3_gen6,
462             sizeof(pp_pl3_load_save_pl3_gen6),
463             NULL,
464         },
465
466         pp_plx_load_save_plx_initialize,
467     },
468
469     {
470         {
471             "NV12 Scaling module",
472             PP_NV12_SCALING,
473             pp_nv12_scaling_gen6,
474             sizeof(pp_nv12_scaling_gen6),
475             NULL,
476         },
477
478         gen6_nv12_scaling_initialize,
479     },
480
481     {
482         {
483             "NV12 AVS module",
484             PP_NV12_AVS,
485             pp_nv12_avs_gen6,
486             sizeof(pp_nv12_avs_gen6),
487             NULL,
488         },
489
490         pp_nv12_avs_initialize_nlas,
491     },
492
493     {
494         {
495             "NV12 DNDI module",
496             PP_NV12_DNDI,
497             pp_nv12_dndi_gen6,
498             sizeof(pp_nv12_dndi_gen6),
499             NULL,
500         },
501
502         pp_nv12_dndi_initialize,
503     },
504
505     {
506         {
507             "NV12 DN module",
508             PP_NV12_DN,
509             pp_nv12_dn_gen6,
510             sizeof(pp_nv12_dn_gen6),
511             NULL,
512         },
513
514         pp_nv12_dn_initialize,
515     },
516     {
517         {
518             "NV12_PA module",
519             PP_NV12_LOAD_SAVE_PA,
520             pp_nv12_load_save_pa_gen6,
521             sizeof(pp_nv12_load_save_pa_gen6),
522             NULL,
523         },
524     
525         pp_plx_load_save_plx_initialize,
526     },
527     
528     {
529         {
530             "PL3_PA module",
531             PP_PL3_LOAD_SAVE_PA,
532             pp_pl3_load_save_pa_gen6,
533             sizeof(pp_pl3_load_save_pa_gen6),
534             NULL,
535         },
536     
537         pp_plx_load_save_plx_initialize,
538     },
539     
540     {
541         {
542             "PA_NV12 module",
543             PP_PA_LOAD_SAVE_NV12,
544             pp_pa_load_save_nv12_gen6,
545             sizeof(pp_pa_load_save_nv12_gen6),
546             NULL,
547         },
548     
549         pp_plx_load_save_plx_initialize,
550     },
551
552     {
553         {
554             "PA_PL3 module",
555             PP_PA_LOAD_SAVE_PL3,
556             pp_pa_load_save_pl3_gen6,
557             sizeof(pp_pa_load_save_pl3_gen6),
558             NULL,
559         },
560     
561         pp_plx_load_save_plx_initialize,
562     },
563     
564     {
565         {
566             "RGBX_NV12 module",
567             PP_RGBX_LOAD_SAVE_NV12,
568             pp_rgbx_load_save_nv12_gen6,
569             sizeof(pp_rgbx_load_save_nv12_gen6),
570             NULL,
571         },
572     
573         pp_plx_load_save_plx_initialize,
574     },
575
576     {
577         {
578             "NV12_RGBX module",
579             PP_NV12_LOAD_SAVE_RGBX,
580             pp_nv12_load_save_rgbx_gen6,
581             sizeof(pp_nv12_load_save_rgbx_gen6),
582             NULL,
583         },
584     
585         pp_plx_load_save_plx_initialize,
586     },
587 };
588
589 static const uint32_t pp_null_gen7[][4] = {
590 };
591
592 static const uint32_t pp_nv12_load_save_nv12_gen7[][4] = {
593 #include "shaders/post_processing/gen7/pl2_to_pl2.g7b"
594 };
595
596 static const uint32_t pp_nv12_load_save_pl3_gen7[][4] = {
597 #include "shaders/post_processing/gen7/pl2_to_pl3.g7b"
598 };
599
600 static const uint32_t pp_pl3_load_save_nv12_gen7[][4] = {
601 #include "shaders/post_processing/gen7/pl3_to_pl2.g7b"
602 };
603
604 static const uint32_t pp_pl3_load_save_pl3_gen7[][4] = {
605 #include "shaders/post_processing/gen7/pl3_to_pl3.g7b"
606 };
607
608 static const uint32_t pp_nv12_scaling_gen7[][4] = {
609 #include "shaders/post_processing/gen7/avs.g7b"
610 };
611
612 static const uint32_t pp_nv12_avs_gen7[][4] = {
613 #include "shaders/post_processing/gen7/avs.g7b"
614 };
615
616 static const uint32_t pp_nv12_dndi_gen7[][4] = {
617 #include "shaders/post_processing/gen7/dndi.g7b"
618 };
619
620 static const uint32_t pp_nv12_dn_gen7[][4] = {
621 #include "shaders/post_processing/gen7/nv12_dn_nv12.g7b"
622 };
623 static const uint32_t pp_nv12_load_save_pa_gen7[][4] = {
624 #include "shaders/post_processing/gen7/pl2_to_pa.g7b"
625 };
626 static const uint32_t pp_pl3_load_save_pa_gen7[][4] = {
627 #include "shaders/post_processing/gen7/pl3_to_pa.g7b"
628 };
629 static const uint32_t pp_pa_load_save_nv12_gen7[][4] = {
630 #include "shaders/post_processing/gen7/pa_to_pl2.g7b"
631 };
632 static const uint32_t pp_pa_load_save_pl3_gen7[][4] = {
633 #include "shaders/post_processing/gen7/pa_to_pl3.g7b"
634 };
635 static const uint32_t pp_rgbx_load_save_nv12_gen7[][4] = {
636 };
637 static const uint32_t pp_nv12_load_save_rgbx_gen7[][4] = {
638 };
639
640 static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
641                                            const struct i965_surface *src_surface,
642                                            const VARectangle *src_rect,
643                                            struct i965_surface *dst_surface,
644                                            const VARectangle *dst_rect,
645                                            void *filter_param);
646 static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
647                                              const struct i965_surface *src_surface,
648                                              const VARectangle *src_rect,
649                                              struct i965_surface *dst_surface,
650                                              const VARectangle *dst_rect,
651                                              void *filter_param);
652 static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
653                                            const struct i965_surface *src_surface,
654                                            const VARectangle *src_rect,
655                                            struct i965_surface *dst_surface,
656                                            const VARectangle *dst_rect,
657                                            void *filter_param);
658
659 static struct pp_module pp_modules_gen7[] = {
660     {
661         {
662             "NULL module (for testing)",
663             PP_NULL,
664             pp_null_gen7,
665             sizeof(pp_null_gen7),
666             NULL,
667         },
668
669         pp_null_initialize,
670     },
671
672     {
673         {
674             "NV12_NV12",
675             PP_NV12_LOAD_SAVE_N12,
676             pp_nv12_load_save_nv12_gen7,
677             sizeof(pp_nv12_load_save_nv12_gen7),
678             NULL,
679         },
680
681         gen7_pp_plx_avs_initialize,
682     },
683
684     {
685         {
686             "NV12_PL3",
687             PP_NV12_LOAD_SAVE_PL3,
688             pp_nv12_load_save_pl3_gen7,
689             sizeof(pp_nv12_load_save_pl3_gen7),
690             NULL,
691         },
692         
693         gen7_pp_plx_avs_initialize,
694     },
695
696     {
697         {
698             "PL3_NV12",
699             PP_PL3_LOAD_SAVE_N12,
700             pp_pl3_load_save_nv12_gen7,
701             sizeof(pp_pl3_load_save_nv12_gen7),
702             NULL,
703         },
704
705         gen7_pp_plx_avs_initialize,
706     },
707
708     {
709         {
710             "PL3_PL3",
711             PP_PL3_LOAD_SAVE_N12,
712             pp_pl3_load_save_pl3_gen7,
713             sizeof(pp_pl3_load_save_pl3_gen7),
714             NULL,
715         },
716
717         gen7_pp_plx_avs_initialize,
718     },
719
720     {
721         {
722             "NV12 Scaling module",
723             PP_NV12_SCALING,
724             pp_nv12_scaling_gen7,
725             sizeof(pp_nv12_scaling_gen7),
726             NULL,
727         },
728
729         gen7_pp_plx_avs_initialize,
730     },
731
732     {
733         {
734             "NV12 AVS module",
735             PP_NV12_AVS,
736             pp_nv12_avs_gen7,
737             sizeof(pp_nv12_avs_gen7),
738             NULL,
739         },
740
741         gen7_pp_plx_avs_initialize,
742     },
743
744     {
745         {
746             "NV12 DNDI module",
747             PP_NV12_DNDI,
748             pp_nv12_dndi_gen7,
749             sizeof(pp_nv12_dndi_gen7),
750             NULL,
751         },
752
753         gen7_pp_nv12_dndi_initialize,
754     },
755
756     {
757         {
758             "NV12 DN module",
759             PP_NV12_DN,
760             pp_nv12_dn_gen7,
761             sizeof(pp_nv12_dn_gen7),
762             NULL,
763         },
764
765         gen7_pp_nv12_dn_initialize,
766     },
767     {
768         {
769             "NV12_PA module",
770             PP_NV12_LOAD_SAVE_PA,
771             pp_nv12_load_save_pa_gen7,
772             sizeof(pp_nv12_load_save_pa_gen7),
773             NULL,
774         },
775     
776         gen7_pp_plx_avs_initialize,
777     },
778
779     {
780         {
781             "PL3_PA module",
782             PP_PL3_LOAD_SAVE_PA,
783             pp_pl3_load_save_pa_gen7,
784             sizeof(pp_pl3_load_save_pa_gen7),
785             NULL,
786         },
787     
788         gen7_pp_plx_avs_initialize,
789     },
790
791     {
792         {
793             "PA_NV12 module",
794             PP_PA_LOAD_SAVE_NV12,
795             pp_pa_load_save_nv12_gen7,
796             sizeof(pp_pa_load_save_nv12_gen7),
797             NULL,
798         },
799     
800         gen7_pp_plx_avs_initialize,
801     },
802
803     {
804         {
805             "PA_PL3 module",
806             PP_PA_LOAD_SAVE_PL3,
807             pp_pa_load_save_pl3_gen7,
808             sizeof(pp_pa_load_save_pl3_gen7),
809             NULL,
810         },
811     
812         gen7_pp_plx_avs_initialize,
813     },
814     
815     {
816         {
817             "RGBX_NV12 module",
818             PP_RGBX_LOAD_SAVE_NV12,
819             pp_rgbx_load_save_nv12_gen7,
820             sizeof(pp_rgbx_load_save_nv12_gen7),
821             NULL,
822         },
823     
824         pp_plx_load_save_plx_initialize,
825     },
826
827     {
828         {
829             "NV12_RGBX module",
830             PP_NV12_LOAD_SAVE_RGBX,
831             pp_nv12_load_save_rgbx_gen7,
832             sizeof(pp_nv12_load_save_rgbx_gen7),
833             NULL,
834         },
835     
836         pp_plx_load_save_plx_initialize,
837     },
838             
839 };
840
841 static int
842 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
843 {
844     struct i965_driver_data *i965 = i965_driver_data(ctx);
845     int fourcc;
846
847     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
848         struct object_image *obj_image = IMAGE(surface->id);
849         fourcc = obj_image->image.format.fourcc;
850     } else {
851         struct object_surface *obj_surface = SURFACE(surface->id);
852         fourcc = obj_surface->fourcc;
853     }
854
855     return fourcc;
856 }
857
858 static void
859 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
860 {
861     switch (tiling) {
862     case I915_TILING_NONE:
863         ss->ss3.tiled_surface = 0;
864         ss->ss3.tile_walk = 0;
865         break;
866     case I915_TILING_X:
867         ss->ss3.tiled_surface = 1;
868         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
869         break;
870     case I915_TILING_Y:
871         ss->ss3.tiled_surface = 1;
872         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
873         break;
874     }
875 }
876
877 static void
878 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
879 {
880     switch (tiling) {
881     case I915_TILING_NONE:
882         ss->ss2.tiled_surface = 0;
883         ss->ss2.tile_walk = 0;
884         break;
885     case I915_TILING_X:
886         ss->ss2.tiled_surface = 1;
887         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
888         break;
889     case I915_TILING_Y:
890         ss->ss2.tiled_surface = 1;
891         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
892         break;
893     }
894 }
895
896 static void
897 gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
898 {
899     switch (tiling) {
900     case I915_TILING_NONE:
901         ss->ss0.tiled_surface = 0;
902         ss->ss0.tile_walk = 0;
903         break;
904     case I915_TILING_X:
905         ss->ss0.tiled_surface = 1;
906         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
907         break;
908     case I915_TILING_Y:
909         ss->ss0.tiled_surface = 1;
910         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
911         break;
912     }
913 }
914
915 static void
916 gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
917 {
918     switch (tiling) {
919     case I915_TILING_NONE:
920         ss->ss2.tiled_surface = 0;
921         ss->ss2.tile_walk = 0;
922         break;
923     case I915_TILING_X:
924         ss->ss2.tiled_surface = 1;
925         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
926         break;
927     case I915_TILING_Y:
928         ss->ss2.tiled_surface = 1;
929         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
930         break;
931     }
932 }
933
934 static void
935 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
936 {
937     struct i965_interface_descriptor *desc;
938     dri_bo *bo;
939     int pp_index = pp_context->current_pp;
940
941     bo = pp_context->idrt.bo;
942     dri_bo_map(bo, 1);
943     assert(bo->virtual);
944     desc = bo->virtual;
945     memset(desc, 0, sizeof(*desc));
946     desc->desc0.grf_reg_blocks = 10;
947     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
948     desc->desc1.const_urb_entry_read_offset = 0;
949     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
950     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
951     desc->desc2.sampler_count = 0;
952     desc->desc3.binding_table_entry_count = 0;
953     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
954
955     dri_bo_emit_reloc(bo,
956                       I915_GEM_DOMAIN_INSTRUCTION, 0,
957                       desc->desc0.grf_reg_blocks,
958                       offsetof(struct i965_interface_descriptor, desc0),
959                       pp_context->pp_modules[pp_index].kernel.bo);
960
961     dri_bo_emit_reloc(bo,
962                       I915_GEM_DOMAIN_INSTRUCTION, 0,
963                       desc->desc2.sampler_count << 2,
964                       offsetof(struct i965_interface_descriptor, desc2),
965                       pp_context->sampler_state_table.bo);
966
967     dri_bo_unmap(bo);
968     pp_context->idrt.num_interface_descriptors++;
969 }
970
971 static void
972 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
973 {
974     struct i965_vfe_state *vfe_state;
975     dri_bo *bo;
976
977     bo = pp_context->vfe_state.bo;
978     dri_bo_map(bo, 1);
979     assert(bo->virtual);
980     vfe_state = bo->virtual;
981     memset(vfe_state, 0, sizeof(*vfe_state));
982     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
983     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
984     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
985     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
986     vfe_state->vfe1.children_present = 0;
987     vfe_state->vfe2.interface_descriptor_base = 
988         pp_context->idrt.bo->offset >> 4; /* reloc */
989     dri_bo_emit_reloc(bo,
990                       I915_GEM_DOMAIN_INSTRUCTION, 0,
991                       0,
992                       offsetof(struct i965_vfe_state, vfe2),
993                       pp_context->idrt.bo);
994     dri_bo_unmap(bo);
995 }
996
997 static void
998 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
999 {
1000     unsigned char *constant_buffer;
1001     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1002
1003     assert(sizeof(*pp_static_parameter) == 128);
1004     dri_bo_map(pp_context->curbe.bo, 1);
1005     assert(pp_context->curbe.bo->virtual);
1006     constant_buffer = pp_context->curbe.bo->virtual;
1007     memcpy(constant_buffer, pp_static_parameter, sizeof(*pp_static_parameter));
1008     dri_bo_unmap(pp_context->curbe.bo);
1009 }
1010
1011 static void
1012 ironlake_pp_states_setup(VADriverContextP ctx,
1013                          struct i965_post_processing_context *pp_context)
1014 {
1015     ironlake_pp_interface_descriptor_table(pp_context);
1016     ironlake_pp_vfe_state(pp_context);
1017     ironlake_pp_upload_constants(pp_context);
1018 }
1019
1020 static void
1021 ironlake_pp_pipeline_select(VADriverContextP ctx,
1022                             struct i965_post_processing_context *pp_context)
1023 {
1024     struct intel_batchbuffer *batch = pp_context->batch;
1025
1026     BEGIN_BATCH(batch, 1);
1027     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
1028     ADVANCE_BATCH(batch);
1029 }
1030
1031 static void
1032 ironlake_pp_urb_layout(VADriverContextP ctx,
1033                        struct i965_post_processing_context *pp_context)
1034 {
1035     struct intel_batchbuffer *batch = pp_context->batch;
1036     unsigned int vfe_fence, cs_fence;
1037
1038     vfe_fence = pp_context->urb.cs_start;
1039     cs_fence = pp_context->urb.size;
1040
1041     BEGIN_BATCH(batch, 3);
1042     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
1043     OUT_BATCH(batch, 0);
1044     OUT_BATCH(batch, 
1045               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
1046               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
1047     ADVANCE_BATCH(batch);
1048 }
1049
1050 static void
1051 ironlake_pp_state_base_address(VADriverContextP ctx,
1052                                struct i965_post_processing_context *pp_context)
1053 {
1054     struct intel_batchbuffer *batch = pp_context->batch;
1055
1056     BEGIN_BATCH(batch, 8);
1057     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
1058     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1059     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
1060     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1061     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1062     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1063     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1064     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1065     ADVANCE_BATCH(batch);
1066 }
1067
1068 static void
1069 ironlake_pp_state_pointers(VADriverContextP ctx,
1070                            struct i965_post_processing_context *pp_context)
1071 {
1072     struct intel_batchbuffer *batch = pp_context->batch;
1073
1074     BEGIN_BATCH(batch, 3);
1075     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
1076     OUT_BATCH(batch, 0);
1077     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1078     ADVANCE_BATCH(batch);
1079 }
1080
1081 static void 
1082 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
1083                           struct i965_post_processing_context *pp_context)
1084 {
1085     struct intel_batchbuffer *batch = pp_context->batch;
1086
1087     BEGIN_BATCH(batch, 2);
1088     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
1089     OUT_BATCH(batch,
1090               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
1091               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
1092     ADVANCE_BATCH(batch);
1093 }
1094
1095 static void
1096 ironlake_pp_constant_buffer(VADriverContextP ctx,
1097                             struct i965_post_processing_context *pp_context)
1098 {
1099     struct intel_batchbuffer *batch = pp_context->batch;
1100
1101     BEGIN_BATCH(batch, 2);
1102     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1103     OUT_RELOC(batch, pp_context->curbe.bo,
1104               I915_GEM_DOMAIN_INSTRUCTION, 0,
1105               pp_context->urb.size_cs_entry - 1);
1106     ADVANCE_BATCH(batch);    
1107 }
1108
1109 static void
1110 ironlake_pp_object_walker(VADriverContextP ctx,
1111                           struct i965_post_processing_context *pp_context)
1112 {
1113     struct intel_batchbuffer *batch = pp_context->batch;
1114     int x, x_steps, y, y_steps;
1115     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1116
1117     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
1118     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
1119
1120     for (y = 0; y < y_steps; y++) {
1121         for (x = 0; x < x_steps; x++) {
1122             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
1123                 BEGIN_BATCH(batch, 20);
1124                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
1125                 OUT_BATCH(batch, 0);
1126                 OUT_BATCH(batch, 0); /* no indirect data */
1127                 OUT_BATCH(batch, 0);
1128
1129                 /* inline data grf 5-6 */
1130                 assert(sizeof(*pp_inline_parameter) == 64);
1131                 intel_batchbuffer_data(batch, pp_inline_parameter, sizeof(*pp_inline_parameter));
1132
1133                 ADVANCE_BATCH(batch);
1134             }
1135         }
1136     }
1137 }
1138
1139 static void
1140 ironlake_pp_pipeline_setup(VADriverContextP ctx,
1141                            struct i965_post_processing_context *pp_context)
1142 {
1143     struct intel_batchbuffer *batch = pp_context->batch;
1144
1145     intel_batchbuffer_start_atomic(batch, 0x1000);
1146     intel_batchbuffer_emit_mi_flush(batch);
1147     ironlake_pp_pipeline_select(ctx, pp_context);
1148     ironlake_pp_state_base_address(ctx, pp_context);
1149     ironlake_pp_state_pointers(ctx, pp_context);
1150     ironlake_pp_urb_layout(ctx, pp_context);
1151     ironlake_pp_cs_urb_layout(ctx, pp_context);
1152     ironlake_pp_constant_buffer(ctx, pp_context);
1153     ironlake_pp_object_walker(ctx, pp_context);
1154     intel_batchbuffer_end_atomic(batch);
1155 }
1156
1157 // update u/v offset when the surface format are packed yuv
1158 static void i965_update_src_surface_static_parameter(
1159     VADriverContextP    ctx, 
1160     struct i965_post_processing_context *pp_context,
1161     const struct i965_surface *surface)
1162 {
1163     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1164     int fourcc = pp_get_surface_fourcc(ctx, surface);
1165
1166     switch (fourcc) {
1167     case VA_FOURCC('Y', 'U', 'Y', '2'):
1168         pp_static_parameter->grf1.source_packed_u_offset = 1;
1169         pp_static_parameter->grf1.source_packed_v_offset = 3;
1170         break;
1171     case VA_FOURCC('U', 'Y', 'V', 'Y'):
1172         pp_static_parameter->grf1.source_packed_y_offset = 1;
1173         pp_static_parameter->grf1.source_packed_v_offset = 2;
1174         break;
1175     case VA_FOURCC('B', 'G', 'R', 'X'):
1176     case VA_FOURCC('B', 'G', 'R', 'A'):
1177         pp_static_parameter->grf1.source_rgb_layout = 0;
1178         break;
1179     case VA_FOURCC('R', 'G', 'B', 'X'):
1180     case VA_FOURCC('R', 'G', 'B', 'A'):
1181         pp_static_parameter->grf1.source_rgb_layout = 1;
1182         break;
1183     default:
1184         break;
1185     }
1186     
1187 }
1188
1189 static void i965_update_dst_surface_static_parameter(
1190     VADriverContextP    ctx, 
1191     struct i965_post_processing_context *pp_context,
1192     const struct i965_surface *surface)
1193 {
1194     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1195     int fourcc = pp_get_surface_fourcc(ctx, surface);
1196
1197     switch (fourcc) {
1198     case VA_FOURCC('Y', 'U', 'Y', '2'):
1199         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_u_offset = 1;
1200         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 3;
1201         break;
1202     case VA_FOURCC('U', 'Y', 'V', 'Y'):
1203         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_y_offset = 1;
1204         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 2;
1205         break;
1206     case VA_FOURCC('B', 'G', 'R', 'X'):
1207     case VA_FOURCC('B', 'G', 'R', 'A'):
1208         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 0;
1209         break;
1210     case VA_FOURCC('R', 'G', 'B', 'X'):
1211     case VA_FOURCC('R', 'G', 'B', 'A'):
1212         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 1;
1213         break;
1214     default:
1215         break;
1216     }
1217     
1218 }
1219
1220 static void
1221 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1222                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1223                           int width, int height, int pitch, int format, 
1224                           int index, int is_target)
1225 {
1226     struct i965_surface_state *ss;
1227     dri_bo *ss_bo;
1228     unsigned int tiling;
1229     unsigned int swizzle;
1230
1231     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1232     ss_bo = pp_context->surface_state_binding_table.bo;
1233     assert(ss_bo);
1234
1235     dri_bo_map(ss_bo, True);
1236     assert(ss_bo->virtual);
1237     ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1238     memset(ss, 0, sizeof(*ss));
1239     ss->ss0.surface_type = I965_SURFACE_2D;
1240     ss->ss0.surface_format = format;
1241     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1242     ss->ss2.width = width - 1;
1243     ss->ss2.height = height - 1;
1244     ss->ss3.pitch = pitch - 1;
1245     pp_set_surface_tiling(ss, tiling);
1246     dri_bo_emit_reloc(ss_bo,
1247                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1248                       surf_bo_offset,
1249                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
1250                       surf_bo);
1251     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1252     dri_bo_unmap(ss_bo);
1253 }
1254
1255 static void
1256 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1257                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1258                            int width, int height, int wpitch,
1259                            int xoffset, int yoffset,
1260                            int format, int interleave_chroma,
1261                            int index)
1262 {
1263     struct i965_surface_state2 *ss2;
1264     dri_bo *ss2_bo;
1265     unsigned int tiling;
1266     unsigned int swizzle;
1267
1268     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1269     ss2_bo = pp_context->surface_state_binding_table.bo;
1270     assert(ss2_bo);
1271
1272     dri_bo_map(ss2_bo, True);
1273     assert(ss2_bo->virtual);
1274     ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1275     memset(ss2, 0, sizeof(*ss2));
1276     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1277     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1278     ss2->ss1.width = width - 1;
1279     ss2->ss1.height = height - 1;
1280     ss2->ss2.pitch = wpitch - 1;
1281     ss2->ss2.interleave_chroma = interleave_chroma;
1282     ss2->ss2.surface_format = format;
1283     ss2->ss3.x_offset_for_cb = xoffset;
1284     ss2->ss3.y_offset_for_cb = yoffset;
1285     pp_set_surface2_tiling(ss2, tiling);
1286     dri_bo_emit_reloc(ss2_bo,
1287                       I915_GEM_DOMAIN_RENDER, 0,
1288                       surf_bo_offset,
1289                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
1290                       surf_bo);
1291     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1292     dri_bo_unmap(ss2_bo);
1293 }
1294
1295 static void
1296 gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1297                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1298                           int width, int height, int pitch, int format, 
1299                           int index, int is_target)
1300 {
1301     struct gen7_surface_state *ss;
1302     dri_bo *ss_bo;
1303     unsigned int tiling;
1304     unsigned int swizzle;
1305
1306     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1307     ss_bo = pp_context->surface_state_binding_table.bo;
1308     assert(ss_bo);
1309
1310     dri_bo_map(ss_bo, True);
1311     assert(ss_bo->virtual);
1312     ss = (struct gen7_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1313     memset(ss, 0, sizeof(*ss));
1314     ss->ss0.surface_type = I965_SURFACE_2D;
1315     ss->ss0.surface_format = format;
1316     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1317     ss->ss2.width = width - 1;
1318     ss->ss2.height = height - 1;
1319     ss->ss3.pitch = pitch - 1;
1320     gen7_pp_set_surface_tiling(ss, tiling);
1321     dri_bo_emit_reloc(ss_bo,
1322                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1323                       surf_bo_offset,
1324                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
1325                       surf_bo);
1326     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1327     dri_bo_unmap(ss_bo);
1328 }
1329
1330 static void
1331 gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1332                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1333                            int width, int height, int wpitch,
1334                            int xoffset, int yoffset,
1335                            int format, int interleave_chroma,
1336                            int index)
1337 {
1338     struct gen7_surface_state2 *ss2;
1339     dri_bo *ss2_bo;
1340     unsigned int tiling;
1341     unsigned int swizzle;
1342
1343     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1344     ss2_bo = pp_context->surface_state_binding_table.bo;
1345     assert(ss2_bo);
1346
1347     dri_bo_map(ss2_bo, True);
1348     assert(ss2_bo->virtual);
1349     ss2 = (struct gen7_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1350     memset(ss2, 0, sizeof(*ss2));
1351     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1352     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1353     ss2->ss1.width = width - 1;
1354     ss2->ss1.height = height - 1;
1355     ss2->ss2.pitch = wpitch - 1;
1356     ss2->ss2.interleave_chroma = interleave_chroma;
1357     ss2->ss2.surface_format = format;
1358     ss2->ss3.x_offset_for_cb = xoffset;
1359     ss2->ss3.y_offset_for_cb = yoffset;
1360     gen7_pp_set_surface2_tiling(ss2, tiling);
1361     dri_bo_emit_reloc(ss2_bo,
1362                       I915_GEM_DOMAIN_RENDER, 0,
1363                       surf_bo_offset,
1364                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
1365                       surf_bo);
1366     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1367     dri_bo_unmap(ss2_bo);
1368 }
1369
1370 static void 
1371 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1372                                 const struct i965_surface *surface, 
1373                                 int base_index, int is_target,
1374                                 int *width, int *height, int *pitch, int *offset)
1375 {
1376     struct i965_driver_data *i965 = i965_driver_data(ctx);
1377     struct object_surface *obj_surface;
1378     struct object_image *obj_image;
1379     dri_bo *bo;
1380     int fourcc = pp_get_surface_fourcc(ctx, surface);
1381     const int Y = 0;
1382     const int U = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 2 : 1;
1383     const int V = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 1 : 2;
1384     const int UV = 1;
1385     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1386     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); 
1387     int full_packed_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || 
1388                               fourcc == VA_FOURCC('R', 'G', 'B', 'X') || 
1389                               fourcc == VA_FOURCC('B', 'G', 'R', 'A') || 
1390                               fourcc == VA_FOURCC('B', 'G', 'R', 'X'));
1391     int scale_factor_of_1st_plane_width_in_byte = 1;
1392                               
1393     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1394         obj_surface = SURFACE(surface->id);
1395         bo = obj_surface->bo;
1396         width[0] = obj_surface->orig_width;
1397         height[0] = obj_surface->orig_height;
1398         pitch[0] = obj_surface->width;
1399         offset[0] = 0;
1400
1401         if (full_packed_format) {
1402             scale_factor_of_1st_plane_width_in_byte = 4; 
1403             pitch[0] = obj_surface->width * 4;
1404         }
1405         else if (packed_yuv ) {
1406             scale_factor_of_1st_plane_width_in_byte =  2; 
1407             pitch[0] = obj_surface->width * 2;
1408         }
1409         else if (interleaved_uv) {
1410             width[1] = obj_surface->orig_width;
1411             height[1] = obj_surface->orig_height / 2;
1412             pitch[1] = obj_surface->width;
1413             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1414         } else {
1415             width[1] = obj_surface->orig_width / 2;
1416             height[1] = obj_surface->orig_height / 2;
1417             pitch[1] = obj_surface->width / 2;
1418             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1419             width[2] = obj_surface->orig_width / 2;
1420             height[2] = obj_surface->orig_height / 2;
1421             pitch[2] = obj_surface->width / 2;
1422             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
1423         }
1424     } else {
1425         obj_image = IMAGE(surface->id);
1426         bo = obj_image->bo;
1427         width[0] = obj_image->image.width;
1428         height[0] = obj_image->image.height;
1429         pitch[0] = obj_image->image.pitches[0];
1430         offset[0] = obj_image->image.offsets[0];
1431
1432         if (full_packed_format) {
1433             scale_factor_of_1st_plane_width_in_byte = 4;
1434         }
1435         else if (packed_yuv ) {
1436             scale_factor_of_1st_plane_width_in_byte = 2;
1437         }
1438         else if (interleaved_uv) {
1439             width[1] = obj_image->image.width;
1440             height[1] = obj_image->image.height / 2;
1441             pitch[1] = obj_image->image.pitches[1];
1442             offset[1] = obj_image->image.offsets[1];
1443         } else {
1444             width[1] = obj_image->image.width / 2;
1445             height[1] = obj_image->image.height / 2;
1446             pitch[1] = obj_image->image.pitches[1];
1447             offset[1] = obj_image->image.offsets[1];
1448             width[2] = obj_image->image.width / 2;
1449             height[2] = obj_image->image.height / 2;
1450             pitch[2] = obj_image->image.pitches[2];
1451             offset[2] = obj_image->image.offsets[2];
1452         }
1453     }
1454
1455     /* Y surface */
1456     i965_pp_set_surface_state(ctx, pp_context,
1457                               bo, offset[Y],
1458                               width[Y] *scale_factor_of_1st_plane_width_in_byte / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
1459                               base_index, is_target);
1460
1461     if (!packed_yuv && !full_packed_format) {
1462         if (interleaved_uv) {
1463             i965_pp_set_surface_state(ctx, pp_context,
1464                                       bo, offset[UV],
1465                                       width[UV] / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
1466                                       base_index + 1, is_target);
1467         } else {
1468             /* U surface */
1469             i965_pp_set_surface_state(ctx, pp_context,
1470                                       bo, offset[U],
1471                                       width[U] / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
1472                                       base_index + 1, is_target);
1473
1474             /* V surface */
1475             i965_pp_set_surface_state(ctx, pp_context,
1476                                       bo, offset[V],
1477                                       width[V] / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
1478                                       base_index + 2, is_target);
1479         }
1480     }
1481
1482 }
1483
1484 static void 
1485 gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1486                                      const struct i965_surface *surface, 
1487                                      int base_index, int is_target,
1488                                      int *width, int *height, int *pitch, int *offset)
1489 {
1490     struct i965_driver_data *i965 = i965_driver_data(ctx);
1491     struct object_surface *obj_surface;
1492     struct object_image *obj_image;
1493     dri_bo *bo;
1494     int fourcc = pp_get_surface_fourcc(ctx, surface);
1495     const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1496                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1;
1497     const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1498                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 1 : 2;
1499     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1500     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y'));
1501
1502     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1503         obj_surface = SURFACE(surface->id);
1504         bo = obj_surface->bo;
1505         width[0] = obj_surface->orig_width;
1506         height[0] = obj_surface->orig_height;
1507         pitch[0] = obj_surface->width;
1508         offset[0] = 0;
1509
1510         if (packed_yuv) {
1511             if (is_target)
1512                 width[0] = obj_surface->orig_width * 2; /* surface format is R8, so double the width */
1513             else
1514                 width[0] = obj_surface->orig_width;     /* surface foramt is YCBCR, width is specified in units of pixels */
1515
1516             pitch[0] = obj_surface->width * 2;
1517         }
1518
1519         width[1] = obj_surface->cb_cr_width;
1520         height[1] = obj_surface->cb_cr_height;
1521         pitch[1] = obj_surface->cb_cr_pitch;
1522         offset[1] = obj_surface->y_cb_offset * obj_surface->width;
1523
1524         width[2] = obj_surface->cb_cr_width;
1525         height[2] = obj_surface->cb_cr_height;
1526         pitch[2] = obj_surface->cb_cr_pitch;
1527         offset[2] = obj_surface->y_cr_offset * obj_surface->width;
1528     } else {
1529         obj_image = IMAGE(surface->id);
1530         bo = obj_image->bo;
1531         width[0] = obj_image->image.width;
1532         height[0] = obj_image->image.height;
1533         pitch[0] = obj_image->image.pitches[0];
1534         offset[0] = obj_image->image.offsets[0];
1535
1536         if (packed_yuv) {
1537             if (is_target)
1538                 width[0] = obj_image->image.width * 2;  /* surface format is R8, so double the width */
1539             else
1540                 width[0] = obj_image->image.width;      /* surface foramt is YCBCR, width is specified in units of pixels */
1541         } else if (interleaved_uv) {
1542             width[1] = obj_image->image.width / 2;
1543             height[1] = obj_image->image.height / 2;
1544             pitch[1] = obj_image->image.pitches[1];
1545             offset[1] = obj_image->image.offsets[1];
1546         } else {
1547             width[1] = obj_image->image.width / 2;
1548             height[1] = obj_image->image.height / 2;
1549             pitch[1] = obj_image->image.pitches[U];
1550             offset[1] = obj_image->image.offsets[U];
1551             width[2] = obj_image->image.width / 2;
1552             height[2] = obj_image->image.height / 2;
1553             pitch[2] = obj_image->image.pitches[V];
1554             offset[2] = obj_image->image.offsets[V];
1555         }
1556     }
1557
1558     if (is_target) {
1559         gen7_pp_set_surface_state(ctx, pp_context,
1560                                   bo, 0,
1561                                   width[0] / 4, height[0], pitch[0],
1562                                   I965_SURFACEFORMAT_R8_SINT,
1563                                   base_index, 1);
1564
1565         if (!packed_yuv) {
1566             if (interleaved_uv) {
1567                 gen7_pp_set_surface_state(ctx, pp_context,
1568                                           bo, offset[1],
1569                                           width[1] / 2, height[1], pitch[1],
1570                                           I965_SURFACEFORMAT_R8G8_SINT,
1571                                           base_index + 1, 1);
1572             } else {
1573                 gen7_pp_set_surface_state(ctx, pp_context,
1574                                           bo, offset[1],
1575                                           width[1] / 4, height[1], pitch[1],
1576                                           I965_SURFACEFORMAT_R8_SINT,
1577                                           base_index + 1, 1);
1578                 gen7_pp_set_surface_state(ctx, pp_context,
1579                                           bo, offset[2],
1580                                           width[2] / 4, height[2], pitch[2],
1581                                           I965_SURFACEFORMAT_R8_SINT,
1582                                           base_index + 2, 1);
1583             }
1584         }
1585     } else {
1586         int format0 = SURFACE_FORMAT_Y8_UNORM;
1587
1588         switch (fourcc) {
1589         case VA_FOURCC('Y', 'U', 'Y', '2'):
1590             format0 = SURFACE_FORMAT_YCRCB_NORMAL;
1591             break;
1592
1593         case VA_FOURCC('U', 'Y', 'V', 'Y'):
1594             format0 = SURFACE_FORMAT_YCRCB_SWAPY;
1595             break;
1596
1597         default:
1598             break;
1599         }
1600
1601         gen7_pp_set_surface2_state(ctx, pp_context,
1602                                    bo, offset[0],
1603                                    width[0], height[0], pitch[0],
1604                                    0, 0,
1605                                    format0, 0,
1606                                    base_index);
1607
1608         if (!packed_yuv) {
1609             if (interleaved_uv) {
1610                 gen7_pp_set_surface2_state(ctx, pp_context,
1611                                            bo, offset[1],
1612                                            width[1], height[1], pitch[1],
1613                                            0, 0,
1614                                            SURFACE_FORMAT_R8B8_UNORM, 0,
1615                                            base_index + 1);
1616             } else {
1617                 gen7_pp_set_surface2_state(ctx, pp_context,
1618                                            bo, offset[1],
1619                                            width[1], height[1], pitch[1],
1620                                            0, 0,
1621                                            SURFACE_FORMAT_R8_UNORM, 0,
1622                                            base_index + 1);
1623                 gen7_pp_set_surface2_state(ctx, pp_context,
1624                                            bo, offset[2],
1625                                            width[2], height[2], pitch[2],
1626                                            0, 0,
1627                                            SURFACE_FORMAT_R8_UNORM, 0,
1628                                            base_index + 2);
1629             }
1630         }
1631     }
1632 }
1633
/* Horizontal step count of the null filter: always a single step. */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context;      /* unused */

    return 1;
}
1639
/* Vertical step count of the null filter: always a single step. */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context;      /* unused */

    return 1;
}
1645
/*
 * Per-block callback of the null filter: there is nothing to configure,
 * so it ignores its arguments and always returns 0.
 */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;
    (void)x;
    (void)y;

    return 0;
}
1651
1652 static VAStatus
1653 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1654                    const struct i965_surface *src_surface,
1655                    const VARectangle *src_rect,
1656                    struct i965_surface *dst_surface,
1657                    const VARectangle *dst_rect,
1658                    void *filter_param)
1659 {
1660     /* private function & data */
1661     pp_context->pp_x_steps = pp_null_x_steps;
1662     pp_context->pp_y_steps = pp_null_y_steps;
1663     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
1664
1665     dst_surface->flags = src_surface->flags;
1666
1667     return VA_STATUS_SUCCESS;
1668 }
1669
/* Horizontal step count of the load/save filter: always a single step. */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context;      /* unused */

    return 1;
}
1675
1676 static int
1677 pp_load_save_y_steps(void *private_context)
1678 {
1679     struct pp_load_save_context *pp_load_save_context = private_context;
1680
1681     return pp_load_save_context->dest_h / 8;
1682 }
1683
1684 static int
1685 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1686 {
1687     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1688     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
1689
1690     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_load_save_context->dest_x;
1691     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_load_save_context->dest_y;
1692
1693     return 0;
1694 }
1695
1696 static void calculate_boundary_block_mask(struct i965_post_processing_context *pp_context, const VARectangle *dst_rect)
1697 {
1698     int i;
1699     /* x offset of dest surface must be dword aligned.
1700      * so we have to extend dst surface on left edge, and mask out pixels not interested
1701      */
1702     if (dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT) {
1703         pp_context->block_horizontal_mask_left = 0;
1704         for (i=dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; i<GPU_ASM_BLOCK_WIDTH; i++)
1705         {
1706             pp_context->block_horizontal_mask_left |= 1<<i;
1707         }
1708     }
1709     else {
1710         pp_context->block_horizontal_mask_left = 0xffff;
1711     }
1712     
1713     int dst_width_adjust = dst_rect->width + dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; 
1714     if (dst_width_adjust%GPU_ASM_BLOCK_WIDTH){
1715         pp_context->block_horizontal_mask_right = (1 << (dst_width_adjust%GPU_ASM_BLOCK_WIDTH)) - 1;
1716     }
1717     else {
1718         pp_context->block_horizontal_mask_right = 0xffff;
1719     }
1720     
1721     if (dst_rect->height%GPU_ASM_BLOCK_HEIGHT){
1722         pp_context->block_vertical_mask_bottom = (1 << (dst_rect->height%GPU_ASM_BLOCK_HEIGHT)) - 1;
1723     }
1724     else {
1725         pp_context->block_vertical_mask_bottom = 0xff;
1726     }
1727
1728 }
1729 static VAStatus
1730 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1731                                 const struct i965_surface *src_surface,
1732                                 const VARectangle *src_rect,
1733                                 struct i965_surface *dst_surface,
1734                                 const VARectangle *dst_rect,
1735                                 void *filter_param)
1736 {
1737     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
1738     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1739     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1740     int width[3], height[3], pitch[3], offset[3];
1741     const int Y = 0;
1742
1743     /* source surface */
1744     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
1745                                     width, height, pitch, offset);
1746
1747     /* destination surface */
1748     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
1749                                     width, height, pitch, offset);
1750
1751     /* private function & data */
1752     pp_context->pp_x_steps = pp_load_save_x_steps;
1753     pp_context->pp_y_steps = pp_load_save_y_steps;
1754     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
1755
1756     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;;
1757     pp_load_save_context->dest_x = dst_rect->x - dst_left_edge_extend;
1758     pp_load_save_context->dest_y = dst_rect->y;
1759     pp_load_save_context->dest_h = ALIGN(dst_rect->height, 8);
1760     pp_load_save_context->dest_w = ALIGN(dst_rect->width+dst_left_edge_extend, 16);
1761
1762     pp_inline_parameter->grf5.block_count_x = pp_load_save_context->dest_w / 16;   /* 1 x N */
1763     pp_inline_parameter->grf5.number_blocks = pp_load_save_context->dest_w / 16;
1764
1765     pp_static_parameter->grf3.horizontal_origin_offset = src_rect->x;
1766     pp_static_parameter->grf3.vertical_origin_offset = src_rect->y;
1767
1768     // update u/v offset for packed yuv
1769     i965_update_src_surface_static_parameter (ctx, pp_context, src_surface);
1770     i965_update_dst_surface_static_parameter (ctx, pp_context, dst_surface);
1771
1772     dst_surface->flags = src_surface->flags;
1773
1774     return VA_STATUS_SUCCESS;
1775 }
1776
/*
 * Horizontal step count for the scaling kernel.
 *
 * The private context is not consulted: the result is always 1.
 */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context;      /* intentionally unused */

    return 1;
}
1782
1783 static int
1784 pp_scaling_y_steps(void *private_context)
1785 {
1786     struct pp_scaling_context *pp_scaling_context = private_context;
1787
1788     return pp_scaling_context->dest_h / 8;
1789 }
1790
1791 static int
1792 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1793 {
1794     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
1795     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1796     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1797     float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1798     float src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
1799
1800     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
1801     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
1802     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
1803     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
1804     
1805     return 0;
1806 }
1807
1808 static VAStatus
1809 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1810                            const struct i965_surface *src_surface,
1811                            const VARectangle *src_rect,
1812                            struct i965_surface *dst_surface,
1813                            const VARectangle *dst_rect,
1814                            void *filter_param)
1815 {
1816     struct i965_driver_data *i965 = i965_driver_data(ctx);
1817     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
1818     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1819     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1820     struct object_surface *obj_surface;
1821     struct i965_sampler_state *sampler_state;
1822     int in_w, in_h, in_wpitch, in_hpitch;
1823     int out_w, out_h, out_wpitch, out_hpitch;
1824
1825     /* source surface */
1826     obj_surface = SURFACE(src_surface->id);
1827     in_w = obj_surface->orig_width;
1828     in_h = obj_surface->orig_height;
1829     in_wpitch = obj_surface->width;
1830     in_hpitch = obj_surface->height;
1831
1832     /* source Y surface index 1 */
1833     i965_pp_set_surface_state(ctx, pp_context,
1834                               obj_surface->bo, 0,
1835                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1836                               1, 0);
1837
1838     /* source UV surface index 2 */
1839     i965_pp_set_surface_state(ctx, pp_context,
1840                               obj_surface->bo, in_wpitch * in_hpitch,
1841                               in_w / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1842                               2, 0);
1843
1844     /* destination surface */
1845     obj_surface = SURFACE(dst_surface->id);
1846     out_w = obj_surface->orig_width;
1847     out_h = obj_surface->orig_height;
1848     out_wpitch = obj_surface->width;
1849     out_hpitch = obj_surface->height;
1850
1851     /* destination Y surface index 7 */
1852     i965_pp_set_surface_state(ctx, pp_context,
1853                               obj_surface->bo, 0,
1854                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1855                               7, 1);
1856
1857     /* destination UV surface index 8 */
1858     i965_pp_set_surface_state(ctx, pp_context,
1859                               obj_surface->bo, out_wpitch * out_hpitch,
1860                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1861                               8, 1);
1862
1863     /* sampler state */
1864     dri_bo_map(pp_context->sampler_state_table.bo, True);
1865     assert(pp_context->sampler_state_table.bo->virtual);
1866     sampler_state = pp_context->sampler_state_table.bo->virtual;
1867
1868     /* SIMD16 Y index 1 */
1869     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
1870     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1871     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1872     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1873     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1874
1875     /* SIMD16 UV index 2 */
1876     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
1877     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1878     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1879     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1880     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1881
1882     dri_bo_unmap(pp_context->sampler_state_table.bo);
1883
1884     /* private function & data */
1885     pp_context->pp_x_steps = pp_scaling_x_steps;
1886     pp_context->pp_y_steps = pp_scaling_y_steps;
1887     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
1888
1889     pp_scaling_context->dest_x = dst_rect->x;
1890     pp_scaling_context->dest_y = dst_rect->y;
1891     pp_scaling_context->dest_w = ALIGN(dst_rect->width, 16);
1892     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 16);
1893     pp_scaling_context->src_normalized_x = (float)src_rect->x / in_w;
1894     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
1895
1896     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
1897
1898     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / dst_rect->width;
1899     pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
1900     pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
1901
1902     dst_surface->flags = src_surface->flags;
1903
1904     return VA_STATUS_SUCCESS;
1905 }
1906
1907 static int
1908 pp_avs_x_steps(void *private_context)
1909 {
1910     struct pp_avs_context *pp_avs_context = private_context;
1911
1912     return pp_avs_context->dest_w / 16;
1913 }
1914
/*
 * Vertical step count for the AVS kernel.
 *
 * The private context is not consulted: the result is always 1.
 */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;      /* intentionally unused */

    return 1;
}
1920
1921 static int
1922 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1923 {
1924     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1925     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1926     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1927     float src_x_steping, src_y_steping, video_step_delta;
1928     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
1929
1930     if (pp_static_parameter->grf4.r4_2.avs.nlas == 0) {
1931         src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1932         pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_avs_context->src_normalized_x;
1933     } else if (tmp_w >= pp_avs_context->dest_w) {
1934         pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1935         pp_inline_parameter->grf6.video_step_delta = 0;
1936         
1937         if (x == 0) {
1938             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
1939                 pp_avs_context->src_normalized_x;
1940         } else {
1941             src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1942             video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1943             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1944                 16 * 15 * video_step_delta / 2;
1945         }
1946     } else {
1947         int n0, n1, n2, nls_left, nls_right;
1948         int factor_a = 5, factor_b = 4;
1949         float f;
1950
1951         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
1952         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
1953         n2 = tmp_w / (16 * factor_a);
1954         nls_left = n0 + n2;
1955         nls_right = n1 + n2;
1956         f = (float) n2 * 16 / tmp_w;
1957         
1958         if (n0 < 5) {
1959             pp_inline_parameter->grf6.video_step_delta = 0.0;
1960
1961             if (x == 0) {
1962                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
1963                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1964             } else {
1965                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1966                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1967                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1968                     16 * 15 * video_step_delta / 2;
1969             }
1970         } else {
1971             if (x < nls_left) {
1972                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
1973                 float a = f / (nls_left * 16 * factor_b);
1974                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
1975                 
1976                 pp_inline_parameter->grf6.video_step_delta = b;
1977
1978                 if (x == 0) {
1979                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1980                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a;
1981                 } else {
1982                     src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1983                     video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1984                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1985                         16 * 15 * video_step_delta / 2;
1986                     pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
1987                 }
1988             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
1989                 /* scale the center linearly */
1990                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1991                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1992                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1993                     16 * 15 * video_step_delta / 2;
1994                 pp_inline_parameter->grf6.video_step_delta = 0.0;
1995                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1996             } else {
1997                 float a = f / (nls_right * 16 * factor_b);
1998                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
1999
2000                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2001                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2002                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2003                     16 * 15 * video_step_delta / 2;
2004                 pp_inline_parameter->grf6.video_step_delta = -b;
2005
2006                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
2007                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
2008                 else
2009                     pp_inline_parameter->grf5.normalized_video_x_scaling_step -= b * 16;
2010             }
2011         }
2012     }
2013
2014     src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2015     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
2016     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2017     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
2018
2019     return 0;
2020 }
2021
2022 static VAStatus
2023 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2024                        const struct i965_surface *src_surface,
2025                        const VARectangle *src_rect,
2026                        struct i965_surface *dst_surface,
2027                        const VARectangle *dst_rect,
2028                        void *filter_param,
2029                        int nlas)
2030 {
2031     struct i965_driver_data *i965 = i965_driver_data(ctx);
2032     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2033     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2034     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2035     struct object_surface *obj_surface;
2036     struct i965_sampler_8x8 *sampler_8x8;
2037     struct i965_sampler_8x8_state *sampler_8x8_state;
2038     int index;
2039     int in_w, in_h, in_wpitch, in_hpitch;
2040     int out_w, out_h, out_wpitch, out_hpitch;
2041     int i;
2042
2043     /* surface */
2044     obj_surface = SURFACE(src_surface->id);
2045     in_w = obj_surface->orig_width;
2046     in_h = obj_surface->orig_height;
2047     in_wpitch = obj_surface->width;
2048     in_hpitch = obj_surface->height;
2049
2050     /* source Y surface index 1 */
2051     i965_pp_set_surface2_state(ctx, pp_context,
2052                                obj_surface->bo, 0,
2053                                in_w, in_h, in_wpitch,
2054                                0, 0,
2055                                SURFACE_FORMAT_Y8_UNORM, 0,
2056                                1);
2057
2058     /* source UV surface index 2 */
2059     i965_pp_set_surface2_state(ctx, pp_context,
2060                                obj_surface->bo, in_wpitch * in_hpitch,
2061                                in_w / 2, in_h / 2, in_wpitch,
2062                                0, 0,
2063                                SURFACE_FORMAT_R8B8_UNORM, 0,
2064                                2);
2065
2066     /* destination surface */
2067     obj_surface = SURFACE(dst_surface->id);
2068     out_w = obj_surface->orig_width;
2069     out_h = obj_surface->orig_height;
2070     out_wpitch = obj_surface->width;
2071     out_hpitch = obj_surface->height;
2072     assert(out_w <= out_wpitch && out_h <= out_hpitch);
2073
2074     /* destination Y surface index 7 */
2075     i965_pp_set_surface_state(ctx, pp_context,
2076                               obj_surface->bo, 0,
2077                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2078                               7, 1);
2079
2080     /* destination UV surface index 8 */
2081     i965_pp_set_surface_state(ctx, pp_context,
2082                               obj_surface->bo, out_wpitch * out_hpitch,
2083                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2084                               8, 1);
2085
2086     /* sampler 8x8 state */
2087     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2088     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2089     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2090     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2091     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2092
2093     for (i = 0; i < 17; i++) {
2094         /* for Y channel, currently ignore */
2095         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x00;
2096         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x00;
2097         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x08;
2098         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x18;
2099         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x18;
2100         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x08;
2101         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x00;
2102         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x00;
2103         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x00;
2104         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x00;
2105         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x10;
2106         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x10;
2107         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x10;
2108         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x10;
2109         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x00;
2110         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x00;
2111         /* for U/V channel, 0.25 */
2112         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2113         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2114         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
2115         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
2116         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
2117         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
2118         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2119         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2120         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2121         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2122         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
2123         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
2124         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
2125         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
2126         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2127         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2128     }
2129
2130     sampler_8x8_state->dw136.default_sharpness_level = 0;
2131     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
2132     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2133     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2134     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2135
2136     /* sampler 8x8 */
2137     dri_bo_map(pp_context->sampler_state_table.bo, True);
2138     assert(pp_context->sampler_state_table.bo->virtual);
2139     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
2140     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2141
2142     /* sample_8x8 Y index 1 */
2143     index = 1;
2144     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2145     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2146     sampler_8x8[index].dw0.ief_bypass = 1;
2147     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2148     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2149     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2150     sampler_8x8[index].dw2.global_noise_estimation = 22;
2151     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2152     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2153     sampler_8x8[index].dw3.strong_edge_weight = 7;
2154     sampler_8x8[index].dw3.regular_weight = 2;
2155     sampler_8x8[index].dw3.non_edge_weight = 0;
2156     sampler_8x8[index].dw3.gain_factor = 40;
2157     sampler_8x8[index].dw4.steepness_boost = 0;
2158     sampler_8x8[index].dw4.steepness_threshold = 0;
2159     sampler_8x8[index].dw4.mr_boost = 0;
2160     sampler_8x8[index].dw4.mr_threshold = 5;
2161     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2162     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2163     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2164     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2165     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2166     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2167     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2168     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2169     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2170     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2171     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2172     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2173     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2174     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2175     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2176     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2177     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2178     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2179     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2180     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2181     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2182     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2183     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2184     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2185     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2186     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2187     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2188     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2189     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2190     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2191     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2192     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2193     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2194     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2195     sampler_8x8[index].dw13.limiter_boost = 0;
2196     sampler_8x8[index].dw13.minimum_limiter = 10;
2197     sampler_8x8[index].dw13.maximum_limiter = 11;
2198     sampler_8x8[index].dw14.clip_limiter = 130;
2199     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2200                       I915_GEM_DOMAIN_RENDER, 
2201                       0,
2202                       0,
2203                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2204                       pp_context->sampler_state_table.bo_8x8);
2205
2206     /* sample_8x8 UV index 2 */
2207     index = 2;
2208     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2209     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2210     sampler_8x8[index].dw0.ief_bypass = 1;
2211     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2212     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2213     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2214     sampler_8x8[index].dw2.global_noise_estimation = 22;
2215     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2216     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2217     sampler_8x8[index].dw3.strong_edge_weight = 7;
2218     sampler_8x8[index].dw3.regular_weight = 2;
2219     sampler_8x8[index].dw3.non_edge_weight = 0;
2220     sampler_8x8[index].dw3.gain_factor = 40;
2221     sampler_8x8[index].dw4.steepness_boost = 0;
2222     sampler_8x8[index].dw4.steepness_threshold = 0;
2223     sampler_8x8[index].dw4.mr_boost = 0;
2224     sampler_8x8[index].dw4.mr_threshold = 5;
2225     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2226     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2227     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2228     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2229     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2230     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2231     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2232     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2233     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2234     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2235     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2236     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2237     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2238     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2239     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2240     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2241     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2242     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2243     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2244     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2245     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2246     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2247     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2248     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2249     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2250     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2251     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2252     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2253     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2254     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2255     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2256     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2257     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2258     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2259     sampler_8x8[index].dw13.limiter_boost = 0;
2260     sampler_8x8[index].dw13.minimum_limiter = 10;
2261     sampler_8x8[index].dw13.maximum_limiter = 11;
2262     sampler_8x8[index].dw14.clip_limiter = 130;
2263     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2264                       I915_GEM_DOMAIN_RENDER, 
2265                       0,
2266                       0,
2267                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2268                       pp_context->sampler_state_table.bo_8x8);
2269
2270     dri_bo_unmap(pp_context->sampler_state_table.bo);
2271
2272     /* private function & data */
2273     pp_context->pp_x_steps = pp_avs_x_steps;
2274     pp_context->pp_y_steps = pp_avs_y_steps;
2275     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
2276
2277     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
2278     float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
2279     pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
2280     pp_avs_context->dest_y = dst_rect->y;
2281     pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2282     pp_avs_context->dest_h = ALIGN(dst_rect->height, 8);
2283     pp_avs_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
2284     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
2285     pp_avs_context->src_w = src_rect->width + src_left_edge_extend;
2286     pp_avs_context->src_h = src_rect->height;
2287
2288     pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
2289     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2290
2291     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
2292     pp_inline_parameter->grf5.block_count_x = 1;        /* M x 1 */
2293     pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
2294     pp_inline_parameter->grf6.video_step_delta = 0.0;
2295
2296     dst_surface->flags = src_surface->flags;
2297
2298     return VA_STATUS_SUCCESS;
2299 }
2300
2301 static VAStatus
2302 pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2303                             const struct i965_surface *src_surface,
2304                             const VARectangle *src_rect,
2305                             struct i965_surface *dst_surface,
2306                             const VARectangle *dst_rect,
2307                             void *filter_param)
2308 {
2309     return pp_nv12_avs_initialize(ctx, pp_context,
2310                                   src_surface,
2311                                   src_rect,
2312                                   dst_surface,
2313                                   dst_rect,
2314                                   filter_param,
2315                                   1);
2316 }
2317
2318 static VAStatus
2319 gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2320                              const struct i965_surface *src_surface,
2321                              const VARectangle *src_rect,
2322                              struct i965_surface *dst_surface,
2323                              const VARectangle *dst_rect,
2324                              void *filter_param)
2325 {
2326     return pp_nv12_avs_initialize(ctx, pp_context,
2327                                   src_surface,
2328                                   src_rect,
2329                                   dst_surface,
2330                                   dst_rect,
2331                                   filter_param,
2332                                   0);    
2333 }
2334
2335 static int
2336 gen7_pp_avs_x_steps(void *private_context)
2337 {
2338     struct pp_avs_context *pp_avs_context = private_context;
2339
2340     return pp_avs_context->dest_w / 16;
2341 }
2342
2343 static int
2344 gen7_pp_avs_y_steps(void *private_context)
2345 {
2346     struct pp_avs_context *pp_avs_context = private_context;
2347
2348     return pp_avs_context->dest_h / 16;
2349 }
2350
2351 static int
2352 gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2353 {
2354     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2355     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2356
2357     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2358     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
2359     pp_inline_parameter->grf7.constant_0 = 0xffffffff;
2360     pp_inline_parameter->grf7.sampler_load_main_video_x_scaling_step = 1.0 / pp_avs_context->src_w;
2361
2362     return 0;
2363 }
2364
2365 static void gen7_update_src_surface_uv_offset(VADriverContextP    ctx, 
2366                                               struct i965_post_processing_context *pp_context,
2367                                               const struct i965_surface *surface)
2368 {
2369     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2370     int fourcc = pp_get_surface_fourcc(ctx, surface);
2371     
2372     if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) {
2373         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
2374         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
2375         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
2376     } else if (fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
2377         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 1;
2378         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 0;
2379         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 2;
2380     }
2381 }
2382
2383 static VAStatus
2384 gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2385                            const struct i965_surface *src_surface,
2386                            const VARectangle *src_rect,
2387                            struct i965_surface *dst_surface,
2388                            const VARectangle *dst_rect,
2389                            void *filter_param)
2390 {
2391     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2392     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2393     struct gen7_sampler_8x8 *sampler_8x8;
2394     struct i965_sampler_8x8_state *sampler_8x8_state;
2395     int index, i;
2396     int width[3], height[3], pitch[3], offset[3];
2397     int src_width, src_height;
2398
2399     /* source surface */
2400     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
2401                                          width, height, pitch, offset);
2402     src_width = width[0];
2403     src_height = height[0];
2404
2405     /* destination surface */
2406     gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
2407                                          width, height, pitch, offset);
2408
2409     /* sampler 8x8 state */
2410     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2411     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2412     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2413     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2414     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2415
2416     for (i = 0; i < 17; i++) {
2417         /* for Y channel, currently ignore */
2418         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0;
2419         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0;
2420         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0;
2421         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x0;
2422         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x0;
2423         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0;
2424         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0;
2425         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0;
2426         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0;
2427         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0;
2428         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0;
2429         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x0;
2430         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x0;
2431         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0;
2432         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0;
2433         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0;
2434         /* for U/V channel, 0.25 */
2435         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2436         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2437         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
2438         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
2439         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
2440         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
2441         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2442         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2443         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2444         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2445         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
2446         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
2447         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
2448         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
2449         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2450         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2451     }
2452
2453     sampler_8x8_state->dw136.default_sharpness_level = 0;
2454     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
2455     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2456     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2457     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2458
2459     /* sampler 8x8 */
2460     dri_bo_map(pp_context->sampler_state_table.bo, True);
2461     assert(pp_context->sampler_state_table.bo->virtual);
2462     assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
2463     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2464
2465     /* sample_8x8 Y index 4 */
2466     index = 4;
2467     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2468     sampler_8x8[index].dw0.global_noise_estimation = 255;
2469     sampler_8x8[index].dw0.ief_bypass = 1;
2470
2471     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2472
2473     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2474     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2475     sampler_8x8[index].dw2.r5x_coefficient = 9;
2476     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2477     sampler_8x8[index].dw2.r5c_coefficient = 3;
2478
2479     sampler_8x8[index].dw3.r3x_coefficient = 27;
2480     sampler_8x8[index].dw3.r3c_coefficient = 5;
2481     sampler_8x8[index].dw3.gain_factor = 40;
2482     sampler_8x8[index].dw3.non_edge_weight = 1;
2483     sampler_8x8[index].dw3.regular_weight = 2;
2484     sampler_8x8[index].dw3.strong_edge_weight = 7;
2485     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2486
2487     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2488                       I915_GEM_DOMAIN_RENDER, 
2489                       0,
2490                       0,
2491                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2492                       pp_context->sampler_state_table.bo_8x8);
2493
2494     /* sample_8x8 UV index 8 */
2495     index = 8;
2496     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2497     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2498     sampler_8x8[index].dw0.global_noise_estimation = 255;
2499     sampler_8x8[index].dw0.ief_bypass = 1;
2500     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2501     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2502     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2503     sampler_8x8[index].dw2.r5x_coefficient = 9;
2504     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2505     sampler_8x8[index].dw2.r5c_coefficient = 3;
2506     sampler_8x8[index].dw3.r3x_coefficient = 27;
2507     sampler_8x8[index].dw3.r3c_coefficient = 5;
2508     sampler_8x8[index].dw3.gain_factor = 40;
2509     sampler_8x8[index].dw3.non_edge_weight = 1;
2510     sampler_8x8[index].dw3.regular_weight = 2;
2511     sampler_8x8[index].dw3.strong_edge_weight = 7;
2512     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2513
2514     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2515                       I915_GEM_DOMAIN_RENDER, 
2516                       0,
2517                       0,
2518                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2519                       pp_context->sampler_state_table.bo_8x8);
2520
2521     /* sampler_8x8 V, index 12 */
2522     index = 12;
2523     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2524     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2525     sampler_8x8[index].dw0.global_noise_estimation = 255;
2526     sampler_8x8[index].dw0.ief_bypass = 1;
2527     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2528     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2529     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2530     sampler_8x8[index].dw2.r5x_coefficient = 9;
2531     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2532     sampler_8x8[index].dw2.r5c_coefficient = 3;
2533     sampler_8x8[index].dw3.r3x_coefficient = 27;
2534     sampler_8x8[index].dw3.r3c_coefficient = 5;
2535     sampler_8x8[index].dw3.gain_factor = 40;
2536     sampler_8x8[index].dw3.non_edge_weight = 1;
2537     sampler_8x8[index].dw3.regular_weight = 2;
2538     sampler_8x8[index].dw3.strong_edge_weight = 7;
2539     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2540
2541     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2542                       I915_GEM_DOMAIN_RENDER, 
2543                       0,
2544                       0,
2545                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2546                       pp_context->sampler_state_table.bo_8x8);
2547
2548     dri_bo_unmap(pp_context->sampler_state_table.bo);
2549
2550     /* private function & data */
2551     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
2552     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
2553     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
2554
2555     pp_avs_context->dest_x = dst_rect->x;
2556     pp_avs_context->dest_y = dst_rect->y;
2557     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
2558     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
2559     pp_avs_context->src_w = src_rect->width;
2560     pp_avs_context->src_h = src_rect->height;
2561
2562     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
2563     dw = MAX(dw, pp_avs_context->dest_w);
2564
2565     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
2566     pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */
2567     pp_static_parameter->grf2.avs_wa_width = dw;
2568     pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * dw);
2569     pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * dw);
2570
2571     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
2572     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / pp_avs_context->dest_h;
2573     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = -(float)pp_avs_context->dest_y / pp_avs_context->dest_h;
2574     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = -(float)pp_avs_context->dest_x / dw;
2575
2576     gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
2577
2578     dst_surface->flags = src_surface->flags;
2579
2580     return VA_STATUS_SUCCESS;
2581 }
2582
/*
 * Horizontal step count for the DNDI path: always 1.
 * private_context is not consulted.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2588
2589 static int
2590 pp_dndi_y_steps(void *private_context)
2591 {
2592     struct pp_dndi_context *pp_dndi_context = private_context;
2593
2594     return pp_dndi_context->dest_h / 4;
2595 }
2596
2597 static int
2598 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2599 {
2600     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2601
2602     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2603     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
2604
2605     return 0;
2606 }
2607
2608 static VAStatus
2609 pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2610                         const struct i965_surface *src_surface,
2611                         const VARectangle *src_rect,
2612                         struct i965_surface *dst_surface,
2613                         const VARectangle *dst_rect,
2614                         void *filter_param)
2615 {
2616     struct i965_driver_data *i965 = i965_driver_data(ctx);
2617     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
2618     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2619     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2620     struct object_surface *obj_surface;
2621     struct i965_sampler_dndi *sampler_dndi;
2622     int index;
2623     int w, h;
2624     int orig_w, orig_h;
2625     int dndi_top_first = 1;
2626
2627     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
2628         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
2629
2630     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
2631         dndi_top_first = 1;
2632     else
2633         dndi_top_first = 0;
2634
2635     /* surface */
2636     obj_surface = SURFACE(src_surface->id);
2637     orig_w = obj_surface->orig_width;
2638     orig_h = obj_surface->orig_height;
2639     w = obj_surface->width;
2640     h = obj_surface->height;
2641
2642     if (pp_context->stmm.bo == NULL) {
2643         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2644                                            "STMM surface",
2645                                            w * h,
2646                                            4096);
2647         assert(pp_context->stmm.bo);
2648     }
2649
2650     /* source UV surface index 2 */
2651     i965_pp_set_surface_state(ctx, pp_context,
2652                               obj_surface->bo, w * h,
2653                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2654                               2, 0);
2655
2656     /* source YUV surface index 4 */
2657     i965_pp_set_surface2_state(ctx, pp_context,
2658                                obj_surface->bo, 0,
2659                                orig_w, orig_h, w,
2660                                0, h,
2661                                SURFACE_FORMAT_PLANAR_420_8, 1,
2662                                4);
2663
2664     /* source STMM surface index 20 */
2665     i965_pp_set_surface_state(ctx, pp_context,
2666                               pp_context->stmm.bo, 0,
2667                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2668                               20, 1);
2669
2670     /* destination surface */
2671     obj_surface = SURFACE(dst_surface->id);
2672     orig_w = obj_surface->orig_width;
2673     orig_h = obj_surface->orig_height;
2674     w = obj_surface->width;
2675     h = obj_surface->height;
2676
2677     /* destination Y surface index 7 */
2678     i965_pp_set_surface_state(ctx, pp_context,
2679                               obj_surface->bo, 0,
2680                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2681                               7, 1);
2682
2683     /* destination UV surface index 8 */
2684     i965_pp_set_surface_state(ctx, pp_context,
2685                               obj_surface->bo, w * h,
2686                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2687                               8, 1);
2688     /* sampler dndi */
2689     dri_bo_map(pp_context->sampler_state_table.bo, True);
2690     assert(pp_context->sampler_state_table.bo->virtual);
2691     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2692     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2693
2694     /* sample dndi index 1 */
2695     index = 0;
2696     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2697     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
2698     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2699     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2700
2701     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2702     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 4;
2703     sampler_dndi[index].dw1.stmm_c2 = 1;
2704     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2705     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2706
2707     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
2708     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
2709     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
2710     sampler_dndi[index].dw2.good_neighbor_threshold = 4;                // 0-63
2711
2712     sampler_dndi[index].dw3.maximum_stmm = 128;
2713     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2714     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2715     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2716     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2717
2718     sampler_dndi[index].dw4.sdi_delta = 8;
2719     sampler_dndi[index].dw4.sdi_threshold = 128;
2720     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2721     sampler_dndi[index].dw4.stmm_shift_up = 0;
2722     sampler_dndi[index].dw4.stmm_shift_down = 0;
2723     sampler_dndi[index].dw4.minimum_stmm = 0;
2724
2725     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 8;
2726     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 32;
2727     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 64;
2728     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 32;
2729
2730     sampler_dndi[index].dw6.dn_enable = 1;
2731     sampler_dndi[index].dw6.di_enable = 1;
2732     sampler_dndi[index].dw6.di_partial = 0;
2733     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2734     sampler_dndi[index].dw6.dndi_stream_id = 0;
2735     sampler_dndi[index].dw6.dndi_first_frame = 1;
2736     sampler_dndi[index].dw6.progressive_dn = 0;
2737     sampler_dndi[index].dw6.fmd_tear_threshold = 63;
2738     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2739     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2740
2741     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
2742     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
2743     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2744     sampler_dndi[index].dw7.column_width_minus1 = 0;
2745
2746     dri_bo_unmap(pp_context->sampler_state_table.bo);
2747
2748     /* private function & data */
2749     pp_context->pp_x_steps = pp_dndi_x_steps;
2750     pp_context->pp_y_steps = pp_dndi_y_steps;
2751     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
2752
2753     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
2754     pp_static_parameter->grf1.r1_6.di.top_field_first = dndi_top_first;
2755     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 0;
2756     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 0;
2757
2758     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
2759     pp_inline_parameter->grf5.number_blocks = w / 16;
2760     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
2761     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
2762
2763     pp_dndi_context->dest_w = w;
2764     pp_dndi_context->dest_h = h;
2765
2766     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
2767
2768     return VA_STATUS_SUCCESS;
2769 }
2770
/*
 * Horizontal step count for the denoise path: always 1.
 * private_context is not consulted.
 */
static int
pp_dn_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2776
2777 static int
2778 pp_dn_y_steps(void *private_context)
2779 {
2780     struct pp_dn_context *pp_dn_context = private_context;
2781
2782     return pp_dn_context->dest_h / 8;
2783 }
2784
2785 static int
2786 pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2787 {
2788     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2789
2790     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2791     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
2792
2793     return 0;
2794 }
2795
2796 static VAStatus
2797 pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2798                       const struct i965_surface *src_surface,
2799                       const VARectangle *src_rect,
2800                       struct i965_surface *dst_surface,
2801                       const VARectangle *dst_rect,
2802                       void *filter_param)
2803 {
2804     struct i965_driver_data *i965 = i965_driver_data(ctx);
2805     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
2806     struct object_surface *obj_surface;
2807     struct i965_sampler_dndi *sampler_dndi;
2808     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2809     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2810     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
2811     int index;
2812     int w, h;
2813     int orig_w, orig_h;
2814     int dn_strength = 15;
2815     int dndi_top_first = 1;
2816     int dn_progressive = 0;
2817
2818     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
2819         dndi_top_first = 1;
2820         dn_progressive = 1;
2821     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
2822         dndi_top_first = 1;
2823         dn_progressive = 0;
2824     } else {
2825         dndi_top_first = 0;
2826         dn_progressive = 0;
2827     }
2828
2829     if (dn_filter_param) {
2830         float value = dn_filter_param->value;
2831         
2832         if (value > 1.0)
2833             value = 1.0;
2834         
2835         if (value < 0.0)
2836             value = 0.0;
2837
2838         dn_strength = (int)(value * 31.0F);
2839     }
2840
2841     /* surface */
2842     obj_surface = SURFACE(src_surface->id);
2843     orig_w = obj_surface->orig_width;
2844     orig_h = obj_surface->orig_height;
2845     w = obj_surface->width;
2846     h = obj_surface->height;
2847
2848     if (pp_context->stmm.bo == NULL) {
2849         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2850                                            "STMM surface",
2851                                            w * h,
2852                                            4096);
2853         assert(pp_context->stmm.bo);
2854     }
2855
2856     /* source UV surface index 2 */
2857     i965_pp_set_surface_state(ctx, pp_context,
2858                               obj_surface->bo, w * h,
2859                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2860                               2, 0);
2861
2862     /* source YUV surface index 4 */
2863     i965_pp_set_surface2_state(ctx, pp_context,
2864                                obj_surface->bo, 0,
2865                                orig_w, orig_h, w,
2866                                0, h,
2867                                SURFACE_FORMAT_PLANAR_420_8, 1,
2868                                4);
2869
2870     /* source STMM surface index 20 */
2871     i965_pp_set_surface_state(ctx, pp_context,
2872                               pp_context->stmm.bo, 0,
2873                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2874                               20, 1);
2875
2876     /* destination surface */
2877     obj_surface = SURFACE(dst_surface->id);
2878     orig_w = obj_surface->orig_width;
2879     orig_h = obj_surface->orig_height;
2880     w = obj_surface->width;
2881     h = obj_surface->height;
2882
2883     /* destination Y surface index 7 */
2884     i965_pp_set_surface_state(ctx, pp_context,
2885                               obj_surface->bo, 0,
2886                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2887                               7, 1);
2888
2889     /* destination UV surface index 8 */
2890     i965_pp_set_surface_state(ctx, pp_context,
2891                               obj_surface->bo, w * h,
2892                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2893                               8, 1);
2894     /* sampler dn */
2895     dri_bo_map(pp_context->sampler_state_table.bo, True);
2896     assert(pp_context->sampler_state_table.bo->virtual);
2897     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2898     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2899
2900     /* sample dndi index 1 */
2901     index = 0;
2902     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2903     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
2904     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2905     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2906
2907     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2908     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
2909     sampler_dndi[index].dw1.stmm_c2 = 0;
2910     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2911     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2912
2913     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
2914     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
2915     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
2916     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
2917
2918     sampler_dndi[index].dw3.maximum_stmm = 128;
2919     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2920     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2921     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2922     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2923
2924     sampler_dndi[index].dw4.sdi_delta = 8;
2925     sampler_dndi[index].dw4.sdi_threshold = 128;
2926     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2927     sampler_dndi[index].dw4.stmm_shift_up = 0;
2928     sampler_dndi[index].dw4.stmm_shift_down = 0;
2929     sampler_dndi[index].dw4.minimum_stmm = 0;
2930
2931     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
2932     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
2933     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
2934     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
2935
2936     sampler_dndi[index].dw6.dn_enable = 1;
2937     sampler_dndi[index].dw6.di_enable = 0;
2938     sampler_dndi[index].dw6.di_partial = 0;
2939     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2940     sampler_dndi[index].dw6.dndi_stream_id = 1;
2941     sampler_dndi[index].dw6.dndi_first_frame = 1;
2942     sampler_dndi[index].dw6.progressive_dn = dn_progressive;
2943     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
2944     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2945     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2946
2947     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
2948     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
2949     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2950     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
2951
2952     dri_bo_unmap(pp_context->sampler_state_table.bo);
2953
2954     /* private function & data */
2955     pp_context->pp_x_steps = pp_dn_x_steps;
2956     pp_context->pp_y_steps = pp_dn_y_steps;
2957     pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
2958
2959     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
2960     pp_static_parameter->grf1.r1_6.di.top_field_first = 0;
2961     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 64;
2962     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 192;
2963
2964     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
2965     pp_inline_parameter->grf5.number_blocks = w / 16;
2966     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
2967     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
2968
2969     pp_dn_context->dest_w = w;
2970     pp_dn_context->dest_h = h;
2971
2972     dst_surface->flags = src_surface->flags;
2973     
2974     return VA_STATUS_SUCCESS;
2975 }
2976
2977 static int
2978 gen7_pp_dndi_x_steps(void *private_context)
2979 {
2980     struct pp_dndi_context *pp_dndi_context = private_context;
2981
2982     return pp_dndi_context->dest_w / 16;
2983 }
2984
2985 static int
2986 gen7_pp_dndi_y_steps(void *private_context)
2987 {
2988     struct pp_dndi_context *pp_dndi_context = private_context;
2989
2990     return pp_dndi_context->dest_h / 4;
2991 }
2992
2993 static int
2994 gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2995 {
2996     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2997
2998     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16;
2999     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 4;
3000
3001     return 0;
3002 }
3003
3004 static VAStatus
3005 gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3006                              const struct i965_surface *src_surface,
3007                              const VARectangle *src_rect,
3008                              struct i965_surface *dst_surface,
3009                              const VARectangle *dst_rect,
3010                              void *filter_param)
3011 {
3012     struct i965_driver_data *i965 = i965_driver_data(ctx);
3013     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
3014     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3015     struct object_surface *obj_surface;
3016     struct gen7_sampler_dndi *sampler_dndi;
3017     int index;
3018     int w, h;
3019     int orig_w, orig_h;
3020     int dndi_top_first = 1;
3021
3022     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
3023         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
3024
3025     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
3026         dndi_top_first = 1;
3027     else
3028         dndi_top_first = 0;
3029
3030     /* surface */
3031     obj_surface = SURFACE(src_surface->id);
3032     orig_w = obj_surface->orig_width;
3033     orig_h = obj_surface->orig_height;
3034     w = obj_surface->width;
3035     h = obj_surface->height;
3036
3037     if (pp_context->stmm.bo == NULL) {
3038         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
3039                                            "STMM surface",
3040                                            w * h,
3041                                            4096);
3042         assert(pp_context->stmm.bo);
3043     }
3044
3045     /* source UV surface index 1 */
3046     gen7_pp_set_surface_state(ctx, pp_context,
3047                               obj_surface->bo, w * h,
3048                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3049                               1, 0);
3050
3051     /* source YUV surface index 3 */
3052     gen7_pp_set_surface2_state(ctx, pp_context,
3053                                obj_surface->bo, 0,
3054                                orig_w, orig_h, w,
3055                                0, h,
3056                                SURFACE_FORMAT_PLANAR_420_8, 1,
3057                                3);
3058
3059     /* source (temporal reference) YUV surface index 4 */
3060     gen7_pp_set_surface2_state(ctx, pp_context,
3061                                obj_surface->bo, 0,
3062                                orig_w, orig_h, w,
3063                                0, h,
3064                                SURFACE_FORMAT_PLANAR_420_8, 1,
3065                                4);
3066
3067     /* STMM / History Statistics input surface, index 5 */
3068     gen7_pp_set_surface_state(ctx, pp_context,
3069                               pp_context->stmm.bo, 0,
3070                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3071                               5, 1);
3072
3073     /* destination surface */
3074     obj_surface = SURFACE(dst_surface->id);
3075     orig_w = obj_surface->orig_width;
3076     orig_h = obj_surface->orig_height;
3077     w = obj_surface->width;
3078     h = obj_surface->height;
3079
3080     /* destination(Previous frame) Y surface index 27 */
3081     gen7_pp_set_surface_state(ctx, pp_context,
3082                               obj_surface->bo, 0,
3083                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3084                               27, 1);
3085
3086     /* destination(Previous frame) UV surface index 28 */
3087     gen7_pp_set_surface_state(ctx, pp_context,
3088                               obj_surface->bo, w * h,
3089                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3090                               28, 1);
3091
3092     /* destination(Current frame) Y surface index 30 */
3093     gen7_pp_set_surface_state(ctx, pp_context,
3094                               obj_surface->bo, 0,
3095                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3096                               30, 1);
3097
3098     /* destination(Current frame) UV surface index 31 */
3099     gen7_pp_set_surface_state(ctx, pp_context,
3100                               obj_surface->bo, w * h,
3101                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3102                               31, 1);
3103
3104     /* STMM output surface, index 33 */
3105     gen7_pp_set_surface_state(ctx, pp_context,
3106                               pp_context->stmm.bo, 0,
3107                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3108                               33, 1);
3109
3110
3111     /* sampler dndi */
3112     dri_bo_map(pp_context->sampler_state_table.bo, True);
3113     assert(pp_context->sampler_state_table.bo->virtual);
3114     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3115     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3116
3117     /* sample dndi index 0 */
3118     index = 0;
3119     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
3120     sampler_dndi[index].dw0.dnmh_delt = 8;
3121     sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
3122     sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
3123     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
3124     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
3125
3126     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3127     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
3128     sampler_dndi[index].dw1.stmm_c2 = 0;
3129     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
3130     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
3131
3132     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
3133     sampler_dndi[index].dw2.bne_edge_th = 1;
3134     sampler_dndi[index].dw2.smooth_mv_th = 0;
3135     sampler_dndi[index].dw2.sad_tight_th = 5;
3136     sampler_dndi[index].dw2.cat_slope_minus1 = 9;
3137     sampler_dndi[index].dw2.good_neighbor_th = 4;
3138
3139     sampler_dndi[index].dw3.maximum_stmm = 128;
3140     sampler_dndi[index].dw3.multipler_for_vecm = 2;
3141     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3142     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3143     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3144
3145     sampler_dndi[index].dw4.sdi_delta = 8;
3146     sampler_dndi[index].dw4.sdi_threshold = 128;
3147     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3148     sampler_dndi[index].dw4.stmm_shift_up = 0;
3149     sampler_dndi[index].dw4.stmm_shift_down = 0;
3150     sampler_dndi[index].dw4.minimum_stmm = 0;
3151
3152     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
3153     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
3154     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3155     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3156
3157     sampler_dndi[index].dw6.dn_enable = 0;
3158     sampler_dndi[index].dw6.di_enable = 1;
3159     sampler_dndi[index].dw6.di_partial = 0;
3160     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3161     sampler_dndi[index].dw6.dndi_stream_id = 1;
3162     sampler_dndi[index].dw6.dndi_first_frame = 1;
3163     sampler_dndi[index].dw6.progressive_dn = 0;
3164     sampler_dndi[index].dw6.mcdi_enable = 0;
3165     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
3166     sampler_dndi[index].dw6.cat_th1 = 0;
3167     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
3168     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
3169
3170     sampler_dndi[index].dw7.sad_tha = 5;
3171     sampler_dndi[index].dw7.sad_thb = 10;
3172     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3173     sampler_dndi[index].dw7.mc_pixel_consistency_th = 25;
3174     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3175     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3176     sampler_dndi[index].dw7.neighborpixel_th = 10;
3177     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3178
3179     dri_bo_unmap(pp_context->sampler_state_table.bo);
3180
3181     /* private function & data */
3182     pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
3183     pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
3184     pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
3185
3186     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3187     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3188     pp_static_parameter->grf1.di_top_field_first = 0;
3189     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3190
3191     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3192     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3193     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3194
3195     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3196     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3197
3198     pp_dndi_context->dest_w = w;
3199     pp_dndi_context->dest_h = h;
3200
3201     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3202
3203     return VA_STATUS_SUCCESS;
3204 }
3205
3206 static int
3207 gen7_pp_dn_x_steps(void *private_context)
3208 {
3209     struct pp_dn_context *pp_dn_context = private_context;
3210
3211     return pp_dn_context->dest_w / 16;
3212 }
3213
3214 static int
3215 gen7_pp_dn_y_steps(void *private_context)
3216 {
3217     struct pp_dn_context *pp_dn_context = private_context;
3218
3219     return pp_dn_context->dest_h / 4;
3220 }
3221
3222 static int
3223 gen7_pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3224 {
3225     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3226
3227     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3228     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
3229
3230     return 0;
3231 }
3232
3233 static VAStatus
3234 gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3235                            const struct i965_surface *src_surface,
3236                            const VARectangle *src_rect,
3237                            struct i965_surface *dst_surface,
3238                            const VARectangle *dst_rect,
3239                            void *filter_param)
3240 {
3241     struct i965_driver_data *i965 = i965_driver_data(ctx);
3242     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
3243     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3244     struct object_surface *obj_surface;
3245     struct gen7_sampler_dndi *sampler_dn;
3246     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3247     int index;
3248     int w, h;
3249     int orig_w, orig_h;
3250     int dn_strength = 15;
3251     int dndi_top_first = 1;
3252     int dn_progressive = 0;
3253
3254     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3255         dndi_top_first = 1;
3256         dn_progressive = 1;
3257     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3258         dndi_top_first = 1;
3259         dn_progressive = 0;
3260     } else {
3261         dndi_top_first = 0;
3262         dn_progressive = 0;
3263     }
3264
3265     if (dn_filter_param) {
3266         float value = dn_filter_param->value;
3267         
3268         if (value > 1.0)
3269             value = 1.0;
3270         
3271         if (value < 0.0)
3272             value = 0.0;
3273
3274         dn_strength = (int)(value * 31.0F);
3275     }
3276
3277     /* surface */
3278     obj_surface = SURFACE(src_surface->id);
3279     orig_w = obj_surface->orig_width;
3280     orig_h = obj_surface->orig_height;
3281     w = obj_surface->width;
3282     h = obj_surface->height;
3283
3284     if (pp_context->stmm.bo == NULL) {
3285         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
3286                                            "STMM surface",
3287                                            w * h,
3288                                            4096);
3289         assert(pp_context->stmm.bo);
3290     }
3291
3292     /* source UV surface index 1 */
3293     gen7_pp_set_surface_state(ctx, pp_context,
3294                               obj_surface->bo, w * h,
3295                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3296                               1, 0);
3297
3298     /* source YUV surface index 3 */
3299     gen7_pp_set_surface2_state(ctx, pp_context,
3300                                obj_surface->bo, 0,
3301                                orig_w, orig_h, w,
3302                                0, h,
3303                                SURFACE_FORMAT_PLANAR_420_8, 1,
3304                                3);
3305
3306     /* source (temporal reference) YUV surface index 4 */
3307     gen7_pp_set_surface2_state(ctx, pp_context,
3308                                obj_surface->bo, 0,
3309                                orig_w, orig_h, w,
3310                                0, h,
3311                                SURFACE_FORMAT_PLANAR_420_8, 1,
3312                                4);
3313
3314     /* STMM / History Statistics input surface, index 5 */
3315     gen7_pp_set_surface_state(ctx, pp_context,
3316                               pp_context->stmm.bo, 0,
3317                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3318                               5, 1);
3319
3320     /* destination surface */
3321     obj_surface = SURFACE(dst_surface->id);
3322     orig_w = obj_surface->orig_width;
3323     orig_h = obj_surface->orig_height;
3324     w = obj_surface->width;
3325     h = obj_surface->height;
3326
3327     /* destination Y surface index 24 */
3328     gen7_pp_set_surface_state(ctx, pp_context,
3329                               obj_surface->bo, 0,
3330                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3331                               24, 1);
3332
3333     /* destination UV surface index 25 */
3334     gen7_pp_set_surface_state(ctx, pp_context,
3335                               obj_surface->bo, w * h,
3336                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3337                               25, 1);
3338
3339     /* sampler dn */
3340     dri_bo_map(pp_context->sampler_state_table.bo, True);
3341     assert(pp_context->sampler_state_table.bo->virtual);
3342     assert(sizeof(*sampler_dn) == sizeof(int) * 8);
3343     sampler_dn = pp_context->sampler_state_table.bo->virtual;
3344
3345     /* sample dn index 1 */
3346     index = 0;
3347     sampler_dn[index].dw0.denoise_asd_threshold = 0;
3348     sampler_dn[index].dw0.dnmh_delt = 8;
3349     sampler_dn[index].dw0.vdi_walker_y_stride = 0;
3350     sampler_dn[index].dw0.vdi_walker_frame_sharing_enable = 0;
3351     sampler_dn[index].dw0.denoise_maximum_history = 128;      // 128-240
3352     sampler_dn[index].dw0.denoise_stad_threshold = 0;
3353
3354     sampler_dn[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3355     sampler_dn[index].dw1.denoise_moving_pixel_threshold = 0;
3356     sampler_dn[index].dw1.stmm_c2 = 0;
3357     sampler_dn[index].dw1.low_temporal_difference_threshold = 8;
3358     sampler_dn[index].dw1.temporal_difference_threshold = 16;
3359
3360     sampler_dn[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
3361     sampler_dn[index].dw2.bne_edge_th = 1;
3362     sampler_dn[index].dw2.smooth_mv_th = 0;
3363     sampler_dn[index].dw2.sad_tight_th = 5;
3364     sampler_dn[index].dw2.cat_slope_minus1 = 9;
3365     sampler_dn[index].dw2.good_neighbor_th = 4;
3366
3367     sampler_dn[index].dw3.maximum_stmm = 128;
3368     sampler_dn[index].dw3.multipler_for_vecm = 2;
3369     sampler_dn[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3370     sampler_dn[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3371     sampler_dn[index].dw3.stmm_blending_constant_select = 0;
3372
3373     sampler_dn[index].dw4.sdi_delta = 8;
3374     sampler_dn[index].dw4.sdi_threshold = 128;
3375     sampler_dn[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3376     sampler_dn[index].dw4.stmm_shift_up = 0;
3377     sampler_dn[index].dw4.stmm_shift_down = 0;
3378     sampler_dn[index].dw4.minimum_stmm = 0;
3379
3380     sampler_dn[index].dw5.fmd_temporal_difference_threshold = 0;
3381     sampler_dn[index].dw5.sdi_fallback_mode_2_constant = 0;
3382     sampler_dn[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3383     sampler_dn[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3384
3385     sampler_dn[index].dw6.dn_enable = 1;
3386     sampler_dn[index].dw6.di_enable = 0;
3387     sampler_dn[index].dw6.di_partial = 0;
3388     sampler_dn[index].dw6.dndi_top_first = dndi_top_first;
3389     sampler_dn[index].dw6.dndi_stream_id = 1;
3390     sampler_dn[index].dw6.dndi_first_frame = 1;
3391     sampler_dn[index].dw6.progressive_dn = dn_progressive;
3392     sampler_dn[index].dw6.mcdi_enable = 0;
3393     sampler_dn[index].dw6.fmd_tear_threshold = 32;
3394     sampler_dn[index].dw6.cat_th1 = 0;
3395     sampler_dn[index].dw6.fmd2_vertical_difference_threshold = 32;
3396     sampler_dn[index].dw6.fmd1_vertical_difference_threshold = 32;
3397
3398     sampler_dn[index].dw7.sad_tha = 5;
3399     sampler_dn[index].dw7.sad_thb = 10;
3400     sampler_dn[index].dw7.fmd_for_1st_field_of_current_frame = 2;
3401     sampler_dn[index].dw7.mc_pixel_consistency_th = 25;
3402     sampler_dn[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
3403     sampler_dn[index].dw7.vdi_walker_enable = 0;
3404     sampler_dn[index].dw7.neighborpixel_th = 10;
3405     sampler_dn[index].dw7.column_width_minus1 = w / 16;
3406
3407     dri_bo_unmap(pp_context->sampler_state_table.bo);
3408
3409     /* private function & data */
3410     pp_context->pp_x_steps = gen7_pp_dn_x_steps;
3411     pp_context->pp_y_steps = gen7_pp_dn_y_steps;
3412     pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;
3413
3414     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3415     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3416     pp_static_parameter->grf1.di_top_field_first = 0;
3417     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3418
3419     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3420     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3421     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3422
3423     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3424     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3425
3426     pp_dn_context->dest_w = w;
3427     pp_dn_context->dest_h = h;
3428
3429     dst_surface->flags = src_surface->flags;
3430
3431     return VA_STATUS_SUCCESS;
3432 }
3433
3434 static VAStatus
3435 ironlake_pp_initialize(
3436     VADriverContextP   ctx,
3437     struct i965_post_processing_context *pp_context,
3438     const struct i965_surface *src_surface,
3439     const VARectangle *src_rect,
3440     struct i965_surface *dst_surface,
3441     const VARectangle *dst_rect,
3442     int                pp_index,
3443     void *filter_param
3444 )
3445 {
3446     VAStatus va_status;
3447     struct i965_driver_data *i965 = i965_driver_data(ctx);
3448     struct pp_module *pp_module;
3449     dri_bo *bo;
3450     int static_param_size, inline_param_size;
3451
3452     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3453     bo = dri_bo_alloc(i965->intel.bufmgr,
3454                       "surface state & binding table",
3455                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
3456                       4096);
3457     assert(bo);
3458     pp_context->surface_state_binding_table.bo = bo;
3459
3460     dri_bo_unreference(pp_context->curbe.bo);
3461     bo = dri_bo_alloc(i965->intel.bufmgr,
3462                       "constant buffer",
3463                       4096, 
3464                       4096);
3465     assert(bo);
3466     pp_context->curbe.bo = bo;
3467
3468     dri_bo_unreference(pp_context->idrt.bo);
3469     bo = dri_bo_alloc(i965->intel.bufmgr, 
3470                       "interface discriptor", 
3471                       sizeof(struct i965_interface_descriptor), 
3472                       4096);
3473     assert(bo);
3474     pp_context->idrt.bo = bo;
3475     pp_context->idrt.num_interface_descriptors = 0;
3476
3477     dri_bo_unreference(pp_context->sampler_state_table.bo);
3478     bo = dri_bo_alloc(i965->intel.bufmgr, 
3479                       "sampler state table", 
3480                       4096,
3481                       4096);
3482     assert(bo);
3483     dri_bo_map(bo, True);
3484     memset(bo->virtual, 0, bo->size);
3485     dri_bo_unmap(bo);
3486     pp_context->sampler_state_table.bo = bo;
3487
3488     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3489     bo = dri_bo_alloc(i965->intel.bufmgr, 
3490                       "sampler 8x8 state ",
3491                       4096,
3492                       4096);
3493     assert(bo);
3494     pp_context->sampler_state_table.bo_8x8 = bo;
3495
3496     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3497     bo = dri_bo_alloc(i965->intel.bufmgr, 
3498                       "sampler 8x8 state ",
3499                       4096,
3500                       4096);
3501     assert(bo);
3502     pp_context->sampler_state_table.bo_8x8_uv = bo;
3503
3504     dri_bo_unreference(pp_context->vfe_state.bo);
3505     bo = dri_bo_alloc(i965->intel.bufmgr, 
3506                       "vfe state", 
3507                       sizeof(struct i965_vfe_state), 
3508                       4096);
3509     assert(bo);
3510     pp_context->vfe_state.bo = bo;
3511
3512     static_param_size = sizeof(struct pp_static_parameter);
3513     inline_param_size = sizeof(struct pp_inline_parameter);
3514
3515     memset(pp_context->pp_static_parameter, 0, static_param_size);
3516     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
3517     
3518     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
3519     pp_context->current_pp = pp_index;
3520     pp_module = &pp_context->pp_modules[pp_index];
3521     
3522     if (pp_module->initialize)
3523         va_status = pp_module->initialize(ctx, pp_context,
3524                                           src_surface,
3525                                           src_rect,
3526                                           dst_surface,
3527                                           dst_rect,
3528                                           filter_param);
3529     else
3530         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
3531
3532     return va_status;
3533 }
3534
3535 static VAStatus
3536 ironlake_post_processing(
3537     VADriverContextP   ctx,
3538     struct i965_post_processing_context *pp_context,
3539     const struct i965_surface *src_surface,
3540     const VARectangle *src_rect,
3541     struct i965_surface *dst_surface,
3542     const VARectangle *dst_rect,
3543     int                pp_index,
3544     void *filter_param
3545 )
3546 {
3547     VAStatus va_status;
3548
3549     va_status = ironlake_pp_initialize(ctx, pp_context,
3550                                        src_surface,
3551                                        src_rect,
3552                                        dst_surface,
3553                                        dst_rect,
3554                                        pp_index,
3555                                        filter_param);
3556
3557     if (va_status == VA_STATUS_SUCCESS) {
3558         ironlake_pp_states_setup(ctx, pp_context);
3559         ironlake_pp_pipeline_setup(ctx, pp_context);
3560     }
3561
3562     return va_status;
3563 }
3564
3565 static VAStatus
3566 gen6_pp_initialize(
3567     VADriverContextP   ctx,
3568     struct i965_post_processing_context *pp_context,
3569     const struct i965_surface *src_surface,
3570     const VARectangle *src_rect,
3571     struct i965_surface *dst_surface,
3572     const VARectangle *dst_rect,
3573     int                pp_index,
3574     void *filter_param
3575 )
3576 {
3577     VAStatus va_status;
3578     struct i965_driver_data *i965 = i965_driver_data(ctx);
3579     struct pp_module *pp_module;
3580     dri_bo *bo;
3581     int static_param_size, inline_param_size;
3582
3583     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3584     bo = dri_bo_alloc(i965->intel.bufmgr,
3585                       "surface state & binding table",
3586                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
3587                       4096);
3588     assert(bo);
3589     pp_context->surface_state_binding_table.bo = bo;
3590
3591     dri_bo_unreference(pp_context->curbe.bo);
3592     bo = dri_bo_alloc(i965->intel.bufmgr,
3593                       "constant buffer",
3594                       4096, 
3595                       4096);
3596     assert(bo);
3597     pp_context->curbe.bo = bo;
3598
3599     dri_bo_unreference(pp_context->idrt.bo);
3600     bo = dri_bo_alloc(i965->intel.bufmgr, 
3601                       "interface discriptor", 
3602                       sizeof(struct gen6_interface_descriptor_data), 
3603                       4096);
3604     assert(bo);
3605     pp_context->idrt.bo = bo;
3606     pp_context->idrt.num_interface_descriptors = 0;
3607
3608     dri_bo_unreference(pp_context->sampler_state_table.bo);
3609     bo = dri_bo_alloc(i965->intel.bufmgr, 
3610                       "sampler state table", 
3611                       4096,
3612                       4096);
3613     assert(bo);
3614     dri_bo_map(bo, True);
3615     memset(bo->virtual, 0, bo->size);
3616     dri_bo_unmap(bo);
3617     pp_context->sampler_state_table.bo = bo;
3618
3619     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3620     bo = dri_bo_alloc(i965->intel.bufmgr, 
3621                       "sampler 8x8 state ",
3622                       4096,
3623                       4096);
3624     assert(bo);
3625     pp_context->sampler_state_table.bo_8x8 = bo;
3626
3627     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3628     bo = dri_bo_alloc(i965->intel.bufmgr, 
3629                       "sampler 8x8 state ",
3630                       4096,
3631                       4096);
3632     assert(bo);
3633     pp_context->sampler_state_table.bo_8x8_uv = bo;
3634
3635     dri_bo_unreference(pp_context->vfe_state.bo);
3636     bo = dri_bo_alloc(i965->intel.bufmgr, 
3637                       "vfe state", 
3638                       sizeof(struct i965_vfe_state), 
3639                       4096);
3640     assert(bo);
3641     pp_context->vfe_state.bo = bo;
3642     
3643     if (IS_GEN7(i965->intel.device_id)) {
3644         static_param_size = sizeof(struct gen7_pp_static_parameter);
3645         inline_param_size = sizeof(struct gen7_pp_inline_parameter);
3646     } else {
3647         static_param_size = sizeof(struct pp_static_parameter);
3648         inline_param_size = sizeof(struct pp_inline_parameter);
3649     }
3650
3651     memset(pp_context->pp_static_parameter, 0, static_param_size);
3652     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
3653
3654     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
3655     pp_context->current_pp = pp_index;
3656     pp_module = &pp_context->pp_modules[pp_index];
3657     
3658     if (pp_module->initialize)
3659         va_status = pp_module->initialize(ctx, pp_context,
3660                                           src_surface,
3661                                           src_rect,
3662                                           dst_surface,
3663                                           dst_rect,
3664                                           filter_param);
3665     else
3666         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
3667
3668     calculate_boundary_block_mask(pp_context, dst_rect);
3669     
3670     return va_status;
3671 }
3672
3673 static void
3674 gen6_pp_interface_descriptor_table(VADriverContextP   ctx,
3675                                    struct i965_post_processing_context *pp_context)
3676 {
3677     struct i965_driver_data *i965 = i965_driver_data(ctx);
3678     struct gen6_interface_descriptor_data *desc;
3679     dri_bo *bo;
3680     int pp_index = pp_context->current_pp;
3681
3682     bo = pp_context->idrt.bo;
3683     dri_bo_map(bo, True);
3684     assert(bo->virtual);
3685     desc = bo->virtual;
3686     memset(desc, 0, sizeof(*desc));
3687     desc->desc0.kernel_start_pointer = 
3688         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
3689     desc->desc1.single_program_flow = 1;
3690     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
3691     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
3692     desc->desc2.sampler_state_pointer = 
3693         pp_context->sampler_state_table.bo->offset >> 5;
3694     desc->desc3.binding_table_entry_count = 0;
3695     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
3696     desc->desc4.constant_urb_entry_read_offset = 0;
3697
3698     if (IS_GEN7(i965->intel.device_id))
3699         desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */
3700     else
3701         desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
3702
3703     dri_bo_emit_reloc(bo,
3704                       I915_GEM_DOMAIN_INSTRUCTION, 0,
3705                       0,
3706                       offsetof(struct gen6_interface_descriptor_data, desc0),
3707                       pp_context->pp_modules[pp_index].kernel.bo);
3708
3709     dri_bo_emit_reloc(bo,
3710                       I915_GEM_DOMAIN_INSTRUCTION, 0,
3711                       desc->desc2.sampler_count << 2,
3712                       offsetof(struct gen6_interface_descriptor_data, desc2),
3713                       pp_context->sampler_state_table.bo);
3714
3715     dri_bo_unmap(bo);
3716     pp_context->idrt.num_interface_descriptors++;
3717 }
3718
3719 static void
3720 gen6_pp_upload_constants(VADriverContextP ctx,
3721                          struct i965_post_processing_context *pp_context)
3722 {
3723     struct i965_driver_data *i965 = i965_driver_data(ctx);
3724     unsigned char *constant_buffer;
3725     int param_size;
3726
3727     assert(sizeof(struct pp_static_parameter) == 128);
3728     assert(sizeof(struct gen7_pp_static_parameter) == 192);
3729
3730     if (IS_GEN7(i965->intel.device_id))
3731         param_size = sizeof(struct gen7_pp_static_parameter);
3732     else
3733         param_size = sizeof(struct pp_static_parameter);
3734
3735     dri_bo_map(pp_context->curbe.bo, 1);
3736     assert(pp_context->curbe.bo->virtual);
3737     constant_buffer = pp_context->curbe.bo->virtual;
3738     memcpy(constant_buffer, pp_context->pp_static_parameter, param_size);
3739     dri_bo_unmap(pp_context->curbe.bo);
3740 }
3741
3742 static void
3743 gen6_pp_states_setup(VADriverContextP ctx,
3744                      struct i965_post_processing_context *pp_context)
3745 {
3746     gen6_pp_interface_descriptor_table(ctx, pp_context);
3747     gen6_pp_upload_constants(ctx, pp_context);
3748 }
3749
3750 static void
3751 gen6_pp_pipeline_select(VADriverContextP ctx,
3752                         struct i965_post_processing_context *pp_context)
3753 {
3754     struct intel_batchbuffer *batch = pp_context->batch;
3755
3756     BEGIN_BATCH(batch, 1);
3757     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
3758     ADVANCE_BATCH(batch);
3759 }
3760
3761 static void
3762 gen6_pp_state_base_address(VADriverContextP ctx,
3763                            struct i965_post_processing_context *pp_context)
3764 {
3765     struct intel_batchbuffer *batch = pp_context->batch;
3766
3767     BEGIN_BATCH(batch, 10);
3768     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
3769     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3770     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
3771     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3772     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3773     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3774     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3775     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3776     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3777     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3778     ADVANCE_BATCH(batch);
3779 }
3780
3781 static void
3782 gen6_pp_vfe_state(VADriverContextP ctx,
3783                   struct i965_post_processing_context *pp_context)
3784 {
3785     struct intel_batchbuffer *batch = pp_context->batch;
3786
3787     BEGIN_BATCH(batch, 8);
3788     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
3789     OUT_BATCH(batch, 0);
3790     OUT_BATCH(batch,
3791               (pp_context->urb.num_vfe_entries - 1) << 16 |
3792               pp_context->urb.num_vfe_entries << 8);
3793     OUT_BATCH(batch, 0);
3794     OUT_BATCH(batch,
3795               (pp_context->urb.size_vfe_entry * 2) << 16 |  /* URB Entry Allocation Size, in 256 bits unit */
3796               (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2)); /* CURBE Allocation Size, in 256 bits unit */
3797     OUT_BATCH(batch, 0);
3798     OUT_BATCH(batch, 0);
3799     OUT_BATCH(batch, 0);
3800     ADVANCE_BATCH(batch);
3801 }
3802
3803 static void
3804 gen6_pp_curbe_load(VADriverContextP ctx,
3805                    struct i965_post_processing_context *pp_context)
3806 {
3807     struct intel_batchbuffer *batch = pp_context->batch;
3808
3809     assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32 <= pp_context->curbe.bo->size);
3810
3811     BEGIN_BATCH(batch, 4);
3812     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
3813     OUT_BATCH(batch, 0);
3814     OUT_BATCH(batch,
3815               pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32);
3816     OUT_RELOC(batch, 
3817               pp_context->curbe.bo,
3818               I915_GEM_DOMAIN_INSTRUCTION, 0,
3819               0);
3820     ADVANCE_BATCH(batch);
3821 }
3822
3823 static void
3824 gen6_interface_descriptor_load(VADriverContextP ctx,
3825                                struct i965_post_processing_context *pp_context)
3826 {
3827     struct intel_batchbuffer *batch = pp_context->batch;
3828
3829     BEGIN_BATCH(batch, 4);
3830     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
3831     OUT_BATCH(batch, 0);
3832     OUT_BATCH(batch,
3833               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
3834     OUT_RELOC(batch, 
3835               pp_context->idrt.bo,
3836               I915_GEM_DOMAIN_INSTRUCTION, 0,
3837               0);
3838     ADVANCE_BATCH(batch);
3839 }
3840
3841 static void update_block_mask_parameter(struct i965_post_processing_context *pp_context, int x, int y, int x_steps, int y_steps) 
3842 {
3843     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3844
3845     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3846     pp_inline_parameter->grf6.block_vertical_mask_bottom = pp_context->block_vertical_mask_bottom;
3847     // for the first block, it always on the left edge. the second block will reload horizontal_mask from grf6.block_horizontal_mask_middle
3848     pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_left;
3849     pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
3850     pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_right;
3851
3852     /* 1 x N */
3853     if (x_steps == 1) {
3854         if (y == y_steps-1) {
3855             pp_inline_parameter->grf5.block_vertical_mask = pp_context->block_vertical_mask_bottom;
3856         }
3857         else {
3858             pp_inline_parameter->grf6.block_vertical_mask_bottom = 0xff;
3859         }
3860     }
3861
3862     /* M x 1 */
3863     if (y_steps == 1) {
3864         if (x == 0) { // all blocks in this group are on the left edge
3865             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_left;
3866             pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_left; 
3867         }
3868         else if (x == x_steps-1) {
3869             pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_right;
3870             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_right;
3871         }
3872         else {
3873             pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3874             pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
3875             pp_inline_parameter->grf6.block_horizontal_mask_right = 0xffff;
3876         }
3877     }
3878
3879 }
3880
3881 static void
3882 gen6_pp_object_walker(VADriverContextP ctx,
3883                       struct i965_post_processing_context *pp_context)
3884 {
3885     struct i965_driver_data *i965 = i965_driver_data(ctx);
3886     struct intel_batchbuffer *batch = pp_context->batch;
3887     int x, x_steps, y, y_steps;
3888     int param_size, command_length_in_dws;
3889     dri_bo *command_buffer;
3890     unsigned int *command_ptr;
3891
3892     if (IS_GEN7(i965->intel.device_id))
3893         param_size = sizeof(struct gen7_pp_inline_parameter);
3894     else
3895         param_size = sizeof(struct pp_inline_parameter);
3896
3897     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
3898     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
3899     command_length_in_dws = 6 + (param_size >> 2);
3900     command_buffer = dri_bo_alloc(i965->intel.bufmgr,
3901                                   "command objects buffer",
3902                                   command_length_in_dws * 4 * x_steps * y_steps + 8,
3903                                   4096);
3904
3905     dri_bo_map(command_buffer, 1);
3906     command_ptr = command_buffer->virtual;
3907
3908     for (y = 0; y < y_steps; y++) {
3909         for (x = 0; x < x_steps; x++) {
3910             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
3911                 // some common block parameter update goes here, apply to all pp functions
3912                 update_block_mask_parameter (pp_context, x, y, x_steps, y_steps);
3913                 
3914                 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
3915                 *command_ptr++ = 0;
3916                 *command_ptr++ = 0;
3917                 *command_ptr++ = 0;
3918                 *command_ptr++ = 0;
3919                 *command_ptr++ = 0;
3920                 memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
3921                 command_ptr += (param_size >> 2);
3922             }
3923         }
3924     }
3925
3926     if (command_length_in_dws * x_steps * y_steps % 2 == 0)
3927         *command_ptr++ = 0;
3928
3929     *command_ptr = MI_BATCH_BUFFER_END;
3930
3931     dri_bo_unmap(command_buffer);
3932
3933     BEGIN_BATCH(batch, 2);
3934     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
3935     OUT_RELOC(batch, command_buffer, 
3936               I915_GEM_DOMAIN_COMMAND, 0, 
3937               0);
3938     ADVANCE_BATCH(batch);
3939     
3940     dri_bo_unreference(command_buffer);
3941
3942     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
3943      * will cause control to pass back to ring buffer 
3944      */
3945     intel_batchbuffer_end_atomic(batch);
3946     intel_batchbuffer_flush(batch);
3947     intel_batchbuffer_start_atomic(batch, 0x1000);
3948 }
3949
3950 static void
3951 gen6_pp_pipeline_setup(VADriverContextP ctx,
3952                        struct i965_post_processing_context *pp_context)
3953 {
3954     struct intel_batchbuffer *batch = pp_context->batch;
3955
3956     intel_batchbuffer_start_atomic(batch, 0x1000);
3957     intel_batchbuffer_emit_mi_flush(batch);
3958     gen6_pp_pipeline_select(ctx, pp_context);
3959     gen6_pp_state_base_address(ctx, pp_context);
3960     gen6_pp_vfe_state(ctx, pp_context);
3961     gen6_pp_curbe_load(ctx, pp_context);
3962     gen6_interface_descriptor_load(ctx, pp_context);
3963     gen6_pp_object_walker(ctx, pp_context);
3964     intel_batchbuffer_end_atomic(batch);
3965 }
3966
3967 static VAStatus
3968 gen6_post_processing(
3969     VADriverContextP   ctx,
3970     struct i965_post_processing_context *pp_context,
3971     const struct i965_surface *src_surface,
3972     const VARectangle *src_rect,
3973     struct i965_surface *dst_surface,
3974     const VARectangle *dst_rect,
3975     int                pp_index,
3976     void * filter_param
3977 )
3978 {
3979     VAStatus va_status;
3980     
3981     va_status = gen6_pp_initialize(ctx, pp_context,
3982                                    src_surface,
3983                                    src_rect,
3984                                    dst_surface,
3985                                    dst_rect,
3986                                    pp_index,
3987                                    filter_param);
3988
3989     if (va_status == VA_STATUS_SUCCESS) {
3990         gen6_pp_states_setup(ctx, pp_context);
3991         gen6_pp_pipeline_setup(ctx, pp_context);
3992     }
3993
3994     return va_status;
3995 }
3996
3997 static VAStatus
3998 i965_post_processing_internal(
3999     VADriverContextP   ctx,
4000     struct i965_post_processing_context *pp_context,
4001     const struct i965_surface *src_surface,
4002     const VARectangle *src_rect,
4003     struct i965_surface *dst_surface,
4004     const VARectangle *dst_rect,
4005     int                pp_index,
4006     void *filter_param
4007 )
4008 {
4009     struct i965_driver_data *i965 = i965_driver_data(ctx);
4010     VAStatus va_status;
4011
4012     if (IS_GEN6(i965->intel.device_id) ||
4013         IS_GEN7(i965->intel.device_id))
4014         va_status = gen6_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
4015     else
4016         va_status = ironlake_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
4017     
4018     return va_status;
4019 }
4020
4021 VAStatus 
4022 i965_DestroySurfaces(VADriverContextP ctx,
4023                      VASurfaceID *surface_list,
4024                      int num_surfaces);
4025 VAStatus 
4026 i965_CreateSurfaces(VADriverContextP ctx,
4027                     int width,
4028                     int height,
4029                     int format,
4030                     int num_surfaces,
4031                     VASurfaceID *surfaces);
4032
/*
 * Convert a packed 32-bit ARGB value (alpha in bits 31..24, red in 23..16,
 * green in 15..8, blue in 7..0) into separate Y, U, V and alpha bytes.
 *
 * The conversion uses integer coefficients scaled by 1000, with an offset
 * of 16 for Y and 128 for U and V.  Alpha is passed through unchanged.
 */
static void
rgb_to_yuv(unsigned int argb,
           unsigned char *y,
           unsigned char *u,
           unsigned char *v,
           unsigned char *a)
{
    const int red   = (argb >> 16) & 0xff;
    const int green = (argb >> 8) & 0xff;
    const int blue  = argb & 0xff;

    const int luma      = (257 * red + 504 * green + 98 * blue) / 1000;
    const int chroma_cr = (439 * red - 368 * green - 71 * blue) / 1000;
    const int chroma_cb = (-148 * red - 291 * green + 439 * blue) / 1000;

    *y = luma + 16;
    *v = chroma_cr + 128;
    *u = chroma_cb + 128;
    *a = (argb >> 24) & 0xff;
}
4049
4050 static void 
4051 i965_vpp_clear_surface(VADriverContextP ctx,
4052                        struct i965_post_processing_context *pp_context,
4053                        VASurfaceID surface,
4054                        unsigned int color)
4055 {
4056     struct i965_driver_data *i965 = i965_driver_data(ctx);
4057     struct intel_batchbuffer *batch = pp_context->batch;
4058     struct object_surface *obj_surface = SURFACE(surface);
4059     unsigned int blt_cmd, br13;
4060     unsigned int tiling = 0, swizzle = 0;
4061     int pitch;
4062     unsigned char y, u, v, a = 0;
4063
4064     /* Currently only support NV12 surface */
4065     if (!obj_surface || obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
4066         return;
4067
4068     rgb_to_yuv(color, &y, &u, &v, &a);
4069
4070     if (a == 0)
4071         return;
4072
4073     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
4074     blt_cmd = XY_COLOR_BLT_CMD;
4075     pitch = obj_surface->width;
4076
4077     if (tiling != I915_TILING_NONE) {
4078         blt_cmd |= XY_COLOR_BLT_DST_TILED;
4079         pitch >>= 2;
4080     }
4081
4082     br13 = 0xf0 << 16;
4083     br13 |= BR13_8;
4084     br13 |= pitch;
4085
4086     if (IS_GEN6(i965->intel.device_id) ||
4087         IS_GEN7(i965->intel.device_id)) {
4088         intel_batchbuffer_start_atomic_blt(batch, 48);
4089         BEGIN_BLT_BATCH(batch, 12);
4090     } else {
4091         intel_batchbuffer_start_atomic(batch, 48);
4092         BEGIN_BATCH(batch, 12);
4093     }
4094
4095     OUT_BATCH(batch, blt_cmd);
4096     OUT_BATCH(batch, br13);
4097     OUT_BATCH(batch,
4098               0 << 16 |
4099               0);
4100     OUT_BATCH(batch,
4101               obj_surface->height << 16 |
4102               obj_surface->width);
4103     OUT_RELOC(batch, obj_surface->bo, 
4104               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4105               0);
4106     OUT_BATCH(batch, y);
4107
4108     br13 = 0xf0 << 16;
4109     br13 |= BR13_565;
4110     br13 |= pitch;
4111
4112     OUT_BATCH(batch, blt_cmd);
4113     OUT_BATCH(batch, br13);
4114     OUT_BATCH(batch,
4115               0 << 16 |
4116               0);
4117     OUT_BATCH(batch,
4118               obj_surface->height / 2 << 16 |
4119               obj_surface->width / 2);
4120     OUT_RELOC(batch, obj_surface->bo, 
4121               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4122               obj_surface->width * obj_surface->y_cb_offset);
4123     OUT_BATCH(batch, v << 8 | u);
4124
4125     ADVANCE_BATCH(batch);
4126     intel_batchbuffer_end_atomic(batch);
4127 }
4128
4129 VASurfaceID
4130 i965_post_processing(
4131     VADriverContextP   ctx,
4132     VASurfaceID        surface,
4133     const VARectangle *src_rect,
4134     const VARectangle *dst_rect,
4135     unsigned int       flags,
4136     int               *has_done_scaling  
4137 )
4138 {
4139     struct i965_driver_data *i965 = i965_driver_data(ctx);
4140     VASurfaceID in_surface_id = surface;
4141     VASurfaceID out_surface_id = VA_INVALID_ID;
4142     
4143     *has_done_scaling = 0;
4144
4145     if (HAS_PP(i965)) {
4146         struct object_surface *obj_surface;
4147         VAStatus status;
4148         struct i965_surface src_surface;
4149         struct i965_surface dst_surface;
4150
4151         obj_surface = SURFACE(in_surface_id);
4152
4153         /* Currently only support post processing for NV12 surface */
4154         if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
4155             return out_surface_id;
4156
4157         _i965LockMutex(&i965->pp_mutex);
4158
4159         if (flags & I965_PP_FLAG_MCDI) {
4160             status = i965_CreateSurfaces(ctx,
4161                                          obj_surface->orig_width,
4162                                          obj_surface->orig_height,
4163                                          VA_RT_FORMAT_YUV420,
4164                                          1,
4165                                          &out_surface_id);
4166             assert(status == VA_STATUS_SUCCESS);
4167             obj_surface = SURFACE(out_surface_id);
4168             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4169             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
4170             src_surface.id = in_surface_id;
4171             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4172             src_surface.flags = (flags & I965_PP_FLAG_TOP_FIELD) ? 
4173                 I965_SURFACE_FLAG_TOP_FIELD_FIRST : I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST;
4174             dst_surface.id = out_surface_id;
4175             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4176             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4177
4178             i965_post_processing_internal(ctx, i965->pp_context,
4179                                           &src_surface,
4180                                           src_rect,
4181                                           &dst_surface,
4182                                           dst_rect,
4183                                           PP_NV12_DNDI,
4184                                           NULL);
4185         }
4186
4187         if (flags & I965_PP_FLAG_AVS) {
4188             struct i965_render_state *render_state = &i965->render_state;
4189             struct intel_region *dest_region = render_state->draw_region;
4190
4191             if (out_surface_id != VA_INVALID_ID)
4192                 in_surface_id = out_surface_id;
4193
4194             status = i965_CreateSurfaces(ctx,
4195                                          dest_region->width,
4196                                          dest_region->height,
4197                                          VA_RT_FORMAT_YUV420,
4198                                          1,
4199                                          &out_surface_id);
4200             assert(status == VA_STATUS_SUCCESS);
4201             obj_surface = SURFACE(out_surface_id);
4202             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4203             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
4204             src_surface.id = in_surface_id;
4205             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4206             src_surface.flags = I965_SURFACE_FLAG_FRAME;
4207             dst_surface.id = out_surface_id;
4208             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4209             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4210
4211             i965_post_processing_internal(ctx, i965->pp_context,
4212                                           &src_surface,
4213                                           src_rect,
4214                                           &dst_surface,
4215                                           dst_rect,
4216                                           PP_NV12_AVS,
4217                                           NULL);
4218
4219             if (in_surface_id != surface)
4220                 i965_DestroySurfaces(ctx, &in_surface_id, 1);
4221                 
4222             *has_done_scaling = 1;
4223         }
4224
4225         _i965UnlockMutex(&i965->pp_mutex);
4226     }
4227
4228     return out_surface_id;
4229 }       
4230
4231 static VAStatus
4232 i965_image_pl1_rgbx_processing(VADriverContextP ctx,
4233                           const struct i965_surface *src_surface,
4234                           const VARectangle *src_rect,
4235                           struct i965_surface *dst_surface,
4236                           const VARectangle *dst_rect)
4237 {
4238     struct i965_driver_data *i965 = i965_driver_data(ctx);
4239     struct i965_post_processing_context *pp_context = i965->pp_context;
4240     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4241
4242     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4243         i965_post_processing_internal(ctx, i965->pp_context,
4244                                       src_surface,
4245                                       src_rect,
4246                                       dst_surface,
4247                                       dst_rect,
4248                                       PP_RGBX_LOAD_SAVE_NV12,
4249                                       NULL);
4250     } else {
4251         assert(0);
4252         return VA_STATUS_ERROR_UNKNOWN;
4253     }
4254
4255     intel_batchbuffer_flush(pp_context->batch);
4256
4257     return VA_STATUS_SUCCESS;
4258 }
4259
4260 static VAStatus
4261 i965_image_pl3_processing(VADriverContextP ctx,
4262                           const struct i965_surface *src_surface,
4263                           const VARectangle *src_rect,
4264                           struct i965_surface *dst_surface,
4265                           const VARectangle *dst_rect)
4266 {
4267     struct i965_driver_data *i965 = i965_driver_data(ctx);
4268     struct i965_post_processing_context *pp_context = i965->pp_context;
4269     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4270     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
4271
4272     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4273         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4274                                                  src_surface,
4275                                                  src_rect,
4276                                                  dst_surface,
4277                                                  dst_rect,
4278                                                  PP_PL3_LOAD_SAVE_N12,
4279                                                  NULL);
4280     } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || 
4281                fourcc == VA_FOURCC('I', 'M', 'C', '3') || 
4282                fourcc == VA_FOURCC('Y', 'V', '1', '2') || 
4283                fourcc == VA_FOURCC('I', '4', '2', '0')) {
4284         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4285                                                  src_surface,
4286                                                  src_rect,
4287                                                  dst_surface,
4288                                                  dst_rect,
4289                                                  PP_PL3_LOAD_SAVE_PL3,
4290                                                  NULL);
4291     } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') ||
4292                fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
4293         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4294                                                  src_surface,
4295                                                  src_rect,
4296                                                  dst_surface,
4297                                                  dst_rect,
4298                                                  PP_PL3_LOAD_SAVE_PA,
4299                                                  NULL);
4300     }
4301     else {
4302         assert(0);
4303     }
4304
4305     intel_batchbuffer_flush(pp_context->batch);
4306
4307     return vaStatus;
4308 }
4309
4310 static VAStatus
4311 i965_image_pl2_processing(VADriverContextP ctx,
4312                           const struct i965_surface *src_surface,
4313                           const VARectangle *src_rect,
4314                           struct i965_surface *dst_surface,
4315                           const VARectangle *dst_rect)
4316 {
4317     struct i965_driver_data *i965 = i965_driver_data(ctx);
4318     struct i965_post_processing_context *pp_context = i965->pp_context;
4319     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4320     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
4321
4322     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4323         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4324                                                  src_surface,
4325                                                  src_rect,
4326                                                  dst_surface,
4327                                                  dst_rect,
4328                                                  PP_NV12_LOAD_SAVE_N12,
4329                                                  NULL);
4330     } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || 
4331                fourcc == VA_FOURCC('I', 'M', 'C', '3') || 
4332                fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
4333                fourcc == VA_FOURCC('I', '4', '2', '0') ) {
4334         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4335                                                  src_surface,
4336                                                  src_rect,
4337                                                  dst_surface,
4338                                                  dst_rect,
4339                                                  PP_NV12_LOAD_SAVE_PL3,
4340                                                  NULL);
4341     } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') ||
4342                fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
4343         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4344                                                  src_surface,
4345                                                  src_rect,
4346                                                  dst_surface,
4347                                                  dst_rect,
4348                                                  PP_NV12_LOAD_SAVE_PA,
4349                                                      NULL);
4350     } else if (fourcc == VA_FOURCC('B', 'G', 'R', 'X') || 
4351                fourcc == VA_FOURCC('B', 'G', 'R', 'A') ||
4352                fourcc == VA_FOURCC('R', 'G', 'B', 'X') ||
4353                fourcc == VA_FOURCC('R', 'G', 'B', 'A') ) {
4354         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4355                                       src_surface,
4356                                       src_rect,
4357                                       dst_surface,
4358                                       dst_rect,
4359                                       PP_NV12_LOAD_SAVE_RGBX,
4360                                       NULL);
4361     } else {
4362         assert(0);
4363         return VA_STATUS_ERROR_UNKNOWN;
4364     }
4365
4366     intel_batchbuffer_flush(pp_context->batch);
4367
4368     return vaStatus;
4369 }
4370
4371 static VAStatus
4372 i965_image_pl1_processing(VADriverContextP ctx,
4373                           const struct i965_surface *src_surface,
4374                           const VARectangle *src_rect,
4375                           struct i965_surface *dst_surface,
4376                           const VARectangle *dst_rect)
4377 {
4378     struct i965_driver_data *i965 = i965_driver_data(ctx);
4379     struct i965_post_processing_context *pp_context = i965->pp_context;
4380     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4381
4382     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4383         i965_post_processing_internal(ctx, i965->pp_context,
4384                                       src_surface,
4385                                       src_rect,
4386                                       dst_surface,
4387                                       dst_rect,
4388                                       PP_PA_LOAD_SAVE_NV12,
4389                                       NULL);
4390     }
4391     else if (fourcc == VA_FOURCC_YV12) {
4392         i965_post_processing_internal(ctx, i965->pp_context,
4393                                       src_surface,
4394                                       src_rect,
4395                                       dst_surface,
4396                                       dst_rect,
4397                                       PP_PA_LOAD_SAVE_PL3,
4398                                       NULL);
4399
4400     }
4401     else {
4402         return VA_STATUS_ERROR_UNKNOWN;
4403     }
4404
4405     intel_batchbuffer_flush(pp_context->batch);
4406
4407     return VA_STATUS_SUCCESS;
4408 }
4409
4410 VAStatus
4411 i965_image_processing(VADriverContextP ctx,
4412                       const struct i965_surface *src_surface,
4413                       const VARectangle *src_rect,
4414                       struct i965_surface *dst_surface,
4415                       const VARectangle *dst_rect)
4416 {
4417     struct i965_driver_data *i965 = i965_driver_data(ctx);
4418     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
4419
4420     if (HAS_PP(i965)) {
4421         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
4422
4423         _i965LockMutex(&i965->pp_mutex);
4424
4425         switch (fourcc) {
4426         case VA_FOURCC('Y', 'V', '1', '2'):
4427         case VA_FOURCC('I', '4', '2', '0'):
4428         case VA_FOURCC('I', 'M', 'C', '1'):
4429         case VA_FOURCC('I', 'M', 'C', '3'):
4430             status = i965_image_pl3_processing(ctx,
4431                                                src_surface,
4432                                                src_rect,
4433                                                dst_surface,
4434                                                dst_rect);
4435             break;
4436
4437         case  VA_FOURCC('N', 'V', '1', '2'):
4438             status = i965_image_pl2_processing(ctx,
4439                                                src_surface,
4440                                                src_rect,
4441                                                dst_surface,
4442                                                dst_rect);
4443             break;
4444         case  VA_FOURCC('Y', 'U', 'Y', '2'):
4445         case VA_FOURCC('U', 'Y', 'V', 'Y'):
4446             status = i965_image_pl1_processing(ctx,
4447                                                src_surface,
4448                                                src_rect,
4449                                                dst_surface,
4450                                                dst_rect);
4451             break;
4452         case VA_FOURCC('B', 'G', 'R', 'A'):
4453         case VA_FOURCC('B', 'G', 'R', 'X'):
4454         case VA_FOURCC('R', 'G', 'B', 'A'):
4455         case VA_FOURCC('R', 'G', 'B', 'X'):
4456             status = i965_image_pl1_rgbx_processing(ctx,
4457                                                src_surface,
4458                                                src_rect,
4459                                                dst_surface,
4460                                                dst_rect);
4461             break;
4462         default:
4463             status = VA_STATUS_ERROR_UNIMPLEMENTED;
4464             break;
4465         }
4466         
4467         _i965UnlockMutex(&i965->pp_mutex);
4468     }
4469
4470     return status;
4471 }       
4472
4473 static void
4474 i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
4475 {
4476     int i;
4477
4478     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4479     pp_context->surface_state_binding_table.bo = NULL;
4480
4481     dri_bo_unreference(pp_context->curbe.bo);
4482     pp_context->curbe.bo = NULL;
4483
4484     dri_bo_unreference(pp_context->sampler_state_table.bo);
4485     pp_context->sampler_state_table.bo = NULL;
4486
4487     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4488     pp_context->sampler_state_table.bo_8x8 = NULL;
4489
4490     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4491     pp_context->sampler_state_table.bo_8x8_uv = NULL;
4492
4493     dri_bo_unreference(pp_context->idrt.bo);
4494     pp_context->idrt.bo = NULL;
4495     pp_context->idrt.num_interface_descriptors = 0;
4496
4497     dri_bo_unreference(pp_context->vfe_state.bo);
4498     pp_context->vfe_state.bo = NULL;
4499
4500     dri_bo_unreference(pp_context->stmm.bo);
4501     pp_context->stmm.bo = NULL;
4502
4503     for (i = 0; i < NUM_PP_MODULES; i++) {
4504         struct pp_module *pp_module = &pp_context->pp_modules[i];
4505
4506         dri_bo_unreference(pp_module->kernel.bo);
4507         pp_module->kernel.bo = NULL;
4508     }
4509
4510     free(pp_context->pp_static_parameter);
4511     free(pp_context->pp_inline_parameter);
4512     pp_context->pp_static_parameter = NULL;
4513     pp_context->pp_inline_parameter = NULL;
4514 }
4515
4516 Bool
4517 i965_post_processing_terminate(VADriverContextP ctx)
4518 {
4519     struct i965_driver_data *i965 = i965_driver_data(ctx);
4520     struct i965_post_processing_context *pp_context = i965->pp_context;
4521
4522     if (pp_context) {
4523         i965_post_processing_context_finalize(pp_context);
4524         free(pp_context);
4525     }
4526
4527     i965->pp_context = NULL;
4528
4529     return True;
4530 }
4531
4532 static void
4533 i965_post_processing_context_init(VADriverContextP ctx,
4534                                   struct i965_post_processing_context *pp_context,
4535                                   struct intel_batchbuffer *batch)
4536 {
4537     struct i965_driver_data *i965 = i965_driver_data(ctx);
4538     int i;
4539
4540     pp_context->urb.size = URB_SIZE((&i965->intel));
4541     pp_context->urb.num_vfe_entries = 32;
4542     pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
4543     pp_context->urb.num_cs_entries = 1;
4544     
4545     if (IS_GEN7(i965->intel.device_id))
4546         pp_context->urb.size_cs_entry = 4;      /* in 512 bits unit */
4547     else
4548         pp_context->urb.size_cs_entry = 2;
4549
4550     pp_context->urb.vfe_start = 0;
4551     pp_context->urb.cs_start = pp_context->urb.vfe_start + 
4552         pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
4553     assert(pp_context->urb.cs_start + 
4554            pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
4555
4556     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
4557     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
4558     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
4559
4560     if (IS_GEN7(i965->intel.device_id))
4561         memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
4562     else if (IS_GEN6(i965->intel.device_id))
4563         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
4564     else if (IS_IRONLAKE(i965->intel.device_id))
4565         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
4566
4567     for (i = 0; i < NUM_PP_MODULES; i++) {
4568         struct pp_module *pp_module = &pp_context->pp_modules[i];
4569         dri_bo_unreference(pp_module->kernel.bo);
4570         if (pp_module->kernel.bin && pp_module->kernel.size) {
4571             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
4572                                                 pp_module->kernel.name,
4573                                                 pp_module->kernel.size,
4574                                                 4096);
4575             assert(pp_module->kernel.bo);
4576             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
4577         } else {
4578             pp_module->kernel.bo = NULL;
4579         }
4580     }
4581
4582     /* static & inline parameters */
4583     if (IS_GEN7(i965->intel.device_id)) {
4584         pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
4585         pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
4586     } else {
4587         pp_context->pp_static_parameter = calloc(sizeof(struct pp_static_parameter), 1);
4588         pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
4589     }
4590
4591     pp_context->batch = batch;
4592 }
4593
4594 Bool
4595 i965_post_processing_init(VADriverContextP ctx)
4596 {
4597     struct i965_driver_data *i965 = i965_driver_data(ctx);
4598     struct i965_post_processing_context *pp_context = i965->pp_context;
4599
4600     if (HAS_PP(i965)) {
4601         if (pp_context == NULL) {
4602             pp_context = calloc(1, sizeof(*pp_context));
4603             i965_post_processing_context_init(ctx, pp_context, i965->batch);
4604             i965->pp_context = pp_context;
4605         }
4606     }
4607
4608     return True;
4609 }
4610
4611 static const int procfilter_to_pp_flag[VAProcFilterCount] = {
4612     PP_NULL,    /* VAProcFilterNone */
4613     PP_NV12_DN, /* VAProcFilterNoiseReduction */
4614     PP_NULL,    /* VAProcFilterDeblocking */
4615     PP_NV12_DNDI, /* VAProcFilterDeinterlacing */
4616     PP_NULL,    /* VAProcFilterSharpening */
4617     PP_NULL,    /* VAProcFilterColorBalance */
4618     PP_NULL,    /* VAProcFilterColorStandard */
4619     PP_NULL,    /* VAProcFilterFrameRateConversion */
4620 };
4621
4622 static const int proc_frame_to_pp_frame[3] = {
4623     I965_SURFACE_FLAG_FRAME,
4624     I965_SURFACE_FLAG_TOP_FIELD_FIRST,
4625     I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST
4626 };
4627
4628 static void 
4629 i965_proc_picture(VADriverContextP ctx, 
4630                   VAProfile profile, 
4631                   union codec_state *codec_state,
4632                   struct hw_context *hw_context)
4633 {
4634     struct i965_driver_data *i965 = i965_driver_data(ctx);
4635     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
4636     struct proc_state *proc_state = &codec_state->proc;
4637     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
4638     struct object_surface *obj_surface;
4639     struct i965_surface src_surface, dst_surface;
4640     VARectangle src_rect, dst_rect;
4641     VAStatus status;
4642     int i;
4643     VASurfaceID tmp_surfaces[VAProcFilterCount + 4];
4644     int num_tmp_surfaces = 0;
4645     unsigned int tiling = 0, swizzle = 0;
4646     int in_width, in_height;
4647
4648     assert(pipeline_param->surface != VA_INVALID_ID);
4649     assert(proc_state->current_render_target != VA_INVALID_ID);
4650
4651     obj_surface = SURFACE(pipeline_param->surface);
4652     in_width = obj_surface->orig_width;
4653     in_height = obj_surface->orig_height;
4654     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
4655
4656     src_surface.id = pipeline_param->surface;
4657     src_surface.type = I965_SURFACE_TYPE_SURFACE;
4658     src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
4659
4660     VASurfaceID out_surface_id = VA_INVALID_ID;
4661     if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2')) {
4662         src_surface.id = pipeline_param->surface;
4663         src_surface.type = I965_SURFACE_TYPE_SURFACE;
4664         src_surface.flags = I965_SURFACE_FLAG_FRAME;
4665         src_rect.x = 0;
4666         src_rect.y = 0;
4667         src_rect.width = in_width;
4668         src_rect.height = in_height;
4669
4670         status = i965_CreateSurfaces(ctx,
4671                                      in_width,
4672                                      in_height,
4673                                      VA_RT_FORMAT_YUV420,
4674                                      1,
4675                                      &out_surface_id);
4676         assert(status == VA_STATUS_SUCCESS);
4677         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
4678         obj_surface = SURFACE(out_surface_id);
4679         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
4680
4681         dst_surface.id = out_surface_id;
4682         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4683         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4684         dst_rect.x = 0;
4685         dst_rect.y = 0;
4686         dst_rect.width = in_width;
4687         dst_rect.height = in_height;
4688
4689         status = i965_image_processing(ctx,
4690                                        &src_surface,
4691                                        &src_rect,
4692                                        &dst_surface,
4693                                        &dst_rect);
4694         assert(status == VA_STATUS_SUCCESS);
4695
4696         src_surface.id = out_surface_id;
4697         src_surface.type = I965_SURFACE_TYPE_SURFACE;
4698         src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
4699     }
4700
4701     if (pipeline_param->surface_region) {
4702         src_rect.x = pipeline_param->surface_region->x;
4703         src_rect.y = pipeline_param->surface_region->y;
4704         src_rect.width = pipeline_param->surface_region->width;
4705         src_rect.height = pipeline_param->surface_region->height;
4706     } else {
4707         src_rect.x = 0;
4708         src_rect.y = 0;
4709         src_rect.width = in_width;
4710         src_rect.height = in_height;
4711     }
4712
4713     if (pipeline_param->output_region) {
4714         dst_rect.x = pipeline_param->output_region->x;
4715         dst_rect.y = pipeline_param->output_region->y;
4716         dst_rect.width = pipeline_param->output_region->width;
4717         dst_rect.height = pipeline_param->output_region->height;
4718     } else {
4719         dst_rect.x = 0;
4720         dst_rect.y = 0;
4721         dst_rect.width = in_width;
4722         dst_rect.height = in_height;
4723     }
4724
4725     for (i = 0; i < pipeline_param->num_filters; i++) {
4726         struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]);
4727         VAProcFilterParameterBufferBase *filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer;
4728         VAProcFilterType filter_type = filter_param->type;
4729         out_surface_id = VA_INVALID_ID;
4730         int kernel_index = procfilter_to_pp_flag[filter_type];
4731
4732         if (kernel_index != PP_NULL &&
4733             proc_context->pp_context.pp_modules[kernel_index].kernel.bo != NULL) {
4734             status = i965_CreateSurfaces(ctx,
4735                                          in_width,
4736                                          in_height,
4737                                          VA_RT_FORMAT_YUV420,
4738                                          1,
4739                                          &out_surface_id);
4740             assert(status == VA_STATUS_SUCCESS);
4741             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
4742             obj_surface = SURFACE(out_surface_id);
4743             i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4744             dst_surface.id = out_surface_id;
4745             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4746             status = i965_post_processing_internal(ctx, &proc_context->pp_context,
4747                                                    &src_surface,
4748                                                    &src_rect,
4749                                                    &dst_surface,
4750                                                    &src_rect,
4751                                                    kernel_index,
4752                                                    filter_param);
4753
4754             if (status == VA_STATUS_SUCCESS) {
4755                 src_surface.id = dst_surface.id;
4756                 src_surface.type = dst_surface.type;
4757                 src_surface.flags = dst_surface.flags;
4758             }
4759         }
4760     }
4761
4762     obj_surface = SURFACE(proc_state->current_render_target);
4763     int csc_needed = 0;
4764     if (obj_surface->fourcc && obj_surface->fourcc !=  VA_FOURCC('N','V','1','2')){
4765         csc_needed = 1;
4766         out_surface_id = VA_INVALID_ID;
4767         status = i965_CreateSurfaces(ctx,
4768                                      obj_surface->orig_width,
4769                                      obj_surface->orig_height,
4770                                      VA_RT_FORMAT_YUV420, 
4771                                      1,
4772                                      &out_surface_id);
4773         assert(status == VA_STATUS_SUCCESS);
4774         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
4775         struct object_surface *csc_surface = SURFACE(out_surface_id);
4776         i965_check_alloc_surface_bo(ctx, csc_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4777         dst_surface.id = out_surface_id;
4778     } else {
4779         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4780         dst_surface.id = proc_state->current_render_target;
4781     }
4782
4783     dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4784     i965_vpp_clear_surface(ctx, &proc_context->pp_context, proc_state->current_render_target, pipeline_param->output_background_color); 
4785
4786     // load/save doesn't support different origin offset for src and dst surface
4787     if (src_rect.width == dst_rect.width &&
4788         src_rect.height == dst_rect.height &&
4789         src_rect.x == dst_rect.x &&
4790         src_rect.y == dst_rect.y) {
4791         i965_post_processing_internal(ctx, &proc_context->pp_context,
4792                                       &src_surface,
4793                                       &src_rect,
4794                                       &dst_surface,
4795                                       &dst_rect,
4796                                       PP_NV12_LOAD_SAVE_N12,
4797                                       NULL);
4798     } else {
4799
4800         i965_post_processing_internal(ctx, &proc_context->pp_context,
4801                                       &src_surface,
4802                                       &src_rect,
4803                                       &dst_surface,
4804                                       &dst_rect,
4805                                       (pipeline_param->filter_flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC ?
4806                                       PP_NV12_AVS : PP_NV12_SCALING,
4807                                       NULL);
4808     }
4809
4810     if (csc_needed) {
4811         src_surface.id = dst_surface.id;
4812         src_surface.type = dst_surface.type;
4813         src_surface.flags = dst_surface.flags;
4814         dst_surface.id = proc_state->current_render_target;
4815         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4816         i965_image_processing(ctx, &src_surface, &dst_rect, &dst_surface, &dst_rect);
4817     }
4818     
4819     if (num_tmp_surfaces)
4820         i965_DestroySurfaces(ctx,
4821                              tmp_surfaces,
4822                              num_tmp_surfaces);
4823
4824     intel_batchbuffer_flush(hw_context->batch);
4825 }
4826
4827 static void
4828 i965_proc_context_destroy(void *hw_context)
4829 {
4830     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
4831
4832     i965_post_processing_context_finalize(&proc_context->pp_context);
4833     intel_batchbuffer_free(proc_context->base.batch);
4834     free(proc_context);
4835 }
4836
4837 struct hw_context *
4838 i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config)
4839 {
4840     struct intel_driver_data *intel = intel_driver_data(ctx);
4841     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
4842
4843     proc_context->base.destroy = i965_proc_context_destroy;
4844     proc_context->base.run = i965_proc_picture;
4845     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
4846     i965_post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
4847
4848     return (struct hw_context *)proc_context;
4849 }