VEBOX: Enable vebox pipeline for video process on HSW
[profile/ivi/vaapi-intel-driver.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_post_processing.h"
40 #include "i965_render.h"
41
/*
 * True when the device behind `ctx` is one of the generations this file
 * provides post-processing module tables for (Ironlake, GEN6 or GEN7).
 */
#define HAS_PP(ctx)                                 \
    (IS_IRONLAKE((ctx)->intel.device_id) ||         \
     IS_GEN6((ctx)->intel.device_id)     ||         \
     IS_GEN7((ctx)->intel.device_id))
45
/*
 * Surface-state / binding-table layout.
 *
 * Every surface-state slot is padded to a multiple of 32 bytes and sized
 * for the largest of the four state layouts (i965 and gen7, variants 1
 * and 2), so one stride fits every slot regardless of generation.
 */
#define SURFACE_STATE_PADDED_SIZE_0_I965        ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_I965        ALIGN(sizeof(struct i965_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_I965          MAX(SURFACE_STATE_PADDED_SIZE_0_I965, SURFACE_STATE_PADDED_SIZE_1_I965)

#define SURFACE_STATE_PADDED_SIZE_0_GEN7        ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_GEN7        ALIGN(sizeof(struct gen7_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN7          MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)

#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)

/*
 * Byte offset of surface-state slot `index`.  The argument is wrapped in
 * parentheses so that an expression such as `base + 1` is scaled as a
 * whole instead of only its first operand.
 */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))

/* The binding table sits at the offset of slot MAX_PP_SURFACES. */
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)
57
/*
 * Geometry constants shared with the GPU kernels.
 * NOTE(review): presumably the block size handled per kernel invocation
 * and the required alignment of the horizontal offset -- confirm against
 * the shader sources.
 */
#define GPU_ASM_BLOCK_WIDTH             16
#define GPU_ASM_BLOCK_HEIGHT            8
#define GPU_ASM_X_OFFSET_ALIGNMENT      4
61
62 static const uint32_t pp_null_gen5[][4] = {
63 #include "shaders/post_processing/gen5_6/null.g4b.gen5"
64 };
65
66 static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
67 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
68 };
69
70 static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
71 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
72 };
73
74 static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
75 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
76 };
77
78 static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
79 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
80 };
81
82 static const uint32_t pp_nv12_scaling_gen5[][4] = {
83 #include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
84 };
85
86 static const uint32_t pp_nv12_avs_gen5[][4] = {
87 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
88 };
89
90 static const uint32_t pp_nv12_dndi_gen5[][4] = {
91 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
92 };
93
94 static const uint32_t pp_nv12_dn_gen5[][4] = {
95 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
96 };
97
98 static const uint32_t pp_nv12_load_save_pa_gen5[][4] = {
99 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5"
100 };
101
102 static const uint32_t pp_pl3_load_save_pa_gen5[][4] = {
103 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5"
104 };
105
106 static const uint32_t pp_pa_load_save_nv12_gen5[][4] = {
107 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5"
108 };
109
110 static const uint32_t pp_pa_load_save_pl3_gen5[][4] = {
111 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5"
112 };
113
114 static const uint32_t pp_rgbx_load_save_nv12_gen5[][4] = {
115 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g4b.gen5"
116 };
117
118 static const uint32_t pp_nv12_load_save_rgbx_gen5[][4] = {
119 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g4b.gen5"
120 };
121
122 static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
123                                    const struct i965_surface *src_surface,
124                                    const VARectangle *src_rect,
125                                    struct i965_surface *dst_surface,
126                                    const VARectangle *dst_rect,
127                                    void *filter_param);
128 static VAStatus pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
129                                             const struct i965_surface *src_surface,
130                                             const VARectangle *src_rect,
131                                             struct i965_surface *dst_surface,
132                                             const VARectangle *dst_rect,
133                                             void *filter_param);
134 static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
135                                            const struct i965_surface *src_surface,
136                                            const VARectangle *src_rect,
137                                            struct i965_surface *dst_surface,
138                                            const VARectangle *dst_rect,
139                                            void *filter_param);
140 static VAStatus gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
141                                              const struct i965_surface *src_surface,
142                                              const VARectangle *src_rect,
143                                              struct i965_surface *dst_surface,
144                                              const VARectangle *dst_rect,
145                                              void *filter_param);
146 static VAStatus pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
147                                                 const struct i965_surface *src_surface,
148                                                 const VARectangle *src_rect,
149                                                 struct i965_surface *dst_surface,
150                                                 const VARectangle *dst_rect,
151                                                 void *filter_param);
152 static VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
153                                         const struct i965_surface *src_surface,
154                                         const VARectangle *src_rect,
155                                         struct i965_surface *dst_surface,
156                                         const VARectangle *dst_rect,
157                                         void *filter_param);
158 static VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
159                                       const struct i965_surface *src_surface,
160                                       const VARectangle *src_rect,
161                                       struct i965_surface *dst_surface,
162                                       const VARectangle *dst_rect,
163                                       void *filter_param);
164
165 static struct pp_module pp_modules_gen5[] = {
166     {
167         {
168             "NULL module (for testing)",
169             PP_NULL,
170             pp_null_gen5,
171             sizeof(pp_null_gen5),
172             NULL,
173         },
174
175         pp_null_initialize,
176     },
177
178     {
179         {
180             "NV12_NV12",
181             PP_NV12_LOAD_SAVE_N12,
182             pp_nv12_load_save_nv12_gen5,
183             sizeof(pp_nv12_load_save_nv12_gen5),
184             NULL,
185         },
186
187         pp_plx_load_save_plx_initialize,
188     },
189
190     {
191         {
192             "NV12_PL3",
193             PP_NV12_LOAD_SAVE_PL3,
194             pp_nv12_load_save_pl3_gen5,
195             sizeof(pp_nv12_load_save_pl3_gen5),
196             NULL,
197         },
198
199         pp_plx_load_save_plx_initialize,
200     },
201
202     {
203         {
204             "PL3_NV12",
205             PP_PL3_LOAD_SAVE_N12,
206             pp_pl3_load_save_nv12_gen5,
207             sizeof(pp_pl3_load_save_nv12_gen5),
208             NULL,
209         },
210
211         pp_plx_load_save_plx_initialize,
212     },
213
214     {
215         {
216             "PL3_PL3",
217             PP_PL3_LOAD_SAVE_N12,
218             pp_pl3_load_save_pl3_gen5,
219             sizeof(pp_pl3_load_save_pl3_gen5),
220             NULL,
221         },
222
223         pp_plx_load_save_plx_initialize
224     },
225
226     {
227         {
228             "NV12 Scaling module",
229             PP_NV12_SCALING,
230             pp_nv12_scaling_gen5,
231             sizeof(pp_nv12_scaling_gen5),
232             NULL,
233         },
234
235         pp_nv12_scaling_initialize,
236     },
237
238     {
239         {
240             "NV12 AVS module",
241             PP_NV12_AVS,
242             pp_nv12_avs_gen5,
243             sizeof(pp_nv12_avs_gen5),
244             NULL,
245         },
246
247         pp_nv12_avs_initialize_nlas,
248     },
249
250     {
251         {
252             "NV12 DNDI module",
253             PP_NV12_DNDI,
254             pp_nv12_dndi_gen5,
255             sizeof(pp_nv12_dndi_gen5),
256             NULL,
257         },
258
259         pp_nv12_dndi_initialize,
260     },
261
262     {
263         {
264             "NV12 DN module",
265             PP_NV12_DN,
266             pp_nv12_dn_gen5,
267             sizeof(pp_nv12_dn_gen5),
268             NULL,
269         },
270
271         pp_nv12_dn_initialize,
272     },
273
274     {
275         {
276             "NV12_PA module",
277             PP_NV12_LOAD_SAVE_PA,
278             pp_nv12_load_save_pa_gen5,
279             sizeof(pp_nv12_load_save_pa_gen5),
280             NULL,
281         },
282     
283         pp_plx_load_save_plx_initialize,
284     },
285
286     {
287         {
288             "PL3_PA module",
289             PP_PL3_LOAD_SAVE_PA,
290             pp_pl3_load_save_pa_gen5,
291             sizeof(pp_pl3_load_save_pa_gen5),
292             NULL,
293         },
294     
295         pp_plx_load_save_plx_initialize,
296     },
297
298     {
299         {
300             "PA_NV12 module",
301             PP_PA_LOAD_SAVE_NV12,
302             pp_pa_load_save_nv12_gen5,
303             sizeof(pp_pa_load_save_nv12_gen5),
304             NULL,
305         },
306     
307         pp_plx_load_save_plx_initialize,
308     },
309
310     {
311         {
312             "PA_PL3 module",
313             PP_PA_LOAD_SAVE_PL3,
314             pp_pa_load_save_pl3_gen5,
315             sizeof(pp_pa_load_save_pl3_gen5),
316             NULL,
317         },
318     
319         pp_plx_load_save_plx_initialize,
320     },
321
322     {
323         {
324             "RGBX_NV12 module",
325             PP_RGBX_LOAD_SAVE_NV12,
326             pp_rgbx_load_save_nv12_gen5,
327             sizeof(pp_rgbx_load_save_nv12_gen5),
328             NULL,
329         },
330     
331         pp_plx_load_save_plx_initialize,
332     },
333             
334     {
335         {
336             "NV12_RGBX module",
337             PP_NV12_LOAD_SAVE_RGBX,
338             pp_nv12_load_save_rgbx_gen5,
339             sizeof(pp_nv12_load_save_rgbx_gen5),
340             NULL,
341         },
342     
343         pp_plx_load_save_plx_initialize,
344     },
345                     
346 };
347
348 static const uint32_t pp_null_gen6[][4] = {
349 #include "shaders/post_processing/gen5_6/null.g6b"
350 };
351
352 static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
353 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
354 };
355
356 static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
357 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
358 };
359
360 static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
361 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
362 };
363
364 static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
365 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
366 };
367
368 static const uint32_t pp_nv12_scaling_gen6[][4] = {
369 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
370 };
371
372 static const uint32_t pp_nv12_avs_gen6[][4] = {
373 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
374 };
375
376 static const uint32_t pp_nv12_dndi_gen6[][4] = {
377 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
378 };
379
380 static const uint32_t pp_nv12_dn_gen6[][4] = {
381 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
382 };
383
384 static const uint32_t pp_nv12_load_save_pa_gen6[][4] = {
385 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g6b"
386 };
387
388 static const uint32_t pp_pl3_load_save_pa_gen6[][4] = {
389 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g6b"
390 };
391
392 static const uint32_t pp_pa_load_save_nv12_gen6[][4] = {
393 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g6b"
394 };
395
396 static const uint32_t pp_pa_load_save_pl3_gen6[][4] = {
397 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g6b"
398 };
399
400 static const uint32_t pp_rgbx_load_save_nv12_gen6[][4] = {
401 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g6b"
402 };
403
404 static const uint32_t pp_nv12_load_save_rgbx_gen6[][4] = {
405 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g6b"
406 };
407
408 static struct pp_module pp_modules_gen6[] = {
409     {
410         {
411             "NULL module (for testing)",
412             PP_NULL,
413             pp_null_gen6,
414             sizeof(pp_null_gen6),
415             NULL,
416         },
417
418         pp_null_initialize,
419     },
420
421     {
422         {
423             "NV12_NV12",
424             PP_NV12_LOAD_SAVE_N12,
425             pp_nv12_load_save_nv12_gen6,
426             sizeof(pp_nv12_load_save_nv12_gen6),
427             NULL,
428         },
429
430         pp_plx_load_save_plx_initialize,
431     },
432
433     {
434         {
435             "NV12_PL3",
436             PP_NV12_LOAD_SAVE_PL3,
437             pp_nv12_load_save_pl3_gen6,
438             sizeof(pp_nv12_load_save_pl3_gen6),
439             NULL,
440         },
441         
442         pp_plx_load_save_plx_initialize,
443     },
444
445     {
446         {
447             "PL3_NV12",
448             PP_PL3_LOAD_SAVE_N12,
449             pp_pl3_load_save_nv12_gen6,
450             sizeof(pp_pl3_load_save_nv12_gen6),
451             NULL,
452         },
453
454         pp_plx_load_save_plx_initialize,
455     },
456
457     {
458         {
459             "PL3_PL3",
460             PP_PL3_LOAD_SAVE_N12,
461             pp_pl3_load_save_pl3_gen6,
462             sizeof(pp_pl3_load_save_pl3_gen6),
463             NULL,
464         },
465
466         pp_plx_load_save_plx_initialize,
467     },
468
469     {
470         {
471             "NV12 Scaling module",
472             PP_NV12_SCALING,
473             pp_nv12_scaling_gen6,
474             sizeof(pp_nv12_scaling_gen6),
475             NULL,
476         },
477
478         gen6_nv12_scaling_initialize,
479     },
480
481     {
482         {
483             "NV12 AVS module",
484             PP_NV12_AVS,
485             pp_nv12_avs_gen6,
486             sizeof(pp_nv12_avs_gen6),
487             NULL,
488         },
489
490         pp_nv12_avs_initialize_nlas,
491     },
492
493     {
494         {
495             "NV12 DNDI module",
496             PP_NV12_DNDI,
497             pp_nv12_dndi_gen6,
498             sizeof(pp_nv12_dndi_gen6),
499             NULL,
500         },
501
502         pp_nv12_dndi_initialize,
503     },
504
505     {
506         {
507             "NV12 DN module",
508             PP_NV12_DN,
509             pp_nv12_dn_gen6,
510             sizeof(pp_nv12_dn_gen6),
511             NULL,
512         },
513
514         pp_nv12_dn_initialize,
515     },
516     {
517         {
518             "NV12_PA module",
519             PP_NV12_LOAD_SAVE_PA,
520             pp_nv12_load_save_pa_gen6,
521             sizeof(pp_nv12_load_save_pa_gen6),
522             NULL,
523         },
524     
525         pp_plx_load_save_plx_initialize,
526     },
527     
528     {
529         {
530             "PL3_PA module",
531             PP_PL3_LOAD_SAVE_PA,
532             pp_pl3_load_save_pa_gen6,
533             sizeof(pp_pl3_load_save_pa_gen6),
534             NULL,
535         },
536     
537         pp_plx_load_save_plx_initialize,
538     },
539     
540     {
541         {
542             "PA_NV12 module",
543             PP_PA_LOAD_SAVE_NV12,
544             pp_pa_load_save_nv12_gen6,
545             sizeof(pp_pa_load_save_nv12_gen6),
546             NULL,
547         },
548     
549         pp_plx_load_save_plx_initialize,
550     },
551
552     {
553         {
554             "PA_PL3 module",
555             PP_PA_LOAD_SAVE_PL3,
556             pp_pa_load_save_pl3_gen6,
557             sizeof(pp_pa_load_save_pl3_gen6),
558             NULL,
559         },
560     
561         pp_plx_load_save_plx_initialize,
562     },
563     
564     {
565         {
566             "RGBX_NV12 module",
567             PP_RGBX_LOAD_SAVE_NV12,
568             pp_rgbx_load_save_nv12_gen6,
569             sizeof(pp_rgbx_load_save_nv12_gen6),
570             NULL,
571         },
572     
573         pp_plx_load_save_plx_initialize,
574     },
575
576     {
577         {
578             "NV12_RGBX module",
579             PP_NV12_LOAD_SAVE_RGBX,
580             pp_nv12_load_save_rgbx_gen6,
581             sizeof(pp_nv12_load_save_rgbx_gen6),
582             NULL,
583         },
584     
585         pp_plx_load_save_plx_initialize,
586     },
587 };
588
589 static const uint32_t pp_null_gen7[][4] = {
590 };
591
592 static const uint32_t pp_nv12_load_save_nv12_gen7[][4] = {
593 #include "shaders/post_processing/gen7/pl2_to_pl2.g7b"
594 };
595
596 static const uint32_t pp_nv12_load_save_pl3_gen7[][4] = {
597 #include "shaders/post_processing/gen7/pl2_to_pl3.g7b"
598 };
599
600 static const uint32_t pp_pl3_load_save_nv12_gen7[][4] = {
601 #include "shaders/post_processing/gen7/pl3_to_pl2.g7b"
602 };
603
604 static const uint32_t pp_pl3_load_save_pl3_gen7[][4] = {
605 #include "shaders/post_processing/gen7/pl3_to_pl3.g7b"
606 };
607
608 static const uint32_t pp_nv12_scaling_gen7[][4] = {
609 #include "shaders/post_processing/gen7/avs.g7b"
610 };
611
612 static const uint32_t pp_nv12_avs_gen7[][4] = {
613 #include "shaders/post_processing/gen7/avs.g7b"
614 };
615
616 static const uint32_t pp_nv12_dndi_gen7[][4] = {
617 #include "shaders/post_processing/gen7/dndi.g7b"
618 };
619
620 static const uint32_t pp_nv12_dn_gen7[][4] = {
621 #include "shaders/post_processing/gen7/nv12_dn_nv12.g7b"
622 };
623 static const uint32_t pp_nv12_load_save_pa_gen7[][4] = {
624 #include "shaders/post_processing/gen7/pl2_to_pa.g7b"
625 };
626 static const uint32_t pp_pl3_load_save_pa_gen7[][4] = {
627 #include "shaders/post_processing/gen7/pl3_to_pa.g7b"
628 };
629 static const uint32_t pp_pa_load_save_nv12_gen7[][4] = {
630 #include "shaders/post_processing/gen7/pa_to_pl2.g7b"
631 };
632 static const uint32_t pp_pa_load_save_pl3_gen7[][4] = {
633 #include "shaders/post_processing/gen7/pa_to_pl3.g7b"
634 };
635 static const uint32_t pp_rgbx_load_save_nv12_gen7[][4] = {
636 };
637 static const uint32_t pp_nv12_load_save_rgbx_gen7[][4] = {
638 };
639
640 static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
641                                            const struct i965_surface *src_surface,
642                                            const VARectangle *src_rect,
643                                            struct i965_surface *dst_surface,
644                                            const VARectangle *dst_rect,
645                                            void *filter_param);
646 static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
647                                              const struct i965_surface *src_surface,
648                                              const VARectangle *src_rect,
649                                              struct i965_surface *dst_surface,
650                                              const VARectangle *dst_rect,
651                                              void *filter_param);
652 static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
653                                            const struct i965_surface *src_surface,
654                                            const VARectangle *src_rect,
655                                            struct i965_surface *dst_surface,
656                                            const VARectangle *dst_rect,
657                                            void *filter_param);
658
659 static struct pp_module pp_modules_gen7[] = {
660     {
661         {
662             "NULL module (for testing)",
663             PP_NULL,
664             pp_null_gen7,
665             sizeof(pp_null_gen7),
666             NULL,
667         },
668
669         pp_null_initialize,
670     },
671
672     {
673         {
674             "NV12_NV12",
675             PP_NV12_LOAD_SAVE_N12,
676             pp_nv12_load_save_nv12_gen7,
677             sizeof(pp_nv12_load_save_nv12_gen7),
678             NULL,
679         },
680
681         gen7_pp_plx_avs_initialize,
682     },
683
684     {
685         {
686             "NV12_PL3",
687             PP_NV12_LOAD_SAVE_PL3,
688             pp_nv12_load_save_pl3_gen7,
689             sizeof(pp_nv12_load_save_pl3_gen7),
690             NULL,
691         },
692         
693         gen7_pp_plx_avs_initialize,
694     },
695
696     {
697         {
698             "PL3_NV12",
699             PP_PL3_LOAD_SAVE_N12,
700             pp_pl3_load_save_nv12_gen7,
701             sizeof(pp_pl3_load_save_nv12_gen7),
702             NULL,
703         },
704
705         gen7_pp_plx_avs_initialize,
706     },
707
708     {
709         {
710             "PL3_PL3",
711             PP_PL3_LOAD_SAVE_N12,
712             pp_pl3_load_save_pl3_gen7,
713             sizeof(pp_pl3_load_save_pl3_gen7),
714             NULL,
715         },
716
717         gen7_pp_plx_avs_initialize,
718     },
719
720     {
721         {
722             "NV12 Scaling module",
723             PP_NV12_SCALING,
724             pp_nv12_scaling_gen7,
725             sizeof(pp_nv12_scaling_gen7),
726             NULL,
727         },
728
729         gen7_pp_plx_avs_initialize,
730     },
731
732     {
733         {
734             "NV12 AVS module",
735             PP_NV12_AVS,
736             pp_nv12_avs_gen7,
737             sizeof(pp_nv12_avs_gen7),
738             NULL,
739         },
740
741         gen7_pp_plx_avs_initialize,
742     },
743
744     {
745         {
746             "NV12 DNDI module",
747             PP_NV12_DNDI,
748             pp_nv12_dndi_gen7,
749             sizeof(pp_nv12_dndi_gen7),
750             NULL,
751         },
752
753         gen7_pp_nv12_dndi_initialize,
754     },
755
756     {
757         {
758             "NV12 DN module",
759             PP_NV12_DN,
760             pp_nv12_dn_gen7,
761             sizeof(pp_nv12_dn_gen7),
762             NULL,
763         },
764
765         gen7_pp_nv12_dn_initialize,
766     },
767     {
768         {
769             "NV12_PA module",
770             PP_NV12_LOAD_SAVE_PA,
771             pp_nv12_load_save_pa_gen7,
772             sizeof(pp_nv12_load_save_pa_gen7),
773             NULL,
774         },
775     
776         gen7_pp_plx_avs_initialize,
777     },
778
779     {
780         {
781             "PL3_PA module",
782             PP_PL3_LOAD_SAVE_PA,
783             pp_pl3_load_save_pa_gen7,
784             sizeof(pp_pl3_load_save_pa_gen7),
785             NULL,
786         },
787     
788         gen7_pp_plx_avs_initialize,
789     },
790
791     {
792         {
793             "PA_NV12 module",
794             PP_PA_LOAD_SAVE_NV12,
795             pp_pa_load_save_nv12_gen7,
796             sizeof(pp_pa_load_save_nv12_gen7),
797             NULL,
798         },
799     
800         gen7_pp_plx_avs_initialize,
801     },
802
803     {
804         {
805             "PA_PL3 module",
806             PP_PA_LOAD_SAVE_PL3,
807             pp_pa_load_save_pl3_gen7,
808             sizeof(pp_pa_load_save_pl3_gen7),
809             NULL,
810         },
811     
812         gen7_pp_plx_avs_initialize,
813     },
814     
815     {
816         {
817             "RGBX_NV12 module",
818             PP_RGBX_LOAD_SAVE_NV12,
819             pp_rgbx_load_save_nv12_gen7,
820             sizeof(pp_rgbx_load_save_nv12_gen7),
821             NULL,
822         },
823     
824         pp_plx_load_save_plx_initialize,
825     },
826
827     {
828         {
829             "NV12_RGBX module",
830             PP_NV12_LOAD_SAVE_RGBX,
831             pp_nv12_load_save_rgbx_gen7,
832             sizeof(pp_nv12_load_save_rgbx_gen7),
833             NULL,
834         },
835     
836         pp_plx_load_save_plx_initialize,
837     },
838             
839 };
840
841 static int
842 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
843 {
844     struct i965_driver_data *i965 = i965_driver_data(ctx);
845     int fourcc;
846
847     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
848         struct object_image *obj_image = IMAGE(surface->id);
849         fourcc = obj_image->image.format.fourcc;
850     } else {
851         struct object_surface *obj_surface = SURFACE(surface->id);
852         fourcc = obj_surface->fourcc;
853     }
854
855     return fourcc;
856 }
857
858 static void
859 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
860 {
861     switch (tiling) {
862     case I915_TILING_NONE:
863         ss->ss3.tiled_surface = 0;
864         ss->ss3.tile_walk = 0;
865         break;
866     case I915_TILING_X:
867         ss->ss3.tiled_surface = 1;
868         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
869         break;
870     case I915_TILING_Y:
871         ss->ss3.tiled_surface = 1;
872         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
873         break;
874     }
875 }
876
877 static void
878 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
879 {
880     switch (tiling) {
881     case I915_TILING_NONE:
882         ss->ss2.tiled_surface = 0;
883         ss->ss2.tile_walk = 0;
884         break;
885     case I915_TILING_X:
886         ss->ss2.tiled_surface = 1;
887         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
888         break;
889     case I915_TILING_Y:
890         ss->ss2.tiled_surface = 1;
891         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
892         break;
893     }
894 }
895
896 static void
897 gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
898 {
899     switch (tiling) {
900     case I915_TILING_NONE:
901         ss->ss0.tiled_surface = 0;
902         ss->ss0.tile_walk = 0;
903         break;
904     case I915_TILING_X:
905         ss->ss0.tiled_surface = 1;
906         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
907         break;
908     case I915_TILING_Y:
909         ss->ss0.tiled_surface = 1;
910         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
911         break;
912     }
913 }
914
915 static void
916 gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
917 {
918     switch (tiling) {
919     case I915_TILING_NONE:
920         ss->ss2.tiled_surface = 0;
921         ss->ss2.tile_walk = 0;
922         break;
923     case I915_TILING_X:
924         ss->ss2.tiled_surface = 1;
925         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
926         break;
927     case I915_TILING_Y:
928         ss->ss2.tiled_surface = 1;
929         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
930         break;
931     }
932 }
933
934 static void
935 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
936 {
937     struct i965_interface_descriptor *desc;
938     dri_bo *bo;
939     int pp_index = pp_context->current_pp;
940
941     bo = pp_context->idrt.bo;
942     dri_bo_map(bo, 1);
943     assert(bo->virtual);
944     desc = bo->virtual;
945     memset(desc, 0, sizeof(*desc));
946     desc->desc0.grf_reg_blocks = 10;
947     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
948     desc->desc1.const_urb_entry_read_offset = 0;
949     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
950     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
951     desc->desc2.sampler_count = 0;
952     desc->desc3.binding_table_entry_count = 0;
953     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
954
955     dri_bo_emit_reloc(bo,
956                       I915_GEM_DOMAIN_INSTRUCTION, 0,
957                       desc->desc0.grf_reg_blocks,
958                       offsetof(struct i965_interface_descriptor, desc0),
959                       pp_context->pp_modules[pp_index].kernel.bo);
960
961     dri_bo_emit_reloc(bo,
962                       I915_GEM_DOMAIN_INSTRUCTION, 0,
963                       desc->desc2.sampler_count << 2,
964                       offsetof(struct i965_interface_descriptor, desc2),
965                       pp_context->sampler_state_table.bo);
966
967     dri_bo_unmap(bo);
968     pp_context->idrt.num_interface_descriptors++;
969 }
970
971 static void
972 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
973 {
974     struct i965_vfe_state *vfe_state;
975     dri_bo *bo;
976
977     bo = pp_context->vfe_state.bo;
978     dri_bo_map(bo, 1);
979     assert(bo->virtual);
980     vfe_state = bo->virtual;
981     memset(vfe_state, 0, sizeof(*vfe_state));
982     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
983     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
984     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
985     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
986     vfe_state->vfe1.children_present = 0;
987     vfe_state->vfe2.interface_descriptor_base = 
988         pp_context->idrt.bo->offset >> 4; /* reloc */
989     dri_bo_emit_reloc(bo,
990                       I915_GEM_DOMAIN_INSTRUCTION, 0,
991                       0,
992                       offsetof(struct i965_vfe_state, vfe2),
993                       pp_context->idrt.bo);
994     dri_bo_unmap(bo);
995 }
996
997 static void
998 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
999 {
1000     unsigned char *constant_buffer;
1001     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1002
1003     assert(sizeof(*pp_static_parameter) == 128);
1004     dri_bo_map(pp_context->curbe.bo, 1);
1005     assert(pp_context->curbe.bo->virtual);
1006     constant_buffer = pp_context->curbe.bo->virtual;
1007     memcpy(constant_buffer, pp_static_parameter, sizeof(*pp_static_parameter));
1008     dri_bo_unmap(pp_context->curbe.bo);
1009 }
1010
1011 static void
1012 ironlake_pp_states_setup(VADriverContextP ctx,
1013                          struct i965_post_processing_context *pp_context)
1014 {
1015     ironlake_pp_interface_descriptor_table(pp_context);
1016     ironlake_pp_vfe_state(pp_context);
1017     ironlake_pp_upload_constants(pp_context);
1018 }
1019
1020 static void
1021 ironlake_pp_pipeline_select(VADriverContextP ctx,
1022                             struct i965_post_processing_context *pp_context)
1023 {
1024     struct intel_batchbuffer *batch = pp_context->batch;
1025
1026     BEGIN_BATCH(batch, 1);
1027     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
1028     ADVANCE_BATCH(batch);
1029 }
1030
1031 static void
1032 ironlake_pp_urb_layout(VADriverContextP ctx,
1033                        struct i965_post_processing_context *pp_context)
1034 {
1035     struct intel_batchbuffer *batch = pp_context->batch;
1036     unsigned int vfe_fence, cs_fence;
1037
1038     vfe_fence = pp_context->urb.cs_start;
1039     cs_fence = pp_context->urb.size;
1040
1041     BEGIN_BATCH(batch, 3);
1042     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
1043     OUT_BATCH(batch, 0);
1044     OUT_BATCH(batch, 
1045               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
1046               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
1047     ADVANCE_BATCH(batch);
1048 }
1049
1050 static void
1051 ironlake_pp_state_base_address(VADriverContextP ctx,
1052                                struct i965_post_processing_context *pp_context)
1053 {
1054     struct intel_batchbuffer *batch = pp_context->batch;
1055
1056     BEGIN_BATCH(batch, 8);
1057     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
1058     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1059     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
1060     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1061     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1062     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1063     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1064     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1065     ADVANCE_BATCH(batch);
1066 }
1067
1068 static void
1069 ironlake_pp_state_pointers(VADriverContextP ctx,
1070                            struct i965_post_processing_context *pp_context)
1071 {
1072     struct intel_batchbuffer *batch = pp_context->batch;
1073
1074     BEGIN_BATCH(batch, 3);
1075     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
1076     OUT_BATCH(batch, 0);
1077     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1078     ADVANCE_BATCH(batch);
1079 }
1080
1081 static void 
1082 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
1083                           struct i965_post_processing_context *pp_context)
1084 {
1085     struct intel_batchbuffer *batch = pp_context->batch;
1086
1087     BEGIN_BATCH(batch, 2);
1088     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
1089     OUT_BATCH(batch,
1090               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
1091               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
1092     ADVANCE_BATCH(batch);
1093 }
1094
1095 static void
1096 ironlake_pp_constant_buffer(VADriverContextP ctx,
1097                             struct i965_post_processing_context *pp_context)
1098 {
1099     struct intel_batchbuffer *batch = pp_context->batch;
1100
1101     BEGIN_BATCH(batch, 2);
1102     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1103     OUT_RELOC(batch, pp_context->curbe.bo,
1104               I915_GEM_DOMAIN_INSTRUCTION, 0,
1105               pp_context->urb.size_cs_entry - 1);
1106     ADVANCE_BATCH(batch);    
1107 }
1108
1109 static void
1110 ironlake_pp_object_walker(VADriverContextP ctx,
1111                           struct i965_post_processing_context *pp_context)
1112 {
1113     struct intel_batchbuffer *batch = pp_context->batch;
1114     int x, x_steps, y, y_steps;
1115     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1116
1117     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
1118     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
1119
1120     for (y = 0; y < y_steps; y++) {
1121         for (x = 0; x < x_steps; x++) {
1122             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
1123                 BEGIN_BATCH(batch, 20);
1124                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
1125                 OUT_BATCH(batch, 0);
1126                 OUT_BATCH(batch, 0); /* no indirect data */
1127                 OUT_BATCH(batch, 0);
1128
1129                 /* inline data grf 5-6 */
1130                 assert(sizeof(*pp_inline_parameter) == 64);
1131                 intel_batchbuffer_data(batch, pp_inline_parameter, sizeof(*pp_inline_parameter));
1132
1133                 ADVANCE_BATCH(batch);
1134             }
1135         }
1136     }
1137 }
1138
1139 static void
1140 ironlake_pp_pipeline_setup(VADriverContextP ctx,
1141                            struct i965_post_processing_context *pp_context)
1142 {
1143     struct intel_batchbuffer *batch = pp_context->batch;
1144
1145     intel_batchbuffer_start_atomic(batch, 0x1000);
1146     intel_batchbuffer_emit_mi_flush(batch);
1147     ironlake_pp_pipeline_select(ctx, pp_context);
1148     ironlake_pp_state_base_address(ctx, pp_context);
1149     ironlake_pp_state_pointers(ctx, pp_context);
1150     ironlake_pp_urb_layout(ctx, pp_context);
1151     ironlake_pp_cs_urb_layout(ctx, pp_context);
1152     ironlake_pp_constant_buffer(ctx, pp_context);
1153     ironlake_pp_object_walker(ctx, pp_context);
1154     intel_batchbuffer_end_atomic(batch);
1155 }
1156
1157 // update u/v offset when the surface format are packed yuv
1158 static void i965_update_src_surface_static_parameter(
1159     VADriverContextP    ctx, 
1160     struct i965_post_processing_context *pp_context,
1161     const struct i965_surface *surface)
1162 {
1163     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1164     int fourcc = pp_get_surface_fourcc(ctx, surface);
1165
1166     switch (fourcc) {
1167     case VA_FOURCC('Y', 'U', 'Y', '2'):
1168         pp_static_parameter->grf1.source_packed_u_offset = 1;
1169         pp_static_parameter->grf1.source_packed_v_offset = 3;
1170         break;
1171     case VA_FOURCC('U', 'Y', 'V', 'Y'):
1172         pp_static_parameter->grf1.source_packed_y_offset = 1;
1173         pp_static_parameter->grf1.source_packed_v_offset = 2;
1174         break;
1175     case VA_FOURCC('B', 'G', 'R', 'X'):
1176     case VA_FOURCC('B', 'G', 'R', 'A'):
1177         pp_static_parameter->grf1.source_rgb_layout = 0;
1178         break;
1179     case VA_FOURCC('R', 'G', 'B', 'X'):
1180     case VA_FOURCC('R', 'G', 'B', 'A'):
1181         pp_static_parameter->grf1.source_rgb_layout = 1;
1182         break;
1183     default:
1184         break;
1185     }
1186     
1187 }
1188
1189 static void i965_update_dst_surface_static_parameter(
1190     VADriverContextP    ctx, 
1191     struct i965_post_processing_context *pp_context,
1192     const struct i965_surface *surface)
1193 {
1194     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1195     int fourcc = pp_get_surface_fourcc(ctx, surface);
1196
1197     switch (fourcc) {
1198     case VA_FOURCC('Y', 'U', 'Y', '2'):
1199         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_u_offset = 1;
1200         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 3;
1201         break;
1202     case VA_FOURCC('U', 'Y', 'V', 'Y'):
1203         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_y_offset = 1;
1204         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 2;
1205         break;
1206     case VA_FOURCC('B', 'G', 'R', 'X'):
1207     case VA_FOURCC('B', 'G', 'R', 'A'):
1208         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 0;
1209         break;
1210     case VA_FOURCC('R', 'G', 'B', 'X'):
1211     case VA_FOURCC('R', 'G', 'B', 'A'):
1212         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 1;
1213         break;
1214     default:
1215         break;
1216     }
1217     
1218 }
1219
1220 static void
1221 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1222                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1223                           int width, int height, int pitch, int format, 
1224                           int index, int is_target)
1225 {
1226     struct i965_surface_state *ss;
1227     dri_bo *ss_bo;
1228     unsigned int tiling;
1229     unsigned int swizzle;
1230
1231     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1232     ss_bo = pp_context->surface_state_binding_table.bo;
1233     assert(ss_bo);
1234
1235     dri_bo_map(ss_bo, True);
1236     assert(ss_bo->virtual);
1237     ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1238     memset(ss, 0, sizeof(*ss));
1239     ss->ss0.surface_type = I965_SURFACE_2D;
1240     ss->ss0.surface_format = format;
1241     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1242     ss->ss2.width = width - 1;
1243     ss->ss2.height = height - 1;
1244     ss->ss3.pitch = pitch - 1;
1245     pp_set_surface_tiling(ss, tiling);
1246     dri_bo_emit_reloc(ss_bo,
1247                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1248                       surf_bo_offset,
1249                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
1250                       surf_bo);
1251     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1252     dri_bo_unmap(ss_bo);
1253 }
1254
1255 static void
1256 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1257                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1258                            int width, int height, int wpitch,
1259                            int xoffset, int yoffset,
1260                            int format, int interleave_chroma,
1261                            int index)
1262 {
1263     struct i965_surface_state2 *ss2;
1264     dri_bo *ss2_bo;
1265     unsigned int tiling;
1266     unsigned int swizzle;
1267
1268     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1269     ss2_bo = pp_context->surface_state_binding_table.bo;
1270     assert(ss2_bo);
1271
1272     dri_bo_map(ss2_bo, True);
1273     assert(ss2_bo->virtual);
1274     ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1275     memset(ss2, 0, sizeof(*ss2));
1276     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1277     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1278     ss2->ss1.width = width - 1;
1279     ss2->ss1.height = height - 1;
1280     ss2->ss2.pitch = wpitch - 1;
1281     ss2->ss2.interleave_chroma = interleave_chroma;
1282     ss2->ss2.surface_format = format;
1283     ss2->ss3.x_offset_for_cb = xoffset;
1284     ss2->ss3.y_offset_for_cb = yoffset;
1285     pp_set_surface2_tiling(ss2, tiling);
1286     dri_bo_emit_reloc(ss2_bo,
1287                       I915_GEM_DOMAIN_RENDER, 0,
1288                       surf_bo_offset,
1289                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
1290                       surf_bo);
1291     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1292     dri_bo_unmap(ss2_bo);
1293 }
1294
1295 static void
1296 gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1297                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1298                           int width, int height, int pitch, int format, 
1299                           int index, int is_target)
1300 {
1301     struct i965_driver_data * const i965 = i965_driver_data(ctx);  
1302     struct gen7_surface_state *ss;
1303     dri_bo *ss_bo;
1304     unsigned int tiling;
1305     unsigned int swizzle;
1306
1307     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1308     ss_bo = pp_context->surface_state_binding_table.bo;
1309     assert(ss_bo);
1310
1311     dri_bo_map(ss_bo, True);
1312     assert(ss_bo->virtual);
1313     ss = (struct gen7_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1314     memset(ss, 0, sizeof(*ss));
1315     ss->ss0.surface_type = I965_SURFACE_2D;
1316     ss->ss0.surface_format = format;
1317     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1318     ss->ss2.width = width - 1;
1319     ss->ss2.height = height - 1;
1320     ss->ss3.pitch = pitch - 1;
1321     gen7_pp_set_surface_tiling(ss, tiling);
1322     if (IS_HASWELL(i965->intel.device_id))
1323         gen7_render_set_surface_scs(ss);
1324     dri_bo_emit_reloc(ss_bo,
1325                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1326                       surf_bo_offset,
1327                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
1328                       surf_bo);
1329     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1330     dri_bo_unmap(ss_bo);
1331 }
1332
1333 static void
1334 gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1335                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1336                            int width, int height, int wpitch,
1337                            int xoffset, int yoffset,
1338                            int format, int interleave_chroma,
1339                            int index)
1340 {
1341     struct gen7_surface_state2 *ss2;
1342     dri_bo *ss2_bo;
1343     unsigned int tiling;
1344     unsigned int swizzle;
1345
1346     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1347     ss2_bo = pp_context->surface_state_binding_table.bo;
1348     assert(ss2_bo);
1349
1350     dri_bo_map(ss2_bo, True);
1351     assert(ss2_bo->virtual);
1352     ss2 = (struct gen7_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1353     memset(ss2, 0, sizeof(*ss2));
1354     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1355     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1356     ss2->ss1.width = width - 1;
1357     ss2->ss1.height = height - 1;
1358     ss2->ss2.pitch = wpitch - 1;
1359     ss2->ss2.interleave_chroma = interleave_chroma;
1360     ss2->ss2.surface_format = format;
1361     ss2->ss3.x_offset_for_cb = xoffset;
1362     ss2->ss3.y_offset_for_cb = yoffset;
1363     gen7_pp_set_surface2_tiling(ss2, tiling);
1364     dri_bo_emit_reloc(ss2_bo,
1365                       I915_GEM_DOMAIN_RENDER, 0,
1366                       surf_bo_offset,
1367                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
1368                       surf_bo);
1369     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1370     dri_bo_unmap(ss2_bo);
1371 }
1372
1373 static void 
1374 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1375                                 const struct i965_surface *surface, 
1376                                 int base_index, int is_target,
1377                                 int *width, int *height, int *pitch, int *offset)
1378 {
1379     struct i965_driver_data *i965 = i965_driver_data(ctx);
1380     struct object_surface *obj_surface;
1381     struct object_image *obj_image;
1382     dri_bo *bo;
1383     int fourcc = pp_get_surface_fourcc(ctx, surface);
1384     const int Y = 0;
1385     const int U = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 2 : 1;
1386     const int V = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 1 : 2;
1387     const int UV = 1;
1388     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1389     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); 
1390     int full_packed_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || 
1391                               fourcc == VA_FOURCC('R', 'G', 'B', 'X') || 
1392                               fourcc == VA_FOURCC('B', 'G', 'R', 'A') || 
1393                               fourcc == VA_FOURCC('B', 'G', 'R', 'X'));
1394     int scale_factor_of_1st_plane_width_in_byte = 1;
1395                               
1396     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1397         obj_surface = SURFACE(surface->id);
1398         bo = obj_surface->bo;
1399         width[0] = obj_surface->orig_width;
1400         height[0] = obj_surface->orig_height;
1401         pitch[0] = obj_surface->width;
1402         offset[0] = 0;
1403
1404         if (full_packed_format) {
1405             scale_factor_of_1st_plane_width_in_byte = 4; 
1406             pitch[0] = obj_surface->width * 4;
1407         }
1408         else if (packed_yuv ) {
1409             scale_factor_of_1st_plane_width_in_byte =  2; 
1410             pitch[0] = obj_surface->width * 2;
1411         }
1412         else if (interleaved_uv) {
1413             width[1] = obj_surface->orig_width;
1414             height[1] = obj_surface->orig_height / 2;
1415             pitch[1] = obj_surface->width;
1416             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1417         } else {
1418             width[1] = obj_surface->orig_width / 2;
1419             height[1] = obj_surface->orig_height / 2;
1420             pitch[1] = obj_surface->width / 2;
1421             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1422             width[2] = obj_surface->orig_width / 2;
1423             height[2] = obj_surface->orig_height / 2;
1424             pitch[2] = obj_surface->width / 2;
1425             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
1426         }
1427     } else {
1428         obj_image = IMAGE(surface->id);
1429         bo = obj_image->bo;
1430         width[0] = obj_image->image.width;
1431         height[0] = obj_image->image.height;
1432         pitch[0] = obj_image->image.pitches[0];
1433         offset[0] = obj_image->image.offsets[0];
1434
1435         if (full_packed_format) {
1436             scale_factor_of_1st_plane_width_in_byte = 4;
1437         }
1438         else if (packed_yuv ) {
1439             scale_factor_of_1st_plane_width_in_byte = 2;
1440         }
1441         else if (interleaved_uv) {
1442             width[1] = obj_image->image.width;
1443             height[1] = obj_image->image.height / 2;
1444             pitch[1] = obj_image->image.pitches[1];
1445             offset[1] = obj_image->image.offsets[1];
1446         } else {
1447             width[1] = obj_image->image.width / 2;
1448             height[1] = obj_image->image.height / 2;
1449             pitch[1] = obj_image->image.pitches[1];
1450             offset[1] = obj_image->image.offsets[1];
1451             width[2] = obj_image->image.width / 2;
1452             height[2] = obj_image->image.height / 2;
1453             pitch[2] = obj_image->image.pitches[2];
1454             offset[2] = obj_image->image.offsets[2];
1455         }
1456     }
1457
1458     /* Y surface */
1459     i965_pp_set_surface_state(ctx, pp_context,
1460                               bo, offset[Y],
1461                               width[Y] *scale_factor_of_1st_plane_width_in_byte / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
1462                               base_index, is_target);
1463
1464     if (!packed_yuv && !full_packed_format) {
1465         if (interleaved_uv) {
1466             i965_pp_set_surface_state(ctx, pp_context,
1467                                       bo, offset[UV],
1468                                       width[UV] / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
1469                                       base_index + 1, is_target);
1470         } else {
1471             /* U surface */
1472             i965_pp_set_surface_state(ctx, pp_context,
1473                                       bo, offset[U],
1474                                       width[U] / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
1475                                       base_index + 1, is_target);
1476
1477             /* V surface */
1478             i965_pp_set_surface_state(ctx, pp_context,
1479                                       bo, offset[V],
1480                                       width[V] / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
1481                                       base_index + 2, is_target);
1482         }
1483     }
1484
1485 }
1486
1487 static void 
1488 gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1489                                      const struct i965_surface *surface, 
1490                                      int base_index, int is_target,
1491                                      int *width, int *height, int *pitch, int *offset)
1492 {
1493     struct i965_driver_data *i965 = i965_driver_data(ctx);
1494     struct object_surface *obj_surface;
1495     struct object_image *obj_image;
1496     dri_bo *bo;
1497     int fourcc = pp_get_surface_fourcc(ctx, surface);
1498     const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1499                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1;
1500     const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1501                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 1 : 2;
1502     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1503     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y'));
1504
1505     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1506         obj_surface = SURFACE(surface->id);
1507         bo = obj_surface->bo;
1508         width[0] = obj_surface->orig_width;
1509         height[0] = obj_surface->orig_height;
1510         pitch[0] = obj_surface->width;
1511         offset[0] = 0;
1512
1513         if (packed_yuv) {
1514             if (is_target)
1515                 width[0] = obj_surface->orig_width * 2; /* surface format is R8, so double the width */
1516             else
1517                 width[0] = obj_surface->orig_width;     /* surface foramt is YCBCR, width is specified in units of pixels */
1518
1519             pitch[0] = obj_surface->width * 2;
1520         }
1521
1522         width[1] = obj_surface->cb_cr_width;
1523         height[1] = obj_surface->cb_cr_height;
1524         pitch[1] = obj_surface->cb_cr_pitch;
1525         offset[1] = obj_surface->y_cb_offset * obj_surface->width;
1526
1527         width[2] = obj_surface->cb_cr_width;
1528         height[2] = obj_surface->cb_cr_height;
1529         pitch[2] = obj_surface->cb_cr_pitch;
1530         offset[2] = obj_surface->y_cr_offset * obj_surface->width;
1531     } else {
1532         obj_image = IMAGE(surface->id);
1533         bo = obj_image->bo;
1534         width[0] = obj_image->image.width;
1535         height[0] = obj_image->image.height;
1536         pitch[0] = obj_image->image.pitches[0];
1537         offset[0] = obj_image->image.offsets[0];
1538
1539         if (packed_yuv) {
1540             if (is_target)
1541                 width[0] = obj_image->image.width * 2;  /* surface format is R8, so double the width */
1542             else
1543                 width[0] = obj_image->image.width;      /* surface foramt is YCBCR, width is specified in units of pixels */
1544         } else if (interleaved_uv) {
1545             width[1] = obj_image->image.width / 2;
1546             height[1] = obj_image->image.height / 2;
1547             pitch[1] = obj_image->image.pitches[1];
1548             offset[1] = obj_image->image.offsets[1];
1549         } else {
1550             width[1] = obj_image->image.width / 2;
1551             height[1] = obj_image->image.height / 2;
1552             pitch[1] = obj_image->image.pitches[U];
1553             offset[1] = obj_image->image.offsets[U];
1554             width[2] = obj_image->image.width / 2;
1555             height[2] = obj_image->image.height / 2;
1556             pitch[2] = obj_image->image.pitches[V];
1557             offset[2] = obj_image->image.offsets[V];
1558         }
1559     }
1560
1561     if (is_target) {
1562         gen7_pp_set_surface_state(ctx, pp_context,
1563                                   bo, 0,
1564                                   width[0] / 4, height[0], pitch[0],
1565                                   I965_SURFACEFORMAT_R8_SINT,
1566                                   base_index, 1);
1567
1568         if (!packed_yuv) {
1569             if (interleaved_uv) {
1570                 gen7_pp_set_surface_state(ctx, pp_context,
1571                                           bo, offset[1],
1572                                           width[1] / 2, height[1], pitch[1],
1573                                           I965_SURFACEFORMAT_R8G8_SINT,
1574                                           base_index + 1, 1);
1575             } else {
1576                 gen7_pp_set_surface_state(ctx, pp_context,
1577                                           bo, offset[1],
1578                                           width[1] / 4, height[1], pitch[1],
1579                                           I965_SURFACEFORMAT_R8_SINT,
1580                                           base_index + 1, 1);
1581                 gen7_pp_set_surface_state(ctx, pp_context,
1582                                           bo, offset[2],
1583                                           width[2] / 4, height[2], pitch[2],
1584                                           I965_SURFACEFORMAT_R8_SINT,
1585                                           base_index + 2, 1);
1586             }
1587         }
1588     } else {
1589         int format0 = SURFACE_FORMAT_Y8_UNORM;
1590
1591         switch (fourcc) {
1592         case VA_FOURCC('Y', 'U', 'Y', '2'):
1593             format0 = SURFACE_FORMAT_YCRCB_NORMAL;
1594             break;
1595
1596         case VA_FOURCC('U', 'Y', 'V', 'Y'):
1597             format0 = SURFACE_FORMAT_YCRCB_SWAPY;
1598             break;
1599
1600         default:
1601             break;
1602         }
1603
1604         gen7_pp_set_surface2_state(ctx, pp_context,
1605                                    bo, offset[0],
1606                                    width[0], height[0], pitch[0],
1607                                    0, 0,
1608                                    format0, 0,
1609                                    base_index);
1610
1611         if (!packed_yuv) {
1612             if (interleaved_uv) {
1613                 gen7_pp_set_surface2_state(ctx, pp_context,
1614                                            bo, offset[1],
1615                                            width[1], height[1], pitch[1],
1616                                            0, 0,
1617                                            SURFACE_FORMAT_R8B8_UNORM, 0,
1618                                            base_index + 1);
1619             } else {
1620                 gen7_pp_set_surface2_state(ctx, pp_context,
1621                                            bo, offset[1],
1622                                            width[1], height[1], pitch[1],
1623                                            0, 0,
1624                                            SURFACE_FORMAT_R8_UNORM, 0,
1625                                            base_index + 1);
1626                 gen7_pp_set_surface2_state(ctx, pp_context,
1627                                            bo, offset[2],
1628                                            width[2], height[2], pitch[2],
1629                                            0, 0,
1630                                            SURFACE_FORMAT_R8_UNORM, 0,
1631                                            base_index + 2);
1632             }
1633         }
1634     }
1635 }
1636
/* Horizontal step count of the "null" module: always a single step. */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context; /* unused */

    return 1;
}
1642
/* Vertical step count of the "null" module: always a single step. */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context; /* unused */

    return 1;
}
1648
/*
 * Per-block callback of the "null" module: changes nothing and always
 * returns 0.
 */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;
    (void)x;
    (void)y;

    return 0;
}
1654
1655 static VAStatus
1656 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1657                    const struct i965_surface *src_surface,
1658                    const VARectangle *src_rect,
1659                    struct i965_surface *dst_surface,
1660                    const VARectangle *dst_rect,
1661                    void *filter_param)
1662 {
1663     /* private function & data */
1664     pp_context->pp_x_steps = pp_null_x_steps;
1665     pp_context->pp_y_steps = pp_null_y_steps;
1666     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
1667
1668     dst_surface->flags = src_surface->flags;
1669
1670     return VA_STATUS_SUCCESS;
1671 }
1672
/* Horizontal step count of the load/save module: always a single step. */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context; /* unused */

    return 1;
}
1678
1679 static int
1680 pp_load_save_y_steps(void *private_context)
1681 {
1682     struct pp_load_save_context *pp_load_save_context = private_context;
1683
1684     return pp_load_save_context->dest_h / 8;
1685 }
1686
1687 static int
1688 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1689 {
1690     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1691     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
1692
1693     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_load_save_context->dest_x;
1694     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_load_save_context->dest_y;
1695
1696     return 0;
1697 }
1698
1699 static void calculate_boundary_block_mask(struct i965_post_processing_context *pp_context, const VARectangle *dst_rect)
1700 {
1701     int i;
1702     /* x offset of dest surface must be dword aligned.
1703      * so we have to extend dst surface on left edge, and mask out pixels not interested
1704      */
1705     if (dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT) {
1706         pp_context->block_horizontal_mask_left = 0;
1707         for (i=dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; i<GPU_ASM_BLOCK_WIDTH; i++)
1708         {
1709             pp_context->block_horizontal_mask_left |= 1<<i;
1710         }
1711     }
1712     else {
1713         pp_context->block_horizontal_mask_left = 0xffff;
1714     }
1715     
1716     int dst_width_adjust = dst_rect->width + dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; 
1717     if (dst_width_adjust%GPU_ASM_BLOCK_WIDTH){
1718         pp_context->block_horizontal_mask_right = (1 << (dst_width_adjust%GPU_ASM_BLOCK_WIDTH)) - 1;
1719     }
1720     else {
1721         pp_context->block_horizontal_mask_right = 0xffff;
1722     }
1723     
1724     if (dst_rect->height%GPU_ASM_BLOCK_HEIGHT){
1725         pp_context->block_vertical_mask_bottom = (1 << (dst_rect->height%GPU_ASM_BLOCK_HEIGHT)) - 1;
1726     }
1727     else {
1728         pp_context->block_vertical_mask_bottom = 0xff;
1729     }
1730
1731 }
1732 static VAStatus
1733 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1734                                 const struct i965_surface *src_surface,
1735                                 const VARectangle *src_rect,
1736                                 struct i965_surface *dst_surface,
1737                                 const VARectangle *dst_rect,
1738                                 void *filter_param)
1739 {
1740     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
1741     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1742     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1743     int width[3], height[3], pitch[3], offset[3];
1744     const int Y = 0;
1745
1746     /* source surface */
1747     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
1748                                     width, height, pitch, offset);
1749
1750     /* destination surface */
1751     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
1752                                     width, height, pitch, offset);
1753
1754     /* private function & data */
1755     pp_context->pp_x_steps = pp_load_save_x_steps;
1756     pp_context->pp_y_steps = pp_load_save_y_steps;
1757     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
1758
1759     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;;
1760     pp_load_save_context->dest_x = dst_rect->x - dst_left_edge_extend;
1761     pp_load_save_context->dest_y = dst_rect->y;
1762     pp_load_save_context->dest_h = ALIGN(dst_rect->height, 8);
1763     pp_load_save_context->dest_w = ALIGN(dst_rect->width+dst_left_edge_extend, 16);
1764
1765     pp_inline_parameter->grf5.block_count_x = pp_load_save_context->dest_w / 16;   /* 1 x N */
1766     pp_inline_parameter->grf5.number_blocks = pp_load_save_context->dest_w / 16;
1767
1768     pp_static_parameter->grf3.horizontal_origin_offset = src_rect->x;
1769     pp_static_parameter->grf3.vertical_origin_offset = src_rect->y;
1770
1771     // update u/v offset for packed yuv
1772     i965_update_src_surface_static_parameter (ctx, pp_context, src_surface);
1773     i965_update_dst_surface_static_parameter (ctx, pp_context, dst_surface);
1774
1775     dst_surface->flags = src_surface->flags;
1776
1777     return VA_STATUS_SUCCESS;
1778 }
1779
/*
 * Horizontal step count for the scaling kernel.
 * The private context is not consulted; the count is always 1.
 */
static int
pp_scaling_x_steps(void *private_context)
{
    const int steps = 1;

    (void)private_context;

    return steps;
}
1785
1786 static int
1787 pp_scaling_y_steps(void *private_context)
1788 {
1789     struct pp_scaling_context *pp_scaling_context = private_context;
1790
1791     return pp_scaling_context->dest_h / 8;
1792 }
1793
1794 static int
1795 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1796 {
1797     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
1798     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1799     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1800     float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1801     float src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
1802
1803     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
1804     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
1805     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
1806     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
1807     
1808     return 0;
1809 }
1810
1811 static VAStatus
1812 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1813                            const struct i965_surface *src_surface,
1814                            const VARectangle *src_rect,
1815                            struct i965_surface *dst_surface,
1816                            const VARectangle *dst_rect,
1817                            void *filter_param)
1818 {
1819     struct i965_driver_data *i965 = i965_driver_data(ctx);
1820     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
1821     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1822     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1823     struct object_surface *obj_surface;
1824     struct i965_sampler_state *sampler_state;
1825     int in_w, in_h, in_wpitch, in_hpitch;
1826     int out_w, out_h, out_wpitch, out_hpitch;
1827
1828     /* source surface */
1829     obj_surface = SURFACE(src_surface->id);
1830     in_w = obj_surface->orig_width;
1831     in_h = obj_surface->orig_height;
1832     in_wpitch = obj_surface->width;
1833     in_hpitch = obj_surface->height;
1834
1835     /* source Y surface index 1 */
1836     i965_pp_set_surface_state(ctx, pp_context,
1837                               obj_surface->bo, 0,
1838                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1839                               1, 0);
1840
1841     /* source UV surface index 2 */
1842     i965_pp_set_surface_state(ctx, pp_context,
1843                               obj_surface->bo, in_wpitch * in_hpitch,
1844                               in_w / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1845                               2, 0);
1846
1847     /* destination surface */
1848     obj_surface = SURFACE(dst_surface->id);
1849     out_w = obj_surface->orig_width;
1850     out_h = obj_surface->orig_height;
1851     out_wpitch = obj_surface->width;
1852     out_hpitch = obj_surface->height;
1853
1854     /* destination Y surface index 7 */
1855     i965_pp_set_surface_state(ctx, pp_context,
1856                               obj_surface->bo, 0,
1857                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1858                               7, 1);
1859
1860     /* destination UV surface index 8 */
1861     i965_pp_set_surface_state(ctx, pp_context,
1862                               obj_surface->bo, out_wpitch * out_hpitch,
1863                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1864                               8, 1);
1865
1866     /* sampler state */
1867     dri_bo_map(pp_context->sampler_state_table.bo, True);
1868     assert(pp_context->sampler_state_table.bo->virtual);
1869     sampler_state = pp_context->sampler_state_table.bo->virtual;
1870
1871     /* SIMD16 Y index 1 */
1872     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
1873     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1874     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1875     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1876     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1877
1878     /* SIMD16 UV index 2 */
1879     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
1880     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1881     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1882     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1883     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1884
1885     dri_bo_unmap(pp_context->sampler_state_table.bo);
1886
1887     /* private function & data */
1888     pp_context->pp_x_steps = pp_scaling_x_steps;
1889     pp_context->pp_y_steps = pp_scaling_y_steps;
1890     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
1891
1892     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
1893     float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
1894     pp_scaling_context->dest_x = dst_rect->x - dst_left_edge_extend;
1895     pp_scaling_context->dest_y = dst_rect->y;
1896     pp_scaling_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
1897     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 8);
1898     pp_scaling_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
1899     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
1900
1901     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
1902
1903     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
1904     pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
1905     pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
1906
1907     dst_surface->flags = src_surface->flags;
1908
1909     return VA_STATUS_SUCCESS;
1910 }
1911
1912 static int
1913 pp_avs_x_steps(void *private_context)
1914 {
1915     struct pp_avs_context *pp_avs_context = private_context;
1916
1917     return pp_avs_context->dest_w / 16;
1918 }
1919
/*
 * Vertical step count for the AVS kernel.
 * The private context is not consulted; the count is always 1.
 */
static int
pp_avs_y_steps(void *private_context)
{
    const int steps = 1;

    (void)private_context;

    return steps;
}
1925
/*
 * Fill the inline parameters for destination block (x, y) of the AVS kernel.
 *
 * Always writes the normalized source origin and the destination pixel
 * origin of the block.  When the static parameter grf4.r4_2.avs.nlas is
 * non-zero (presumably "non-linear anamorphic scaling" -- confirm against
 * the kernel documentation) it additionally rewrites the horizontal scaling
 * step and video_step_delta held in the inline parameters.
 *
 * NOTE: in the nlas paths every block with x != 0 advances the horizontal
 * origin with "+=" from whatever the previous call left in the inline
 * parameters, so the result depends on call order -- presumably the caller
 * walks x upward from 0; verify against the walker loop.
 *
 * Always returns 0.
 */
1926 static int
1927 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1928 {
1929     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1930     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1931     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1932     float src_x_steping, src_y_steping, video_step_delta;
         /* destination height times the source aspect ratio (integer math),
          * passed through ALIGN(.., 16): the width the picture would need at
          * dest_h to keep the source aspect ratio */
1933     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
1934
1935     if (pp_static_parameter->grf4.r4_2.avs.nlas == 0) {
             /* uniform scaling: origin computed directly from x, no state carried over */
1936         src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1937         pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_avs_context->src_normalized_x;
1938     } else if (tmp_w >= pp_avs_context->dest_w) {
             /* aspect-correct width is at least the destination width: constant
              * step of 1/tmp_w, and the first block starts half of the excess
              * (tmp_w - dest_w) in from the source origin */
1939         pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1940         pp_inline_parameter->grf6.video_step_delta = 0;
1941         
1942         if (x == 0) {
1943             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
1944                 pp_avs_context->src_normalized_x;
1945         } else {
                 /* advance by one 16-pixel block; 16 * 15 / 2 is the sum 0..15,
                  * presumably the accumulated per-pixel delta over the block
                  * (video_step_delta is 0 in this branch) */
1946             src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1947             video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1948             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1949                 16 * 15 * video_step_delta / 2;
1950         }
1951     } else {
             /* destination is wider than the aspect-correct width */
1952         int n0, n1, n2, nls_left, nls_right;
1953         int factor_a = 5, factor_b = 4;
1954         float f;
1955
             /* (dest_w - tmp_w) / 16 extra blocks are split into n0 (left) and
              * n1 (right); n2 is tmp_w / (16 * factor_a) blocks; f is the
              * fraction of tmp_w covered by n2 blocks */
1956         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
1957         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
1958         n2 = tmp_w / (16 * factor_a);
1959         nls_left = n0 + n2;
1960         nls_right = n1 + n2;
1961         f = (float) n2 * 16 / tmp_w;
1962         
1963         if (n0 < 5) {
                 /* fewer than 5 extra blocks per side: constant step of
                  * 1/dest_w with no delta */
1964             pp_inline_parameter->grf6.video_step_delta = 0.0;
1965
1966             if (x == 0) {
1967                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
1968                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1969             } else {
1970                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1971                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1972                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1973                     16 * 15 * video_step_delta / 2;
1974             }
1975         } else {
1976             if (x < nls_left) {
                     /* left region: step starts at a and grows by b per pixel
                      * (16 * b per block) */
1977                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
1978                 float a = f / (nls_left * 16 * factor_b);
1979                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
1980                 
1981                 pp_inline_parameter->grf6.video_step_delta = b;
1982
1983                 if (x == 0) {
1984                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1985                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a;
1986                 } else {
1987                     src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1988                     video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1989                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1990                         16 * 15 * video_step_delta / 2;
1991                     pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
1992                 }
1993             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
1994                 /* scale the center linearly */
                     /* the origin is advanced with the step/delta left by the
                      * previous block BEFORE they are reset for this one */
1995                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1996                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1997                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1998                     16 * 15 * video_step_delta / 2;
1999                 pp_inline_parameter->grf6.video_step_delta = 0.0;
2000                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
2001             } else {
                     /* right region: same a/b derivation with nls_right; the
                      * step starts at a + (nls_right * 16 - 1) * b on the first
                      * block of the region and shrinks by 16 * b per block */
2002                 float a = f / (nls_right * 16 * factor_b);
2003                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
2004
2005                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2006                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2007                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2008                     16 * 15 * video_step_delta / 2;
2009                 pp_inline_parameter->grf6.video_step_delta = -b;
2010
2011                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
2012                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
2013                 else
2014                     pp_inline_parameter->grf5.normalized_video_x_scaling_step -= b * 16;
2015             }
2016         }
2017     }
2018
         /* vertical source origin and destination origin are computed directly
          * from (x, y) on a 16x8 pixel block grid in every mode */
2019     src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2020     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
2021     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2022     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
2023
2024     return 0;
2025 }
2026
2027 static VAStatus
2028 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2029                        const struct i965_surface *src_surface,
2030                        const VARectangle *src_rect,
2031                        struct i965_surface *dst_surface,
2032                        const VARectangle *dst_rect,
2033                        void *filter_param,
2034                        int nlas)
2035 {
2036     struct i965_driver_data *i965 = i965_driver_data(ctx);
2037     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2038     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2039     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2040     struct object_surface *obj_surface;
2041     struct i965_sampler_8x8 *sampler_8x8;
2042     struct i965_sampler_8x8_state *sampler_8x8_state;
2043     int index;
2044     int in_w, in_h, in_wpitch, in_hpitch;
2045     int out_w, out_h, out_wpitch, out_hpitch;
2046     int i;
2047
2048     /* surface */
2049     obj_surface = SURFACE(src_surface->id);
2050     in_w = obj_surface->orig_width;
2051     in_h = obj_surface->orig_height;
2052     in_wpitch = obj_surface->width;
2053     in_hpitch = obj_surface->height;
2054
2055     /* source Y surface index 1 */
2056     i965_pp_set_surface2_state(ctx, pp_context,
2057                                obj_surface->bo, 0,
2058                                in_w, in_h, in_wpitch,
2059                                0, 0,
2060                                SURFACE_FORMAT_Y8_UNORM, 0,
2061                                1);
2062
2063     /* source UV surface index 2 */
2064     i965_pp_set_surface2_state(ctx, pp_context,
2065                                obj_surface->bo, in_wpitch * in_hpitch,
2066                                in_w / 2, in_h / 2, in_wpitch,
2067                                0, 0,
2068                                SURFACE_FORMAT_R8B8_UNORM, 0,
2069                                2);
2070
2071     /* destination surface */
2072     obj_surface = SURFACE(dst_surface->id);
2073     out_w = obj_surface->orig_width;
2074     out_h = obj_surface->orig_height;
2075     out_wpitch = obj_surface->width;
2076     out_hpitch = obj_surface->height;
2077     assert(out_w <= out_wpitch && out_h <= out_hpitch);
2078
2079     /* destination Y surface index 7 */
2080     i965_pp_set_surface_state(ctx, pp_context,
2081                               obj_surface->bo, 0,
2082                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2083                               7, 1);
2084
2085     /* destination UV surface index 8 */
2086     i965_pp_set_surface_state(ctx, pp_context,
2087                               obj_surface->bo, out_wpitch * out_hpitch,
2088                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2089                               8, 1);
2090
2091     /* sampler 8x8 state */
2092     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2093     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2094     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2095     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2096     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2097
2098     for (i = 0; i < 17; i++) {
2099         /* for Y channel, currently ignore */
2100         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x00;
2101         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x00;
2102         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x08;
2103         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x18;
2104         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x18;
2105         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x08;
2106         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x00;
2107         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x00;
2108         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x00;
2109         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x00;
2110         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x10;
2111         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x10;
2112         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x10;
2113         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x10;
2114         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x00;
2115         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x00;
2116         /* for U/V channel, 0.25 */
2117         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2118         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2119         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
2120         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
2121         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
2122         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
2123         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2124         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2125         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2126         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2127         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
2128         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
2129         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
2130         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
2131         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2132         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2133     }
2134
2135     sampler_8x8_state->dw136.default_sharpness_level = 0;
2136     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
2137     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2138     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2139     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2140
2141     /* sampler 8x8 */
2142     dri_bo_map(pp_context->sampler_state_table.bo, True);
2143     assert(pp_context->sampler_state_table.bo->virtual);
2144     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
2145     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2146
2147     /* sample_8x8 Y index 1 */
2148     index = 1;
2149     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2150     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2151     sampler_8x8[index].dw0.ief_bypass = 1;
2152     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2153     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2154     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2155     sampler_8x8[index].dw2.global_noise_estimation = 22;
2156     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2157     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2158     sampler_8x8[index].dw3.strong_edge_weight = 7;
2159     sampler_8x8[index].dw3.regular_weight = 2;
2160     sampler_8x8[index].dw3.non_edge_weight = 0;
2161     sampler_8x8[index].dw3.gain_factor = 40;
2162     sampler_8x8[index].dw4.steepness_boost = 0;
2163     sampler_8x8[index].dw4.steepness_threshold = 0;
2164     sampler_8x8[index].dw4.mr_boost = 0;
2165     sampler_8x8[index].dw4.mr_threshold = 5;
2166     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2167     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2168     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2169     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2170     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2171     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2172     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2173     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2174     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2175     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2176     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2177     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2178     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2179     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2180     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2181     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2182     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2183     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2184     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2185     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2186     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2187     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2188     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2189     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2190     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2191     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2192     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2193     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2194     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2195     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2196     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2197     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2198     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2199     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2200     sampler_8x8[index].dw13.limiter_boost = 0;
2201     sampler_8x8[index].dw13.minimum_limiter = 10;
2202     sampler_8x8[index].dw13.maximum_limiter = 11;
2203     sampler_8x8[index].dw14.clip_limiter = 130;
2204     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2205                       I915_GEM_DOMAIN_RENDER, 
2206                       0,
2207                       0,
2208                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2209                       pp_context->sampler_state_table.bo_8x8);
2210
2211     /* sample_8x8 UV index 2 */
2212     index = 2;
2213     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2214     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2215     sampler_8x8[index].dw0.ief_bypass = 1;
2216     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2217     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2218     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2219     sampler_8x8[index].dw2.global_noise_estimation = 22;
2220     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2221     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2222     sampler_8x8[index].dw3.strong_edge_weight = 7;
2223     sampler_8x8[index].dw3.regular_weight = 2;
2224     sampler_8x8[index].dw3.non_edge_weight = 0;
2225     sampler_8x8[index].dw3.gain_factor = 40;
2226     sampler_8x8[index].dw4.steepness_boost = 0;
2227     sampler_8x8[index].dw4.steepness_threshold = 0;
2228     sampler_8x8[index].dw4.mr_boost = 0;
2229     sampler_8x8[index].dw4.mr_threshold = 5;
2230     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2231     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2232     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2233     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2234     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2235     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2236     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2237     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2238     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2239     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2240     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2241     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2242     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2243     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2244     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2245     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2246     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2247     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2248     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2249     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2250     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2251     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2252     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2253     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2254     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2255     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2256     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2257     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2258     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2259     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2260     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2261     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2262     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2263     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2264     sampler_8x8[index].dw13.limiter_boost = 0;
2265     sampler_8x8[index].dw13.minimum_limiter = 10;
2266     sampler_8x8[index].dw13.maximum_limiter = 11;
2267     sampler_8x8[index].dw14.clip_limiter = 130;
2268     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2269                       I915_GEM_DOMAIN_RENDER, 
2270                       0,
2271                       0,
2272                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2273                       pp_context->sampler_state_table.bo_8x8);
2274
2275     dri_bo_unmap(pp_context->sampler_state_table.bo);
2276
2277     /* private function & data */
2278     pp_context->pp_x_steps = pp_avs_x_steps;
2279     pp_context->pp_y_steps = pp_avs_y_steps;
2280     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
2281
2282     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
2283     float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
2284     pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
2285     pp_avs_context->dest_y = dst_rect->y;
2286     pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2287     pp_avs_context->dest_h = ALIGN(dst_rect->height, 8);
2288     pp_avs_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
2289     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
2290     pp_avs_context->src_w = src_rect->width + src_left_edge_extend;
2291     pp_avs_context->src_h = src_rect->height;
2292
2293     pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
2294     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2295
2296     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
2297     pp_inline_parameter->grf5.block_count_x = 1;        /* M x 1 */
2298     pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
2299     pp_inline_parameter->grf6.video_step_delta = 0.0;
2300
2301     dst_surface->flags = src_surface->flags;
2302
2303     return VA_STATUS_SUCCESS;
2304 }
2305
2306 static VAStatus
2307 pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2308                             const struct i965_surface *src_surface,
2309                             const VARectangle *src_rect,
2310                             struct i965_surface *dst_surface,
2311                             const VARectangle *dst_rect,
2312                             void *filter_param)
2313 {
2314     return pp_nv12_avs_initialize(ctx, pp_context,
2315                                   src_surface,
2316                                   src_rect,
2317                                   dst_surface,
2318                                   dst_rect,
2319                                   filter_param,
2320                                   1);
2321 }
2322
2323 static VAStatus
2324 gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2325                              const struct i965_surface *src_surface,
2326                              const VARectangle *src_rect,
2327                              struct i965_surface *dst_surface,
2328                              const VARectangle *dst_rect,
2329                              void *filter_param)
2330 {
2331     return pp_nv12_avs_initialize(ctx, pp_context,
2332                                   src_surface,
2333                                   src_rect,
2334                                   dst_surface,
2335                                   dst_rect,
2336                                   filter_param,
2337                                   0);    
2338 }
2339
2340 static int
2341 gen7_pp_avs_x_steps(void *private_context)
2342 {
2343     struct pp_avs_context *pp_avs_context = private_context;
2344
2345     return pp_avs_context->dest_w / 16;
2346 }
2347
2348 static int
2349 gen7_pp_avs_y_steps(void *private_context)
2350 {
2351     struct pp_avs_context *pp_avs_context = private_context;
2352
2353     return pp_avs_context->dest_h / 16;
2354 }
2355
2356 static int
2357 gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2358 {
2359     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2360     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2361
2362     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2363     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
2364     pp_inline_parameter->grf7.constant_0 = 0xffffffff;
2365     pp_inline_parameter->grf7.sampler_load_main_video_x_scaling_step = 1.0 / pp_avs_context->src_w;
2366
2367     return 0;
2368 }
2369
2370 static void gen7_update_src_surface_uv_offset(VADriverContextP    ctx, 
2371                                               struct i965_post_processing_context *pp_context,
2372                                               const struct i965_surface *surface)
2373 {
2374     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2375     int fourcc = pp_get_surface_fourcc(ctx, surface);
2376     
2377     if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) {
2378         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
2379         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
2380         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
2381     } else if (fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
2382         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 1;
2383         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 0;
2384         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 2;
2385     }
2386 }
2387
2388 static VAStatus
2389 gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2390                            const struct i965_surface *src_surface,
2391                            const VARectangle *src_rect,
2392                            struct i965_surface *dst_surface,
2393                            const VARectangle *dst_rect,
2394                            void *filter_param)
2395 {
2396     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2397     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2398     struct gen7_sampler_8x8 *sampler_8x8;
2399     struct i965_sampler_8x8_state *sampler_8x8_state;
2400     int index, i;
2401     int width[3], height[3], pitch[3], offset[3];
2402     int src_width, src_height;
2403
2404     /* source surface */
2405     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
2406                                          width, height, pitch, offset);
2407     src_width = width[0];
2408     src_height = height[0];
2409
2410     /* destination surface */
2411     gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
2412                                          width, height, pitch, offset);
2413
2414     /* sampler 8x8 state */
2415     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2416     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2417     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2418     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2419     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2420
2421     for (i = 0; i < 17; i++) {
2422         /* for Y channel, currently ignore */
2423         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0;
2424         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0;
2425         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0;
2426         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x0;
2427         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x0;
2428         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0;
2429         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0;
2430         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0;
2431         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0;
2432         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0;
2433         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0;
2434         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x0;
2435         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x0;
2436         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0;
2437         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0;
2438         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0;
2439         /* for U/V channel, 0.25 */
2440         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2441         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2442         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
2443         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
2444         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
2445         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
2446         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2447         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2448         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2449         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2450         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
2451         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
2452         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
2453         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
2454         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2455         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2456     }
2457
2458     sampler_8x8_state->dw136.default_sharpness_level = 0;
2459     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
2460     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2461     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2462     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2463
2464     /* sampler 8x8 */
2465     dri_bo_map(pp_context->sampler_state_table.bo, True);
2466     assert(pp_context->sampler_state_table.bo->virtual);
2467     assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
2468     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2469
2470     /* sample_8x8 Y index 4 */
2471     index = 4;
2472     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2473     sampler_8x8[index].dw0.global_noise_estimation = 255;
2474     sampler_8x8[index].dw0.ief_bypass = 1;
2475
2476     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2477
2478     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2479     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2480     sampler_8x8[index].dw2.r5x_coefficient = 9;
2481     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2482     sampler_8x8[index].dw2.r5c_coefficient = 3;
2483
2484     sampler_8x8[index].dw3.r3x_coefficient = 27;
2485     sampler_8x8[index].dw3.r3c_coefficient = 5;
2486     sampler_8x8[index].dw3.gain_factor = 40;
2487     sampler_8x8[index].dw3.non_edge_weight = 1;
2488     sampler_8x8[index].dw3.regular_weight = 2;
2489     sampler_8x8[index].dw3.strong_edge_weight = 7;
2490     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2491
2492     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2493                       I915_GEM_DOMAIN_RENDER, 
2494                       0,
2495                       0,
2496                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2497                       pp_context->sampler_state_table.bo_8x8);
2498
2499     /* sample_8x8 UV index 8 */
2500     index = 8;
2501     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2502     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2503     sampler_8x8[index].dw0.global_noise_estimation = 255;
2504     sampler_8x8[index].dw0.ief_bypass = 1;
2505     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2506     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2507     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2508     sampler_8x8[index].dw2.r5x_coefficient = 9;
2509     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2510     sampler_8x8[index].dw2.r5c_coefficient = 3;
2511     sampler_8x8[index].dw3.r3x_coefficient = 27;
2512     sampler_8x8[index].dw3.r3c_coefficient = 5;
2513     sampler_8x8[index].dw3.gain_factor = 40;
2514     sampler_8x8[index].dw3.non_edge_weight = 1;
2515     sampler_8x8[index].dw3.regular_weight = 2;
2516     sampler_8x8[index].dw3.strong_edge_weight = 7;
2517     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2518
2519     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2520                       I915_GEM_DOMAIN_RENDER, 
2521                       0,
2522                       0,
2523                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2524                       pp_context->sampler_state_table.bo_8x8);
2525
2526     /* sampler_8x8 V, index 12 */
2527     index = 12;
2528     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2529     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2530     sampler_8x8[index].dw0.global_noise_estimation = 255;
2531     sampler_8x8[index].dw0.ief_bypass = 1;
2532     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2533     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2534     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2535     sampler_8x8[index].dw2.r5x_coefficient = 9;
2536     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2537     sampler_8x8[index].dw2.r5c_coefficient = 3;
2538     sampler_8x8[index].dw3.r3x_coefficient = 27;
2539     sampler_8x8[index].dw3.r3c_coefficient = 5;
2540     sampler_8x8[index].dw3.gain_factor = 40;
2541     sampler_8x8[index].dw3.non_edge_weight = 1;
2542     sampler_8x8[index].dw3.regular_weight = 2;
2543     sampler_8x8[index].dw3.strong_edge_weight = 7;
2544     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2545
2546     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2547                       I915_GEM_DOMAIN_RENDER, 
2548                       0,
2549                       0,
2550                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2551                       pp_context->sampler_state_table.bo_8x8);
2552
2553     dri_bo_unmap(pp_context->sampler_state_table.bo);
2554
2555     /* private function & data */
2556     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
2557     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
2558     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
2559
2560     pp_avs_context->dest_x = dst_rect->x;
2561     pp_avs_context->dest_y = dst_rect->y;
2562     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
2563     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
2564     pp_avs_context->src_w = src_rect->width;
2565     pp_avs_context->src_h = src_rect->height;
2566
2567     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
2568     dw = MAX(dw, pp_avs_context->dest_w);
2569
2570     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
2571     pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */
2572     pp_static_parameter->grf2.avs_wa_width = dw;
2573     pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * dw);
2574     pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * dw);
2575
2576     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
2577     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / pp_avs_context->dest_h;
2578     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = -(float)pp_avs_context->dest_y / pp_avs_context->dest_h;
2579     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = -(float)pp_avs_context->dest_x / dw;
2580
2581     gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
2582
2583     dst_surface->flags = src_surface->flags;
2584
2585     return VA_STATUS_SUCCESS;
2586 }
2587
/*
 * Horizontal step count for the DNDI pass: always a single step.
 * private_context is accepted to match the step-callback signature and is
 * not read.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2593
2594 static int
2595 pp_dndi_y_steps(void *private_context)
2596 {
2597     struct pp_dndi_context *pp_dndi_context = private_context;
2598
2599     return pp_dndi_context->dest_h / 4;
2600 }
2601
2602 static int
2603 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2604 {
2605     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2606
2607     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2608     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
2609
2610     return 0;
2611 }
2612
2613 static VAStatus
2614 pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2615                         const struct i965_surface *src_surface,
2616                         const VARectangle *src_rect,
2617                         struct i965_surface *dst_surface,
2618                         const VARectangle *dst_rect,
2619                         void *filter_param)
2620 {
2621     struct i965_driver_data *i965 = i965_driver_data(ctx);
2622     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
2623     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2624     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2625     struct object_surface *obj_surface;
2626     struct i965_sampler_dndi *sampler_dndi;
2627     int index;
2628     int w, h;
2629     int orig_w, orig_h;
2630     int dndi_top_first = 1;
2631
2632     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
2633         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
2634
2635     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
2636         dndi_top_first = 1;
2637     else
2638         dndi_top_first = 0;
2639
2640     /* surface */
2641     obj_surface = SURFACE(src_surface->id);
2642     orig_w = obj_surface->orig_width;
2643     orig_h = obj_surface->orig_height;
2644     w = obj_surface->width;
2645     h = obj_surface->height;
2646
2647     if (pp_context->stmm.bo == NULL) {
2648         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2649                                            "STMM surface",
2650                                            w * h,
2651                                            4096);
2652         assert(pp_context->stmm.bo);
2653     }
2654
2655     /* source UV surface index 2 */
2656     i965_pp_set_surface_state(ctx, pp_context,
2657                               obj_surface->bo, w * h,
2658                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2659                               2, 0);
2660
2661     /* source YUV surface index 4 */
2662     i965_pp_set_surface2_state(ctx, pp_context,
2663                                obj_surface->bo, 0,
2664                                orig_w, orig_h, w,
2665                                0, h,
2666                                SURFACE_FORMAT_PLANAR_420_8, 1,
2667                                4);
2668
2669     /* source STMM surface index 20 */
2670     i965_pp_set_surface_state(ctx, pp_context,
2671                               pp_context->stmm.bo, 0,
2672                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2673                               20, 1);
2674
2675     /* destination surface */
2676     obj_surface = SURFACE(dst_surface->id);
2677     orig_w = obj_surface->orig_width;
2678     orig_h = obj_surface->orig_height;
2679     w = obj_surface->width;
2680     h = obj_surface->height;
2681
2682     /* destination Y surface index 7 */
2683     i965_pp_set_surface_state(ctx, pp_context,
2684                               obj_surface->bo, 0,
2685                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2686                               7, 1);
2687
2688     /* destination UV surface index 8 */
2689     i965_pp_set_surface_state(ctx, pp_context,
2690                               obj_surface->bo, w * h,
2691                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2692                               8, 1);
2693     /* sampler dndi */
2694     dri_bo_map(pp_context->sampler_state_table.bo, True);
2695     assert(pp_context->sampler_state_table.bo->virtual);
2696     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2697     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2698
2699     /* sample dndi index 1 */
2700     index = 0;
2701     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2702     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
2703     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2704     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2705
2706     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2707     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 4;
2708     sampler_dndi[index].dw1.stmm_c2 = 1;
2709     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2710     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2711
2712     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
2713     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
2714     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
2715     sampler_dndi[index].dw2.good_neighbor_threshold = 4;                // 0-63
2716
2717     sampler_dndi[index].dw3.maximum_stmm = 128;
2718     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2719     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2720     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2721     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2722
2723     sampler_dndi[index].dw4.sdi_delta = 8;
2724     sampler_dndi[index].dw4.sdi_threshold = 128;
2725     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2726     sampler_dndi[index].dw4.stmm_shift_up = 0;
2727     sampler_dndi[index].dw4.stmm_shift_down = 0;
2728     sampler_dndi[index].dw4.minimum_stmm = 0;
2729
2730     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 8;
2731     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 32;
2732     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 64;
2733     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 32;
2734
2735     sampler_dndi[index].dw6.dn_enable = 1;
2736     sampler_dndi[index].dw6.di_enable = 1;
2737     sampler_dndi[index].dw6.di_partial = 0;
2738     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2739     sampler_dndi[index].dw6.dndi_stream_id = 0;
2740     sampler_dndi[index].dw6.dndi_first_frame = 1;
2741     sampler_dndi[index].dw6.progressive_dn = 0;
2742     sampler_dndi[index].dw6.fmd_tear_threshold = 63;
2743     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2744     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2745
2746     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
2747     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
2748     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2749     sampler_dndi[index].dw7.column_width_minus1 = 0;
2750
2751     dri_bo_unmap(pp_context->sampler_state_table.bo);
2752
2753     /* private function & data */
2754     pp_context->pp_x_steps = pp_dndi_x_steps;
2755     pp_context->pp_y_steps = pp_dndi_y_steps;
2756     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
2757
2758     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
2759     pp_static_parameter->grf1.r1_6.di.top_field_first = dndi_top_first;
2760     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 0;
2761     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 0;
2762
2763     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
2764     pp_inline_parameter->grf5.number_blocks = w / 16;
2765     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
2766     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
2767
2768     pp_dndi_context->dest_w = w;
2769     pp_dndi_context->dest_h = h;
2770
2771     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
2772
2773     return VA_STATUS_SUCCESS;
2774 }
2775
/*
 * Horizontal step count for the denoise pass: always a single step.
 * private_context is accepted to match the step-callback signature and is
 * not read.
 */
static int
pp_dn_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2781
2782 static int
2783 pp_dn_y_steps(void *private_context)
2784 {
2785     struct pp_dn_context *pp_dn_context = private_context;
2786
2787     return pp_dn_context->dest_h / 8;
2788 }
2789
2790 static int
2791 pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2792 {
2793     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2794
2795     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2796     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
2797
2798     return 0;
2799 }
2800
2801 static VAStatus
2802 pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2803                       const struct i965_surface *src_surface,
2804                       const VARectangle *src_rect,
2805                       struct i965_surface *dst_surface,
2806                       const VARectangle *dst_rect,
2807                       void *filter_param)
2808 {
2809     struct i965_driver_data *i965 = i965_driver_data(ctx);
2810     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
2811     struct object_surface *obj_surface;
2812     struct i965_sampler_dndi *sampler_dndi;
2813     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2814     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2815     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
2816     int index;
2817     int w, h;
2818     int orig_w, orig_h;
2819     int dn_strength = 15;
2820     int dndi_top_first = 1;
2821     int dn_progressive = 0;
2822
2823     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
2824         dndi_top_first = 1;
2825         dn_progressive = 1;
2826     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
2827         dndi_top_first = 1;
2828         dn_progressive = 0;
2829     } else {
2830         dndi_top_first = 0;
2831         dn_progressive = 0;
2832     }
2833
2834     if (dn_filter_param) {
2835         float value = dn_filter_param->value;
2836         
2837         if (value > 1.0)
2838             value = 1.0;
2839         
2840         if (value < 0.0)
2841             value = 0.0;
2842
2843         dn_strength = (int)(value * 31.0F);
2844     }
2845
2846     /* surface */
2847     obj_surface = SURFACE(src_surface->id);
2848     orig_w = obj_surface->orig_width;
2849     orig_h = obj_surface->orig_height;
2850     w = obj_surface->width;
2851     h = obj_surface->height;
2852
2853     if (pp_context->stmm.bo == NULL) {
2854         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2855                                            "STMM surface",
2856                                            w * h,
2857                                            4096);
2858         assert(pp_context->stmm.bo);
2859     }
2860
2861     /* source UV surface index 2 */
2862     i965_pp_set_surface_state(ctx, pp_context,
2863                               obj_surface->bo, w * h,
2864                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2865                               2, 0);
2866
2867     /* source YUV surface index 4 */
2868     i965_pp_set_surface2_state(ctx, pp_context,
2869                                obj_surface->bo, 0,
2870                                orig_w, orig_h, w,
2871                                0, h,
2872                                SURFACE_FORMAT_PLANAR_420_8, 1,
2873                                4);
2874
2875     /* source STMM surface index 20 */
2876     i965_pp_set_surface_state(ctx, pp_context,
2877                               pp_context->stmm.bo, 0,
2878                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2879                               20, 1);
2880
2881     /* destination surface */
2882     obj_surface = SURFACE(dst_surface->id);
2883     orig_w = obj_surface->orig_width;
2884     orig_h = obj_surface->orig_height;
2885     w = obj_surface->width;
2886     h = obj_surface->height;
2887
2888     /* destination Y surface index 7 */
2889     i965_pp_set_surface_state(ctx, pp_context,
2890                               obj_surface->bo, 0,
2891                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2892                               7, 1);
2893
2894     /* destination UV surface index 8 */
2895     i965_pp_set_surface_state(ctx, pp_context,
2896                               obj_surface->bo, w * h,
2897                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2898                               8, 1);
2899     /* sampler dn */
2900     dri_bo_map(pp_context->sampler_state_table.bo, True);
2901     assert(pp_context->sampler_state_table.bo->virtual);
2902     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2903     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2904
2905     /* sample dndi index 1 */
2906     index = 0;
2907     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2908     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
2909     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2910     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2911
2912     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2913     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
2914     sampler_dndi[index].dw1.stmm_c2 = 0;
2915     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2916     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2917
2918     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
2919     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
2920     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
2921     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
2922
2923     sampler_dndi[index].dw3.maximum_stmm = 128;
2924     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2925     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2926     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2927     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2928
2929     sampler_dndi[index].dw4.sdi_delta = 8;
2930     sampler_dndi[index].dw4.sdi_threshold = 128;
2931     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2932     sampler_dndi[index].dw4.stmm_shift_up = 0;
2933     sampler_dndi[index].dw4.stmm_shift_down = 0;
2934     sampler_dndi[index].dw4.minimum_stmm = 0;
2935
2936     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
2937     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
2938     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
2939     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
2940
2941     sampler_dndi[index].dw6.dn_enable = 1;
2942     sampler_dndi[index].dw6.di_enable = 0;
2943     sampler_dndi[index].dw6.di_partial = 0;
2944     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2945     sampler_dndi[index].dw6.dndi_stream_id = 1;
2946     sampler_dndi[index].dw6.dndi_first_frame = 1;
2947     sampler_dndi[index].dw6.progressive_dn = dn_progressive;
2948     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
2949     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2950     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2951
2952     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
2953     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
2954     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2955     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
2956
2957     dri_bo_unmap(pp_context->sampler_state_table.bo);
2958
2959     /* private function & data */
2960     pp_context->pp_x_steps = pp_dn_x_steps;
2961     pp_context->pp_y_steps = pp_dn_y_steps;
2962     pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
2963
2964     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
2965     pp_static_parameter->grf1.r1_6.di.top_field_first = 0;
2966     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 64;
2967     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 192;
2968
2969     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
2970     pp_inline_parameter->grf5.number_blocks = w / 16;
2971     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
2972     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
2973
2974     pp_dn_context->dest_w = w;
2975     pp_dn_context->dest_h = h;
2976
2977     dst_surface->flags = src_surface->flags;
2978     
2979     return VA_STATUS_SUCCESS;
2980 }
2981
2982 static int
2983 gen7_pp_dndi_x_steps(void *private_context)
2984 {
2985     struct pp_dndi_context *pp_dndi_context = private_context;
2986
2987     return pp_dndi_context->dest_w / 16;
2988 }
2989
2990 static int
2991 gen7_pp_dndi_y_steps(void *private_context)
2992 {
2993     struct pp_dndi_context *pp_dndi_context = private_context;
2994
2995     return pp_dndi_context->dest_h / 4;
2996 }
2997
2998 static int
2999 gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3000 {
3001     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3002
3003     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16;
3004     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 4;
3005
3006     return 0;
3007 }
3008
3009 static VAStatus
3010 gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3011                              const struct i965_surface *src_surface,
3012                              const VARectangle *src_rect,
3013                              struct i965_surface *dst_surface,
3014                              const VARectangle *dst_rect,
3015                              void *filter_param)
3016 {
3017     struct i965_driver_data *i965 = i965_driver_data(ctx);
3018     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
3019     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3020     struct object_surface *obj_surface;
3021     struct gen7_sampler_dndi *sampler_dndi;
3022     int index;
3023     int w, h;
3024     int orig_w, orig_h;
3025     int dndi_top_first = 1;
3026
3027     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
3028         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
3029
3030     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
3031         dndi_top_first = 1;
3032     else
3033         dndi_top_first = 0;
3034
3035     /* surface */
3036     obj_surface = SURFACE(src_surface->id);
3037     orig_w = obj_surface->orig_width;
3038     orig_h = obj_surface->orig_height;
3039     w = obj_surface->width;
3040     h = obj_surface->height;
3041
3042     if (pp_context->stmm.bo == NULL) {
3043         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
3044                                            "STMM surface",
3045                                            w * h,
3046                                            4096);
3047         assert(pp_context->stmm.bo);
3048     }
3049
3050     /* source UV surface index 1 */
3051     gen7_pp_set_surface_state(ctx, pp_context,
3052                               obj_surface->bo, w * h,
3053                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3054                               1, 0);
3055
3056     /* source YUV surface index 3 */
3057     gen7_pp_set_surface2_state(ctx, pp_context,
3058                                obj_surface->bo, 0,
3059                                orig_w, orig_h, w,
3060                                0, h,
3061                                SURFACE_FORMAT_PLANAR_420_8, 1,
3062                                3);
3063
3064     /* source (temporal reference) YUV surface index 4 */
3065     gen7_pp_set_surface2_state(ctx, pp_context,
3066                                obj_surface->bo, 0,
3067                                orig_w, orig_h, w,
3068                                0, h,
3069                                SURFACE_FORMAT_PLANAR_420_8, 1,
3070                                4);
3071
3072     /* STMM / History Statistics input surface, index 5 */
3073     gen7_pp_set_surface_state(ctx, pp_context,
3074                               pp_context->stmm.bo, 0,
3075                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3076                               5, 1);
3077
3078     /* destination surface */
3079     obj_surface = SURFACE(dst_surface->id);
3080     orig_w = obj_surface->orig_width;
3081     orig_h = obj_surface->orig_height;
3082     w = obj_surface->width;
3083     h = obj_surface->height;
3084
3085     /* destination(Previous frame) Y surface index 27 */
3086     gen7_pp_set_surface_state(ctx, pp_context,
3087                               obj_surface->bo, 0,
3088                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3089                               27, 1);
3090
3091     /* destination(Previous frame) UV surface index 28 */
3092     gen7_pp_set_surface_state(ctx, pp_context,
3093                               obj_surface->bo, w * h,
3094                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3095                               28, 1);
3096
3097     /* destination(Current frame) Y surface index 30 */
3098     gen7_pp_set_surface_state(ctx, pp_context,
3099                               obj_surface->bo, 0,
3100                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3101                               30, 1);
3102
3103     /* destination(Current frame) UV surface index 31 */
3104     gen7_pp_set_surface_state(ctx, pp_context,
3105                               obj_surface->bo, w * h,
3106                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3107                               31, 1);
3108
3109     /* STMM output surface, index 33 */
3110     gen7_pp_set_surface_state(ctx, pp_context,
3111                               pp_context->stmm.bo, 0,
3112                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3113                               33, 1);
3114
3115
3116     /* sampler dndi */
3117     dri_bo_map(pp_context->sampler_state_table.bo, True);
3118     assert(pp_context->sampler_state_table.bo->virtual);
3119     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3120     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3121
3122     /* sample dndi index 0 */
3123     index = 0;
3124     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
3125     sampler_dndi[index].dw0.dnmh_delt = 8;
3126     sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
3127     sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
3128     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
3129     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
3130
3131     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3132     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
3133     sampler_dndi[index].dw1.stmm_c2 = 0;
3134     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
3135     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
3136
3137     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
3138     sampler_dndi[index].dw2.bne_edge_th = 1;
3139     sampler_dndi[index].dw2.smooth_mv_th = 0;
3140     sampler_dndi[index].dw2.sad_tight_th = 5;
3141     sampler_dndi[index].dw2.cat_slope_minus1 = 9;
3142     sampler_dndi[index].dw2.good_neighbor_th = 4;
3143
3144     sampler_dndi[index].dw3.maximum_stmm = 128;
3145     sampler_dndi[index].dw3.multipler_for_vecm = 2;
3146     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3147     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3148     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3149
3150     sampler_dndi[index].dw4.sdi_delta = 8;
3151     sampler_dndi[index].dw4.sdi_threshold = 128;
3152     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3153     sampler_dndi[index].dw4.stmm_shift_up = 0;
3154     sampler_dndi[index].dw4.stmm_shift_down = 0;
3155     sampler_dndi[index].dw4.minimum_stmm = 0;
3156
3157     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
3158     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
3159     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3160     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3161
3162     sampler_dndi[index].dw6.dn_enable = 0;
3163     sampler_dndi[index].dw6.di_enable = 1;
3164     sampler_dndi[index].dw6.di_partial = 0;
3165     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3166     sampler_dndi[index].dw6.dndi_stream_id = 1;
3167     sampler_dndi[index].dw6.dndi_first_frame = 1;
3168     sampler_dndi[index].dw6.progressive_dn = 0;
3169     sampler_dndi[index].dw6.mcdi_enable = 0;
3170     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
3171     sampler_dndi[index].dw6.cat_th1 = 0;
3172     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
3173     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
3174
3175     sampler_dndi[index].dw7.sad_tha = 5;
3176     sampler_dndi[index].dw7.sad_thb = 10;
3177     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3178     sampler_dndi[index].dw7.mc_pixel_consistency_th = 25;
3179     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3180     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3181     sampler_dndi[index].dw7.neighborpixel_th = 10;
3182     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3183
3184     dri_bo_unmap(pp_context->sampler_state_table.bo);
3185
3186     /* private function & data */
3187     pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
3188     pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
3189     pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
3190
3191     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3192     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3193     pp_static_parameter->grf1.di_top_field_first = 0;
3194     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3195
3196     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3197     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3198     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3199
3200     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3201     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3202
3203     pp_dndi_context->dest_w = w;
3204     pp_dndi_context->dest_h = h;
3205
3206     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3207
3208     return VA_STATUS_SUCCESS;
3209 }
3210
3211 static int
3212 gen7_pp_dn_x_steps(void *private_context)
3213 {
3214     struct pp_dn_context *pp_dn_context = private_context;
3215
3216     return pp_dn_context->dest_w / 16;
3217 }
3218
3219 static int
3220 gen7_pp_dn_y_steps(void *private_context)
3221 {
3222     struct pp_dn_context *pp_dn_context = private_context;
3223
3224     return pp_dn_context->dest_h / 4;
3225 }
3226
3227 static int
3228 gen7_pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3229 {
3230     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3231
3232     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3233     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
3234
3235     return 0;
3236 }
3237
3238 static VAStatus
3239 gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3240                            const struct i965_surface *src_surface,
3241                            const VARectangle *src_rect,
3242                            struct i965_surface *dst_surface,
3243                            const VARectangle *dst_rect,
3244                            void *filter_param)
3245 {
3246     struct i965_driver_data *i965 = i965_driver_data(ctx);
3247     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
3248     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3249     struct object_surface *obj_surface;
3250     struct gen7_sampler_dndi *sampler_dn;
3251     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3252     int index;
3253     int w, h;
3254     int orig_w, orig_h;
3255     int dn_strength = 15;
3256     int dndi_top_first = 1;
3257     int dn_progressive = 0;
3258
3259     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3260         dndi_top_first = 1;
3261         dn_progressive = 1;
3262     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3263         dndi_top_first = 1;
3264         dn_progressive = 0;
3265     } else {
3266         dndi_top_first = 0;
3267         dn_progressive = 0;
3268     }
3269
3270     if (dn_filter_param) {
3271         float value = dn_filter_param->value;
3272         
3273         if (value > 1.0)
3274             value = 1.0;
3275         
3276         if (value < 0.0)
3277             value = 0.0;
3278
3279         dn_strength = (int)(value * 31.0F);
3280     }
3281
3282     /* surface */
3283     obj_surface = SURFACE(src_surface->id);
3284     orig_w = obj_surface->orig_width;
3285     orig_h = obj_surface->orig_height;
3286     w = obj_surface->width;
3287     h = obj_surface->height;
3288
3289     if (pp_context->stmm.bo == NULL) {
3290         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
3291                                            "STMM surface",
3292                                            w * h,
3293                                            4096);
3294         assert(pp_context->stmm.bo);
3295     }
3296
3297     /* source UV surface index 1 */
3298     gen7_pp_set_surface_state(ctx, pp_context,
3299                               obj_surface->bo, w * h,
3300                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3301                               1, 0);
3302
3303     /* source YUV surface index 3 */
3304     gen7_pp_set_surface2_state(ctx, pp_context,
3305                                obj_surface->bo, 0,
3306                                orig_w, orig_h, w,
3307                                0, h,
3308                                SURFACE_FORMAT_PLANAR_420_8, 1,
3309                                3);
3310
3311     /* source (temporal reference) YUV surface index 4 */
3312     gen7_pp_set_surface2_state(ctx, pp_context,
3313                                obj_surface->bo, 0,
3314                                orig_w, orig_h, w,
3315                                0, h,
3316                                SURFACE_FORMAT_PLANAR_420_8, 1,
3317                                4);
3318
3319     /* STMM / History Statistics input surface, index 5 */
3320     gen7_pp_set_surface_state(ctx, pp_context,
3321                               pp_context->stmm.bo, 0,
3322                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3323                               5, 1);
3324
3325     /* destination surface */
3326     obj_surface = SURFACE(dst_surface->id);
3327     orig_w = obj_surface->orig_width;
3328     orig_h = obj_surface->orig_height;
3329     w = obj_surface->width;
3330     h = obj_surface->height;
3331
3332     /* destination Y surface index 24 */
3333     gen7_pp_set_surface_state(ctx, pp_context,
3334                               obj_surface->bo, 0,
3335                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3336                               24, 1);
3337
3338     /* destination UV surface index 25 */
3339     gen7_pp_set_surface_state(ctx, pp_context,
3340                               obj_surface->bo, w * h,
3341                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3342                               25, 1);
3343
3344     /* sampler dn */
3345     dri_bo_map(pp_context->sampler_state_table.bo, True);
3346     assert(pp_context->sampler_state_table.bo->virtual);
3347     assert(sizeof(*sampler_dn) == sizeof(int) * 8);
3348     sampler_dn = pp_context->sampler_state_table.bo->virtual;
3349
3350     /* sample dn index 1 */
3351     index = 0;
3352     sampler_dn[index].dw0.denoise_asd_threshold = 0;
3353     sampler_dn[index].dw0.dnmh_delt = 8;
3354     sampler_dn[index].dw0.vdi_walker_y_stride = 0;
3355     sampler_dn[index].dw0.vdi_walker_frame_sharing_enable = 0;
3356     sampler_dn[index].dw0.denoise_maximum_history = 128;      // 128-240
3357     sampler_dn[index].dw0.denoise_stad_threshold = 0;
3358
3359     sampler_dn[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3360     sampler_dn[index].dw1.denoise_moving_pixel_threshold = 0;
3361     sampler_dn[index].dw1.stmm_c2 = 0;
3362     sampler_dn[index].dw1.low_temporal_difference_threshold = 8;
3363     sampler_dn[index].dw1.temporal_difference_threshold = 16;
3364
3365     sampler_dn[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
3366     sampler_dn[index].dw2.bne_edge_th = 1;
3367     sampler_dn[index].dw2.smooth_mv_th = 0;
3368     sampler_dn[index].dw2.sad_tight_th = 5;
3369     sampler_dn[index].dw2.cat_slope_minus1 = 9;
3370     sampler_dn[index].dw2.good_neighbor_th = 4;
3371
3372     sampler_dn[index].dw3.maximum_stmm = 128;
3373     sampler_dn[index].dw3.multipler_for_vecm = 2;
3374     sampler_dn[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3375     sampler_dn[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3376     sampler_dn[index].dw3.stmm_blending_constant_select = 0;
3377
3378     sampler_dn[index].dw4.sdi_delta = 8;
3379     sampler_dn[index].dw4.sdi_threshold = 128;
3380     sampler_dn[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3381     sampler_dn[index].dw4.stmm_shift_up = 0;
3382     sampler_dn[index].dw4.stmm_shift_down = 0;
3383     sampler_dn[index].dw4.minimum_stmm = 0;
3384
3385     sampler_dn[index].dw5.fmd_temporal_difference_threshold = 0;
3386     sampler_dn[index].dw5.sdi_fallback_mode_2_constant = 0;
3387     sampler_dn[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3388     sampler_dn[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3389
3390     sampler_dn[index].dw6.dn_enable = 1;
3391     sampler_dn[index].dw6.di_enable = 0;
3392     sampler_dn[index].dw6.di_partial = 0;
3393     sampler_dn[index].dw6.dndi_top_first = dndi_top_first;
3394     sampler_dn[index].dw6.dndi_stream_id = 1;
3395     sampler_dn[index].dw6.dndi_first_frame = 1;
3396     sampler_dn[index].dw6.progressive_dn = dn_progressive;
3397     sampler_dn[index].dw6.mcdi_enable = 0;
3398     sampler_dn[index].dw6.fmd_tear_threshold = 32;
3399     sampler_dn[index].dw6.cat_th1 = 0;
3400     sampler_dn[index].dw6.fmd2_vertical_difference_threshold = 32;
3401     sampler_dn[index].dw6.fmd1_vertical_difference_threshold = 32;
3402
3403     sampler_dn[index].dw7.sad_tha = 5;
3404     sampler_dn[index].dw7.sad_thb = 10;
3405     sampler_dn[index].dw7.fmd_for_1st_field_of_current_frame = 2;
3406     sampler_dn[index].dw7.mc_pixel_consistency_th = 25;
3407     sampler_dn[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
3408     sampler_dn[index].dw7.vdi_walker_enable = 0;
3409     sampler_dn[index].dw7.neighborpixel_th = 10;
3410     sampler_dn[index].dw7.column_width_minus1 = w / 16;
3411
3412     dri_bo_unmap(pp_context->sampler_state_table.bo);
3413
3414     /* private function & data */
3415     pp_context->pp_x_steps = gen7_pp_dn_x_steps;
3416     pp_context->pp_y_steps = gen7_pp_dn_y_steps;
3417     pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;
3418
3419     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3420     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3421     pp_static_parameter->grf1.di_top_field_first = 0;
3422     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3423
3424     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3425     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3426     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3427
3428     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3429     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3430
3431     pp_dn_context->dest_w = w;
3432     pp_dn_context->dest_h = h;
3433
3434     dst_surface->flags = src_surface->flags;
3435
3436     return VA_STATUS_SUCCESS;
3437 }
3438
3439 static VAStatus
3440 ironlake_pp_initialize(
3441     VADriverContextP   ctx,
3442     struct i965_post_processing_context *pp_context,
3443     const struct i965_surface *src_surface,
3444     const VARectangle *src_rect,
3445     struct i965_surface *dst_surface,
3446     const VARectangle *dst_rect,
3447     int                pp_index,
3448     void *filter_param
3449 )
3450 {
3451     VAStatus va_status;
3452     struct i965_driver_data *i965 = i965_driver_data(ctx);
3453     struct pp_module *pp_module;
3454     dri_bo *bo;
3455     int static_param_size, inline_param_size;
3456
3457     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3458     bo = dri_bo_alloc(i965->intel.bufmgr,
3459                       "surface state & binding table",
3460                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
3461                       4096);
3462     assert(bo);
3463     pp_context->surface_state_binding_table.bo = bo;
3464
3465     dri_bo_unreference(pp_context->curbe.bo);
3466     bo = dri_bo_alloc(i965->intel.bufmgr,
3467                       "constant buffer",
3468                       4096, 
3469                       4096);
3470     assert(bo);
3471     pp_context->curbe.bo = bo;
3472
3473     dri_bo_unreference(pp_context->idrt.bo);
3474     bo = dri_bo_alloc(i965->intel.bufmgr, 
3475                       "interface discriptor", 
3476                       sizeof(struct i965_interface_descriptor), 
3477                       4096);
3478     assert(bo);
3479     pp_context->idrt.bo = bo;
3480     pp_context->idrt.num_interface_descriptors = 0;
3481
3482     dri_bo_unreference(pp_context->sampler_state_table.bo);
3483     bo = dri_bo_alloc(i965->intel.bufmgr, 
3484                       "sampler state table", 
3485                       4096,
3486                       4096);
3487     assert(bo);
3488     dri_bo_map(bo, True);
3489     memset(bo->virtual, 0, bo->size);
3490     dri_bo_unmap(bo);
3491     pp_context->sampler_state_table.bo = bo;
3492
3493     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3494     bo = dri_bo_alloc(i965->intel.bufmgr, 
3495                       "sampler 8x8 state ",
3496                       4096,
3497                       4096);
3498     assert(bo);
3499     pp_context->sampler_state_table.bo_8x8 = bo;
3500
3501     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3502     bo = dri_bo_alloc(i965->intel.bufmgr, 
3503                       "sampler 8x8 state ",
3504                       4096,
3505                       4096);
3506     assert(bo);
3507     pp_context->sampler_state_table.bo_8x8_uv = bo;
3508
3509     dri_bo_unreference(pp_context->vfe_state.bo);
3510     bo = dri_bo_alloc(i965->intel.bufmgr, 
3511                       "vfe state", 
3512                       sizeof(struct i965_vfe_state), 
3513                       4096);
3514     assert(bo);
3515     pp_context->vfe_state.bo = bo;
3516
3517     static_param_size = sizeof(struct pp_static_parameter);
3518     inline_param_size = sizeof(struct pp_inline_parameter);
3519
3520     memset(pp_context->pp_static_parameter, 0, static_param_size);
3521     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
3522     
3523     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
3524     pp_context->current_pp = pp_index;
3525     pp_module = &pp_context->pp_modules[pp_index];
3526     
3527     if (pp_module->initialize)
3528         va_status = pp_module->initialize(ctx, pp_context,
3529                                           src_surface,
3530                                           src_rect,
3531                                           dst_surface,
3532                                           dst_rect,
3533                                           filter_param);
3534     else
3535         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
3536
3537     return va_status;
3538 }
3539
3540 static VAStatus
3541 ironlake_post_processing(
3542     VADriverContextP   ctx,
3543     struct i965_post_processing_context *pp_context,
3544     const struct i965_surface *src_surface,
3545     const VARectangle *src_rect,
3546     struct i965_surface *dst_surface,
3547     const VARectangle *dst_rect,
3548     int                pp_index,
3549     void *filter_param
3550 )
3551 {
3552     VAStatus va_status;
3553
3554     va_status = ironlake_pp_initialize(ctx, pp_context,
3555                                        src_surface,
3556                                        src_rect,
3557                                        dst_surface,
3558                                        dst_rect,
3559                                        pp_index,
3560                                        filter_param);
3561
3562     if (va_status == VA_STATUS_SUCCESS) {
3563         ironlake_pp_states_setup(ctx, pp_context);
3564         ironlake_pp_pipeline_setup(ctx, pp_context);
3565     }
3566
3567     return va_status;
3568 }
3569
3570 static VAStatus
3571 gen6_pp_initialize(
3572     VADriverContextP   ctx,
3573     struct i965_post_processing_context *pp_context,
3574     const struct i965_surface *src_surface,
3575     const VARectangle *src_rect,
3576     struct i965_surface *dst_surface,
3577     const VARectangle *dst_rect,
3578     int                pp_index,
3579     void *filter_param
3580 )
3581 {
3582     VAStatus va_status;
3583     struct i965_driver_data *i965 = i965_driver_data(ctx);
3584     struct pp_module *pp_module;
3585     dri_bo *bo;
3586     int static_param_size, inline_param_size;
3587
3588     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3589     bo = dri_bo_alloc(i965->intel.bufmgr,
3590                       "surface state & binding table",
3591                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
3592                       4096);
3593     assert(bo);
3594     pp_context->surface_state_binding_table.bo = bo;
3595
3596     dri_bo_unreference(pp_context->curbe.bo);
3597     bo = dri_bo_alloc(i965->intel.bufmgr,
3598                       "constant buffer",
3599                       4096, 
3600                       4096);
3601     assert(bo);
3602     pp_context->curbe.bo = bo;
3603
3604     dri_bo_unreference(pp_context->idrt.bo);
3605     bo = dri_bo_alloc(i965->intel.bufmgr, 
3606                       "interface discriptor", 
3607                       sizeof(struct gen6_interface_descriptor_data), 
3608                       4096);
3609     assert(bo);
3610     pp_context->idrt.bo = bo;
3611     pp_context->idrt.num_interface_descriptors = 0;
3612
3613     dri_bo_unreference(pp_context->sampler_state_table.bo);
3614     bo = dri_bo_alloc(i965->intel.bufmgr, 
3615                       "sampler state table", 
3616                       4096,
3617                       4096);
3618     assert(bo);
3619     dri_bo_map(bo, True);
3620     memset(bo->virtual, 0, bo->size);
3621     dri_bo_unmap(bo);
3622     pp_context->sampler_state_table.bo = bo;
3623
3624     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3625     bo = dri_bo_alloc(i965->intel.bufmgr, 
3626                       "sampler 8x8 state ",
3627                       4096,
3628                       4096);
3629     assert(bo);
3630     pp_context->sampler_state_table.bo_8x8 = bo;
3631
3632     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3633     bo = dri_bo_alloc(i965->intel.bufmgr, 
3634                       "sampler 8x8 state ",
3635                       4096,
3636                       4096);
3637     assert(bo);
3638     pp_context->sampler_state_table.bo_8x8_uv = bo;
3639
3640     dri_bo_unreference(pp_context->vfe_state.bo);
3641     bo = dri_bo_alloc(i965->intel.bufmgr, 
3642                       "vfe state", 
3643                       sizeof(struct i965_vfe_state), 
3644                       4096);
3645     assert(bo);
3646     pp_context->vfe_state.bo = bo;
3647     
3648     if (IS_GEN7(i965->intel.device_id)) {
3649         static_param_size = sizeof(struct gen7_pp_static_parameter);
3650         inline_param_size = sizeof(struct gen7_pp_inline_parameter);
3651     } else {
3652         static_param_size = sizeof(struct pp_static_parameter);
3653         inline_param_size = sizeof(struct pp_inline_parameter);
3654     }
3655
3656     memset(pp_context->pp_static_parameter, 0, static_param_size);
3657     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
3658
3659     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
3660     pp_context->current_pp = pp_index;
3661     pp_module = &pp_context->pp_modules[pp_index];
3662     
3663     if (pp_module->initialize)
3664         va_status = pp_module->initialize(ctx, pp_context,
3665                                           src_surface,
3666                                           src_rect,
3667                                           dst_surface,
3668                                           dst_rect,
3669                                           filter_param);
3670     else
3671         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
3672
3673     calculate_boundary_block_mask(pp_context, dst_rect);
3674     
3675     return va_status;
3676 }
3677
3678 static void
3679 gen6_pp_interface_descriptor_table(VADriverContextP   ctx,
3680                                    struct i965_post_processing_context *pp_context)
3681 {
3682     struct i965_driver_data *i965 = i965_driver_data(ctx);
3683     struct gen6_interface_descriptor_data *desc;
3684     dri_bo *bo;
3685     int pp_index = pp_context->current_pp;
3686
3687     bo = pp_context->idrt.bo;
3688     dri_bo_map(bo, True);
3689     assert(bo->virtual);
3690     desc = bo->virtual;
3691     memset(desc, 0, sizeof(*desc));
3692     desc->desc0.kernel_start_pointer = 
3693         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
3694     desc->desc1.single_program_flow = 1;
3695     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
3696     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
3697     desc->desc2.sampler_state_pointer = 
3698         pp_context->sampler_state_table.bo->offset >> 5;
3699     desc->desc3.binding_table_entry_count = 0;
3700     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
3701     desc->desc4.constant_urb_entry_read_offset = 0;
3702
3703     if (IS_GEN7(i965->intel.device_id))
3704         desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */
3705     else
3706         desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
3707
3708     dri_bo_emit_reloc(bo,
3709                       I915_GEM_DOMAIN_INSTRUCTION, 0,
3710                       0,
3711                       offsetof(struct gen6_interface_descriptor_data, desc0),
3712                       pp_context->pp_modules[pp_index].kernel.bo);
3713
3714     dri_bo_emit_reloc(bo,
3715                       I915_GEM_DOMAIN_INSTRUCTION, 0,
3716                       desc->desc2.sampler_count << 2,
3717                       offsetof(struct gen6_interface_descriptor_data, desc2),
3718                       pp_context->sampler_state_table.bo);
3719
3720     dri_bo_unmap(bo);
3721     pp_context->idrt.num_interface_descriptors++;
3722 }
3723
3724 static void
3725 gen6_pp_upload_constants(VADriverContextP ctx,
3726                          struct i965_post_processing_context *pp_context)
3727 {
3728     struct i965_driver_data *i965 = i965_driver_data(ctx);
3729     unsigned char *constant_buffer;
3730     int param_size;
3731
3732     assert(sizeof(struct pp_static_parameter) == 128);
3733     assert(sizeof(struct gen7_pp_static_parameter) == 192);
3734
3735     if (IS_GEN7(i965->intel.device_id))
3736         param_size = sizeof(struct gen7_pp_static_parameter);
3737     else
3738         param_size = sizeof(struct pp_static_parameter);
3739
3740     dri_bo_map(pp_context->curbe.bo, 1);
3741     assert(pp_context->curbe.bo->virtual);
3742     constant_buffer = pp_context->curbe.bo->virtual;
3743     memcpy(constant_buffer, pp_context->pp_static_parameter, param_size);
3744     dri_bo_unmap(pp_context->curbe.bo);
3745 }
3746
/*
 * Prepare the indirect state used by the post-processing kernel: fill the
 * interface descriptor table, then upload the static constants.
 */
static void
gen6_pp_states_setup(VADriverContextP ctx,
                     struct i965_post_processing_context *pp_context)
{
    gen6_pp_interface_descriptor_table(ctx, pp_context);
    gen6_pp_upload_constants(ctx, pp_context);
}
3754
3755 static void
3756 gen6_pp_pipeline_select(VADriverContextP ctx,
3757                         struct i965_post_processing_context *pp_context)
3758 {
3759     struct intel_batchbuffer *batch = pp_context->batch;
3760
3761     BEGIN_BATCH(batch, 1);
3762     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
3763     ADVANCE_BATCH(batch);
3764 }
3765
3766 static void
3767 gen6_pp_state_base_address(VADriverContextP ctx,
3768                            struct i965_post_processing_context *pp_context)
3769 {
3770     struct intel_batchbuffer *batch = pp_context->batch;
3771
3772     BEGIN_BATCH(batch, 10);
3773     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
3774     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3775     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
3776     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3777     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3778     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3779     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3780     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3781     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3782     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3783     ADVANCE_BATCH(batch);
3784 }
3785
3786 static void
3787 gen6_pp_vfe_state(VADriverContextP ctx,
3788                   struct i965_post_processing_context *pp_context)
3789 {
3790     struct intel_batchbuffer *batch = pp_context->batch;
3791
3792     BEGIN_BATCH(batch, 8);
3793     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
3794     OUT_BATCH(batch, 0);
3795     OUT_BATCH(batch,
3796               (pp_context->urb.num_vfe_entries - 1) << 16 |
3797               pp_context->urb.num_vfe_entries << 8);
3798     OUT_BATCH(batch, 0);
3799     OUT_BATCH(batch,
3800               (pp_context->urb.size_vfe_entry * 2) << 16 |  /* URB Entry Allocation Size, in 256 bits unit */
3801               (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2)); /* CURBE Allocation Size, in 256 bits unit */
3802     OUT_BATCH(batch, 0);
3803     OUT_BATCH(batch, 0);
3804     OUT_BATCH(batch, 0);
3805     ADVANCE_BATCH(batch);
3806 }
3807
3808 static void
3809 gen6_pp_curbe_load(VADriverContextP ctx,
3810                    struct i965_post_processing_context *pp_context)
3811 {
3812     struct intel_batchbuffer *batch = pp_context->batch;
3813
3814     assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32 <= pp_context->curbe.bo->size);
3815
3816     BEGIN_BATCH(batch, 4);
3817     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
3818     OUT_BATCH(batch, 0);
3819     OUT_BATCH(batch,
3820               pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32);
3821     OUT_RELOC(batch, 
3822               pp_context->curbe.bo,
3823               I915_GEM_DOMAIN_INSTRUCTION, 0,
3824               0);
3825     ADVANCE_BATCH(batch);
3826 }
3827
3828 static void
3829 gen6_interface_descriptor_load(VADriverContextP ctx,
3830                                struct i965_post_processing_context *pp_context)
3831 {
3832     struct intel_batchbuffer *batch = pp_context->batch;
3833
3834     BEGIN_BATCH(batch, 4);
3835     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
3836     OUT_BATCH(batch, 0);
3837     OUT_BATCH(batch,
3838               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
3839     OUT_RELOC(batch, 
3840               pp_context->idrt.bo,
3841               I915_GEM_DOMAIN_INSTRUCTION, 0,
3842               0);
3843     ADVANCE_BATCH(batch);
3844 }
3845
3846 static void update_block_mask_parameter(struct i965_post_processing_context *pp_context, int x, int y, int x_steps, int y_steps) 
3847 {
3848     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3849
3850     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3851     pp_inline_parameter->grf6.block_vertical_mask_bottom = pp_context->block_vertical_mask_bottom;
3852     // for the first block, it always on the left edge. the second block will reload horizontal_mask from grf6.block_horizontal_mask_middle
3853     pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_left;
3854     pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
3855     pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_right;
3856
3857     /* 1 x N */
3858     if (x_steps == 1) {
3859         if (y == y_steps-1) {
3860             pp_inline_parameter->grf5.block_vertical_mask = pp_context->block_vertical_mask_bottom;
3861         }
3862         else {
3863             pp_inline_parameter->grf6.block_vertical_mask_bottom = 0xff;
3864         }
3865     }
3866
3867     /* M x 1 */
3868     if (y_steps == 1) {
3869         if (x == 0) { // all blocks in this group are on the left edge
3870             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_left;
3871             pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_left; 
3872         }
3873         else if (x == x_steps-1) {
3874             pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_right;
3875             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_right;
3876         }
3877         else {
3878             pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3879             pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
3880             pp_inline_parameter->grf6.block_horizontal_mask_right = 0xffff;
3881         }
3882     }
3883
3884 }
3885
3886 static void
3887 gen6_pp_object_walker(VADriverContextP ctx,
3888                       struct i965_post_processing_context *pp_context)
3889 {
3890     struct i965_driver_data *i965 = i965_driver_data(ctx);
3891     struct intel_batchbuffer *batch = pp_context->batch;
3892     int x, x_steps, y, y_steps;
3893     int param_size, command_length_in_dws;
3894     dri_bo *command_buffer;
3895     unsigned int *command_ptr;
3896
3897     if (IS_GEN7(i965->intel.device_id))
3898         param_size = sizeof(struct gen7_pp_inline_parameter);
3899     else
3900         param_size = sizeof(struct pp_inline_parameter);
3901
3902     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
3903     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
3904     command_length_in_dws = 6 + (param_size >> 2);
3905     command_buffer = dri_bo_alloc(i965->intel.bufmgr,
3906                                   "command objects buffer",
3907                                   command_length_in_dws * 4 * x_steps * y_steps + 8,
3908                                   4096);
3909
3910     dri_bo_map(command_buffer, 1);
3911     command_ptr = command_buffer->virtual;
3912
3913     for (y = 0; y < y_steps; y++) {
3914         for (x = 0; x < x_steps; x++) {
3915             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
3916                 // some common block parameter update goes here, apply to all pp functions
3917                 if (IS_GEN6(i965->intel.device_id))
3918                     update_block_mask_parameter (pp_context, x, y, x_steps, y_steps);
3919                 
3920                 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
3921                 *command_ptr++ = 0;
3922                 *command_ptr++ = 0;
3923                 *command_ptr++ = 0;
3924                 *command_ptr++ = 0;
3925                 *command_ptr++ = 0;
3926                 memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
3927                 command_ptr += (param_size >> 2);
3928             }
3929         }
3930     }
3931
3932     if (command_length_in_dws * x_steps * y_steps % 2 == 0)
3933         *command_ptr++ = 0;
3934
3935     *command_ptr = MI_BATCH_BUFFER_END;
3936
3937     dri_bo_unmap(command_buffer);
3938
3939     BEGIN_BATCH(batch, 2);
3940     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
3941     OUT_RELOC(batch, command_buffer, 
3942               I915_GEM_DOMAIN_COMMAND, 0, 
3943               0);
3944     ADVANCE_BATCH(batch);
3945     
3946     dri_bo_unreference(command_buffer);
3947
3948     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
3949      * will cause control to pass back to ring buffer 
3950      */
3951     intel_batchbuffer_end_atomic(batch);
3952     intel_batchbuffer_flush(batch);
3953     intel_batchbuffer_start_atomic(batch, 0x1000);
3954 }
3955
/*
 * Emit the complete command sequence for one post-processing pass inside a
 * single atomic batch section: flush, pipeline select, base addresses, VFE
 * state, CURBE load, interface descriptor load and finally the per-block
 * media objects.
 */
static void
gen6_pp_pipeline_setup(VADriverContextP ctx,
                       struct i965_post_processing_context *pp_context)
{
    struct intel_batchbuffer *batch = pp_context->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_pp_pipeline_select(ctx, pp_context);
    gen6_pp_state_base_address(ctx, pp_context);
    gen6_pp_vfe_state(ctx, pp_context);
    gen6_pp_curbe_load(ctx, pp_context);
    gen6_interface_descriptor_load(ctx, pp_context);
    /* The walker ends the atomic section, flushes, and opens a new one,
     * which is what the end_atomic below closes. */
    gen6_pp_object_walker(ctx, pp_context);
    intel_batchbuffer_end_atomic(batch);
}
3972
3973 static VAStatus
3974 gen6_post_processing(
3975     VADriverContextP   ctx,
3976     struct i965_post_processing_context *pp_context,
3977     const struct i965_surface *src_surface,
3978     const VARectangle *src_rect,
3979     struct i965_surface *dst_surface,
3980     const VARectangle *dst_rect,
3981     int                pp_index,
3982     void * filter_param
3983 )
3984 {
3985     VAStatus va_status;
3986     
3987     va_status = gen6_pp_initialize(ctx, pp_context,
3988                                    src_surface,
3989                                    src_rect,
3990                                    dst_surface,
3991                                    dst_rect,
3992                                    pp_index,
3993                                    filter_param);
3994
3995     if (va_status == VA_STATUS_SUCCESS) {
3996         gen6_pp_states_setup(ctx, pp_context);
3997         gen6_pp_pipeline_setup(ctx, pp_context);
3998     }
3999
4000     return va_status;
4001 }
4002
4003 static VAStatus
4004 i965_post_processing_internal(
4005     VADriverContextP   ctx,
4006     struct i965_post_processing_context *pp_context,
4007     const struct i965_surface *src_surface,
4008     const VARectangle *src_rect,
4009     struct i965_surface *dst_surface,
4010     const VARectangle *dst_rect,
4011     int                pp_index,
4012     void *filter_param
4013 )
4014 {
4015     struct i965_driver_data *i965 = i965_driver_data(ctx);
4016     VAStatus va_status;
4017
4018     if (IS_GEN6(i965->intel.device_id) ||
4019         IS_GEN7(i965->intel.device_id))
4020         va_status = gen6_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
4021     else
4022         va_status = ironlake_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
4023     
4024     return va_status;
4025 }
4026
/*
 * Prototypes for the surface create/destroy entry points used further down
 * in this file to manage temporary output surfaces.  Their definitions are
 * not part of this section of the file.
 */
VAStatus 
i965_DestroySurfaces(VADriverContextP ctx,
                     VASurfaceID *surface_list,
                     int num_surfaces);
VAStatus 
i965_CreateSurfaces(VADriverContextP ctx,
                    int width,
                    int height,
                    int format,
                    int num_surfaces,
                    VASurfaceID *surfaces);
4038
/*
 * Split a packed 0xAARRGGBB colour into 8-bit Y, U, V and alpha components.
 *
 * The conversion uses fixed integer coefficients scaled by 1000, with an
 * offset of 16 for Y and 128 for U and V; integer division truncates toward
 * zero.  Alpha is passed through unchanged.
 */
static void
rgb_to_yuv(unsigned int argb,
           unsigned char *y,
           unsigned char *u,
           unsigned char *v,
           unsigned char *a)
{
    const int red = ((argb >> 16) & 0xff);
    const int green = ((argb >> 8) & 0xff);
    const int blue = ((argb >> 0) & 0xff);

    /* Weighted sums, still scaled by 1000. */
    const int luma_x1000 = 257 * red + 504 * green + 98 * blue;
    const int cr_x1000 = 439 * red - 368 * green - 71 * blue;
    const int cb_x1000 = -148 * red - 291 * green + 439 * blue;

    *y = luma_x1000 / 1000 + 16;
    *v = cr_x1000 / 1000 + 128;
    *u = cb_x1000 / 1000 + 128;
    *a = ((argb >> 24) & 0xff);
}
4055
4056 static void 
4057 i965_vpp_clear_surface(VADriverContextP ctx,
4058                        struct i965_post_processing_context *pp_context,
4059                        VASurfaceID surface,
4060                        unsigned int color)
4061 {
4062     struct i965_driver_data *i965 = i965_driver_data(ctx);
4063     struct intel_batchbuffer *batch = pp_context->batch;
4064     struct object_surface *obj_surface = SURFACE(surface);
4065     unsigned int blt_cmd, br13;
4066     unsigned int tiling = 0, swizzle = 0;
4067     int pitch;
4068     unsigned char y, u, v, a = 0;
4069
4070     /* Currently only support NV12 surface */
4071     if (!obj_surface || obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
4072         return;
4073
4074     rgb_to_yuv(color, &y, &u, &v, &a);
4075
4076     if (a == 0)
4077         return;
4078
4079     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
4080     blt_cmd = XY_COLOR_BLT_CMD;
4081     pitch = obj_surface->width;
4082
4083     if (tiling != I915_TILING_NONE) {
4084         blt_cmd |= XY_COLOR_BLT_DST_TILED;
4085         pitch >>= 2;
4086     }
4087
4088     br13 = 0xf0 << 16;
4089     br13 |= BR13_8;
4090     br13 |= pitch;
4091
4092     if (IS_GEN6(i965->intel.device_id) ||
4093         IS_GEN7(i965->intel.device_id)) {
4094         intel_batchbuffer_start_atomic_blt(batch, 48);
4095         BEGIN_BLT_BATCH(batch, 12);
4096     } else {
4097         intel_batchbuffer_start_atomic(batch, 48);
4098         BEGIN_BATCH(batch, 12);
4099     }
4100
4101     OUT_BATCH(batch, blt_cmd);
4102     OUT_BATCH(batch, br13);
4103     OUT_BATCH(batch,
4104               0 << 16 |
4105               0);
4106     OUT_BATCH(batch,
4107               obj_surface->height << 16 |
4108               obj_surface->width);
4109     OUT_RELOC(batch, obj_surface->bo, 
4110               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4111               0);
4112     OUT_BATCH(batch, y);
4113
4114     br13 = 0xf0 << 16;
4115     br13 |= BR13_565;
4116     br13 |= pitch;
4117
4118     OUT_BATCH(batch, blt_cmd);
4119     OUT_BATCH(batch, br13);
4120     OUT_BATCH(batch,
4121               0 << 16 |
4122               0);
4123     OUT_BATCH(batch,
4124               obj_surface->height / 2 << 16 |
4125               obj_surface->width / 2);
4126     OUT_RELOC(batch, obj_surface->bo, 
4127               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4128               obj_surface->width * obj_surface->y_cb_offset);
4129     OUT_BATCH(batch, v << 8 | u);
4130
4131     ADVANCE_BATCH(batch);
4132     intel_batchbuffer_end_atomic(batch);
4133 }
4134
4135 VAStatus
4136 i965_scaling_processing(
4137     VADriverContextP   ctx,
4138     VASurfaceID        src_surface_id,
4139     const VARectangle *src_rect,
4140     VASurfaceID        dst_surface_id,
4141     const VARectangle *dst_rect,
4142     unsigned int       flags)
4143 {
4144     VAStatus va_status = VA_STATUS_SUCCESS;
4145     struct i965_driver_data *i965 = i965_driver_data(ctx);
4146     struct object_surface *src_surface_obj = SURFACE(src_surface_id);
4147     struct object_surface *dst_surface_obj = SURFACE(dst_surface_id);
4148  
4149     assert(src_surface_obj->fourcc == VA_FOURCC('N', 'V', '1', '2'));
4150     assert(dst_surface_obj->fourcc == VA_FOURCC('N', 'V', '1', '2'));
4151
4152     if (HAS_PP(i965) && (flags & I965_PP_FLAG_AVS)) {
4153         struct i965_surface src_surface;
4154         struct i965_surface dst_surface;
4155
4156          _i965LockMutex(&i965->pp_mutex);
4157
4158          src_surface.id = src_surface_id;
4159          src_surface.type = I965_SURFACE_TYPE_SURFACE;
4160          src_surface.flags = I965_SURFACE_FLAG_FRAME;
4161          dst_surface.id = dst_surface_id;
4162          dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4163          dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4164
4165          va_status = i965_post_processing_internal(ctx, i965->pp_context,
4166                                                    &src_surface,
4167                                                    src_rect,
4168                                                    &dst_surface,
4169                                                    dst_rect,
4170                                                    PP_NV12_AVS,
4171                                                    NULL);
4172
4173          _i965UnlockMutex(&i965->pp_mutex);
4174     }
4175
4176     return va_status;
4177 }
4178
4179 VASurfaceID
4180 i965_post_processing(
4181     VADriverContextP   ctx,
4182     VASurfaceID        surface,
4183     const VARectangle *src_rect,
4184     const VARectangle *dst_rect,
4185     unsigned int       flags,
4186     int               *has_done_scaling  
4187 )
4188 {
4189     struct i965_driver_data *i965 = i965_driver_data(ctx);
4190     VASurfaceID in_surface_id = surface;
4191     VASurfaceID out_surface_id = VA_INVALID_ID;
4192     
4193     *has_done_scaling = 0;
4194
4195     if (HAS_PP(i965)) {
4196         struct object_surface *obj_surface;
4197         VAStatus status;
4198         struct i965_surface src_surface;
4199         struct i965_surface dst_surface;
4200
4201         obj_surface = SURFACE(in_surface_id);
4202
4203         /* Currently only support post processing for NV12 surface */
4204         if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
4205             return out_surface_id;
4206
4207         _i965LockMutex(&i965->pp_mutex);
4208
4209         if (flags & I965_PP_FLAG_MCDI) {
4210             status = i965_CreateSurfaces(ctx,
4211                                          obj_surface->orig_width,
4212                                          obj_surface->orig_height,
4213                                          VA_RT_FORMAT_YUV420,
4214                                          1,
4215                                          &out_surface_id);
4216             assert(status == VA_STATUS_SUCCESS);
4217             obj_surface = SURFACE(out_surface_id);
4218             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4219             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
4220             src_surface.id = in_surface_id;
4221             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4222             src_surface.flags = (flags & I965_PP_FLAG_TOP_FIELD) ? 
4223                 I965_SURFACE_FLAG_TOP_FIELD_FIRST : I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST;
4224             dst_surface.id = out_surface_id;
4225             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4226             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4227
4228             i965_post_processing_internal(ctx, i965->pp_context,
4229                                           &src_surface,
4230                                           src_rect,
4231                                           &dst_surface,
4232                                           dst_rect,
4233                                           PP_NV12_DNDI,
4234                                           NULL);
4235         }
4236
4237         if (flags & I965_PP_FLAG_AVS) {
4238             struct i965_render_state *render_state = &i965->render_state;
4239             struct intel_region *dest_region = render_state->draw_region;
4240
4241             if (out_surface_id != VA_INVALID_ID)
4242                 in_surface_id = out_surface_id;
4243
4244             status = i965_CreateSurfaces(ctx,
4245                                          dest_region->width,
4246                                          dest_region->height,
4247                                          VA_RT_FORMAT_YUV420,
4248                                          1,
4249                                          &out_surface_id);
4250             assert(status == VA_STATUS_SUCCESS);
4251             obj_surface = SURFACE(out_surface_id);
4252             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4253             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
4254             src_surface.id = in_surface_id;
4255             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4256             src_surface.flags = I965_SURFACE_FLAG_FRAME;
4257             dst_surface.id = out_surface_id;
4258             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4259             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4260
4261             i965_post_processing_internal(ctx, i965->pp_context,
4262                                           &src_surface,
4263                                           src_rect,
4264                                           &dst_surface,
4265                                           dst_rect,
4266                                           PP_NV12_AVS,
4267                                           NULL);
4268
4269             if (in_surface_id != surface)
4270                 i965_DestroySurfaces(ctx, &in_surface_id, 1);
4271                 
4272             *has_done_scaling = 1;
4273         }
4274
4275         _i965UnlockMutex(&i965->pp_mutex);
4276     }
4277
4278     return out_surface_id;
4279 }       
4280
4281 static VAStatus
4282 i965_image_pl1_rgbx_processing(VADriverContextP ctx,
4283                           const struct i965_surface *src_surface,
4284                           const VARectangle *src_rect,
4285                           struct i965_surface *dst_surface,
4286                           const VARectangle *dst_rect)
4287 {
4288     struct i965_driver_data *i965 = i965_driver_data(ctx);
4289     struct i965_post_processing_context *pp_context = i965->pp_context;
4290     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4291
4292     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4293         i965_post_processing_internal(ctx, i965->pp_context,
4294                                       src_surface,
4295                                       src_rect,
4296                                       dst_surface,
4297                                       dst_rect,
4298                                       PP_RGBX_LOAD_SAVE_NV12,
4299                                       NULL);
4300     } else {
4301         assert(0);
4302         return VA_STATUS_ERROR_UNKNOWN;
4303     }
4304
4305     intel_batchbuffer_flush(pp_context->batch);
4306
4307     return VA_STATUS_SUCCESS;
4308 }
4309
4310 static VAStatus
4311 i965_image_pl3_processing(VADriverContextP ctx,
4312                           const struct i965_surface *src_surface,
4313                           const VARectangle *src_rect,
4314                           struct i965_surface *dst_surface,
4315                           const VARectangle *dst_rect)
4316 {
4317     struct i965_driver_data *i965 = i965_driver_data(ctx);
4318     struct i965_post_processing_context *pp_context = i965->pp_context;
4319     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4320     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
4321
4322     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4323         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4324                                                  src_surface,
4325                                                  src_rect,
4326                                                  dst_surface,
4327                                                  dst_rect,
4328                                                  PP_PL3_LOAD_SAVE_N12,
4329                                                  NULL);
4330     } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || 
4331                fourcc == VA_FOURCC('I', 'M', 'C', '3') || 
4332                fourcc == VA_FOURCC('Y', 'V', '1', '2') || 
4333                fourcc == VA_FOURCC('I', '4', '2', '0')) {
4334         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4335                                                  src_surface,
4336                                                  src_rect,
4337                                                  dst_surface,
4338                                                  dst_rect,
4339                                                  PP_PL3_LOAD_SAVE_PL3,
4340                                                  NULL);
4341     } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') ||
4342                fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
4343         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4344                                                  src_surface,
4345                                                  src_rect,
4346                                                  dst_surface,
4347                                                  dst_rect,
4348                                                  PP_PL3_LOAD_SAVE_PA,
4349                                                  NULL);
4350     }
4351     else {
4352         assert(0);
4353     }
4354
4355     intel_batchbuffer_flush(pp_context->batch);
4356
4357     return vaStatus;
4358 }
4359
4360 static VAStatus
4361 i965_image_pl2_processing(VADriverContextP ctx,
4362                           const struct i965_surface *src_surface,
4363                           const VARectangle *src_rect,
4364                           struct i965_surface *dst_surface,
4365                           const VARectangle *dst_rect)
4366 {
4367     struct i965_driver_data *i965 = i965_driver_data(ctx);
4368     struct i965_post_processing_context *pp_context = i965->pp_context;
4369     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4370     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
4371
4372     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4373         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4374                                                  src_surface,
4375                                                  src_rect,
4376                                                  dst_surface,
4377                                                  dst_rect,
4378                                                  PP_NV12_LOAD_SAVE_N12,
4379                                                  NULL);
4380     } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || 
4381                fourcc == VA_FOURCC('I', 'M', 'C', '3') || 
4382                fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
4383                fourcc == VA_FOURCC('I', '4', '2', '0') ) {
4384         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4385                                                  src_surface,
4386                                                  src_rect,
4387                                                  dst_surface,
4388                                                  dst_rect,
4389                                                  PP_NV12_LOAD_SAVE_PL3,
4390                                                  NULL);
4391     } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') ||
4392                fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
4393         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4394                                                  src_surface,
4395                                                  src_rect,
4396                                                  dst_surface,
4397                                                  dst_rect,
4398                                                  PP_NV12_LOAD_SAVE_PA,
4399                                                      NULL);
4400     } else if (fourcc == VA_FOURCC('B', 'G', 'R', 'X') || 
4401                fourcc == VA_FOURCC('B', 'G', 'R', 'A') ||
4402                fourcc == VA_FOURCC('R', 'G', 'B', 'X') ||
4403                fourcc == VA_FOURCC('R', 'G', 'B', 'A') ) {
4404         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4405                                       src_surface,
4406                                       src_rect,
4407                                       dst_surface,
4408                                       dst_rect,
4409                                       PP_NV12_LOAD_SAVE_RGBX,
4410                                       NULL);
4411     } else {
4412         assert(0);
4413         return VA_STATUS_ERROR_UNKNOWN;
4414     }
4415
4416     intel_batchbuffer_flush(pp_context->batch);
4417
4418     return vaStatus;
4419 }
4420
4421 static VAStatus
4422 i965_image_pl1_processing(VADriverContextP ctx,
4423                           const struct i965_surface *src_surface,
4424                           const VARectangle *src_rect,
4425                           struct i965_surface *dst_surface,
4426                           const VARectangle *dst_rect)
4427 {
4428     struct i965_driver_data *i965 = i965_driver_data(ctx);
4429     struct i965_post_processing_context *pp_context = i965->pp_context;
4430     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4431
4432     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4433         i965_post_processing_internal(ctx, i965->pp_context,
4434                                       src_surface,
4435                                       src_rect,
4436                                       dst_surface,
4437                                       dst_rect,
4438                                       PP_PA_LOAD_SAVE_NV12,
4439                                       NULL);
4440     }
4441     else if (fourcc == VA_FOURCC_YV12) {
4442         i965_post_processing_internal(ctx, i965->pp_context,
4443                                       src_surface,
4444                                       src_rect,
4445                                       dst_surface,
4446                                       dst_rect,
4447                                       PP_PA_LOAD_SAVE_PL3,
4448                                       NULL);
4449
4450     }
4451     else {
4452         return VA_STATUS_ERROR_UNKNOWN;
4453     }
4454
4455     intel_batchbuffer_flush(pp_context->batch);
4456
4457     return VA_STATUS_SUCCESS;
4458 }
4459
4460 VAStatus
4461 i965_image_processing(VADriverContextP ctx,
4462                       const struct i965_surface *src_surface,
4463                       const VARectangle *src_rect,
4464                       struct i965_surface *dst_surface,
4465                       const VARectangle *dst_rect)
4466 {
4467     struct i965_driver_data *i965 = i965_driver_data(ctx);
4468     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
4469
4470     if (HAS_PP(i965)) {
4471         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
4472
4473         _i965LockMutex(&i965->pp_mutex);
4474
4475         switch (fourcc) {
4476         case VA_FOURCC('Y', 'V', '1', '2'):
4477         case VA_FOURCC('I', '4', '2', '0'):
4478         case VA_FOURCC('I', 'M', 'C', '1'):
4479         case VA_FOURCC('I', 'M', 'C', '3'):
4480             status = i965_image_pl3_processing(ctx,
4481                                                src_surface,
4482                                                src_rect,
4483                                                dst_surface,
4484                                                dst_rect);
4485             break;
4486
4487         case  VA_FOURCC('N', 'V', '1', '2'):
4488             status = i965_image_pl2_processing(ctx,
4489                                                src_surface,
4490                                                src_rect,
4491                                                dst_surface,
4492                                                dst_rect);
4493             break;
4494         case  VA_FOURCC('Y', 'U', 'Y', '2'):
4495         case VA_FOURCC('U', 'Y', 'V', 'Y'):
4496             status = i965_image_pl1_processing(ctx,
4497                                                src_surface,
4498                                                src_rect,
4499                                                dst_surface,
4500                                                dst_rect);
4501             break;
4502         case VA_FOURCC('B', 'G', 'R', 'A'):
4503         case VA_FOURCC('B', 'G', 'R', 'X'):
4504         case VA_FOURCC('R', 'G', 'B', 'A'):
4505         case VA_FOURCC('R', 'G', 'B', 'X'):
4506             status = i965_image_pl1_rgbx_processing(ctx,
4507                                                src_surface,
4508                                                src_rect,
4509                                                dst_surface,
4510                                                dst_rect);
4511             break;
4512         default:
4513             status = VA_STATUS_ERROR_UNIMPLEMENTED;
4514             break;
4515         }
4516         
4517         _i965UnlockMutex(&i965->pp_mutex);
4518     }
4519
4520     return status;
4521 }       
4522
4523 static void
4524 i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
4525 {
4526     int i;
4527
4528     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4529     pp_context->surface_state_binding_table.bo = NULL;
4530
4531     dri_bo_unreference(pp_context->curbe.bo);
4532     pp_context->curbe.bo = NULL;
4533
4534     dri_bo_unreference(pp_context->sampler_state_table.bo);
4535     pp_context->sampler_state_table.bo = NULL;
4536
4537     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4538     pp_context->sampler_state_table.bo_8x8 = NULL;
4539
4540     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4541     pp_context->sampler_state_table.bo_8x8_uv = NULL;
4542
4543     dri_bo_unreference(pp_context->idrt.bo);
4544     pp_context->idrt.bo = NULL;
4545     pp_context->idrt.num_interface_descriptors = 0;
4546
4547     dri_bo_unreference(pp_context->vfe_state.bo);
4548     pp_context->vfe_state.bo = NULL;
4549
4550     dri_bo_unreference(pp_context->stmm.bo);
4551     pp_context->stmm.bo = NULL;
4552
4553     for (i = 0; i < NUM_PP_MODULES; i++) {
4554         struct pp_module *pp_module = &pp_context->pp_modules[i];
4555
4556         dri_bo_unreference(pp_module->kernel.bo);
4557         pp_module->kernel.bo = NULL;
4558     }
4559
4560     free(pp_context->pp_static_parameter);
4561     free(pp_context->pp_inline_parameter);
4562     pp_context->pp_static_parameter = NULL;
4563     pp_context->pp_inline_parameter = NULL;
4564 }
4565
4566 Bool
4567 i965_post_processing_terminate(VADriverContextP ctx)
4568 {
4569     struct i965_driver_data *i965 = i965_driver_data(ctx);
4570     struct i965_post_processing_context *pp_context = i965->pp_context;
4571
4572     if (pp_context) {
4573         i965_post_processing_context_finalize(pp_context);
4574         free(pp_context);
4575     }
4576
4577     i965->pp_context = NULL;
4578
4579     return True;
4580 }
4581
4582 static void
4583 i965_post_processing_context_init(VADriverContextP ctx,
4584                                   struct i965_post_processing_context *pp_context,
4585                                   struct intel_batchbuffer *batch)
4586 {
4587     struct i965_driver_data *i965 = i965_driver_data(ctx);
4588     int i;
4589
4590     pp_context->urb.size = URB_SIZE((&i965->intel));
4591     pp_context->urb.num_vfe_entries = 32;
4592     pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
4593     pp_context->urb.num_cs_entries = 1;
4594     
4595     if (IS_GEN7(i965->intel.device_id))
4596         pp_context->urb.size_cs_entry = 4;      /* in 512 bits unit */
4597     else
4598         pp_context->urb.size_cs_entry = 2;
4599
4600     pp_context->urb.vfe_start = 0;
4601     pp_context->urb.cs_start = pp_context->urb.vfe_start + 
4602         pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
4603     assert(pp_context->urb.cs_start + 
4604            pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
4605
4606     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
4607     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
4608     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
4609
4610     if (IS_GEN7(i965->intel.device_id))
4611         memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
4612     else if (IS_GEN6(i965->intel.device_id))
4613         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
4614     else if (IS_IRONLAKE(i965->intel.device_id))
4615         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
4616
4617     for (i = 0; i < NUM_PP_MODULES; i++) {
4618         struct pp_module *pp_module = &pp_context->pp_modules[i];
4619         dri_bo_unreference(pp_module->kernel.bo);
4620         if (pp_module->kernel.bin && pp_module->kernel.size) {
4621             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
4622                                                 pp_module->kernel.name,
4623                                                 pp_module->kernel.size,
4624                                                 4096);
4625             assert(pp_module->kernel.bo);
4626             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
4627         } else {
4628             pp_module->kernel.bo = NULL;
4629         }
4630     }
4631
4632     /* static & inline parameters */
4633     if (IS_GEN7(i965->intel.device_id)) {
4634         pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
4635         pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
4636     } else {
4637         pp_context->pp_static_parameter = calloc(sizeof(struct pp_static_parameter), 1);
4638         pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
4639     }
4640
4641     pp_context->batch = batch;
4642 }
4643
4644 Bool
4645 i965_post_processing_init(VADriverContextP ctx)
4646 {
4647     struct i965_driver_data *i965 = i965_driver_data(ctx);
4648     struct i965_post_processing_context *pp_context = i965->pp_context;
4649
4650     if (HAS_PP(i965)) {
4651         if (pp_context == NULL) {
4652             pp_context = calloc(1, sizeof(*pp_context));
4653             i965_post_processing_context_init(ctx, pp_context, i965->batch);
4654             i965->pp_context = pp_context;
4655         }
4656     }
4657
4658     return True;
4659 }
4660
4661 static const int procfilter_to_pp_flag[VAProcFilterCount] = {
4662     PP_NULL,    /* VAProcFilterNone */
4663     PP_NV12_DN, /* VAProcFilterNoiseReduction */
4664     PP_NULL,    /* VAProcFilterDeblocking */
4665     PP_NV12_DNDI, /* VAProcFilterDeinterlacing */
4666     PP_NULL,    /* VAProcFilterSharpening */
4667     PP_NULL,    /* VAProcFilterColorBalance */
4668     PP_NULL,    /* VAProcFilterColorStandard */
4669     PP_NULL,    /* VAProcFilterFrameRateConversion */
4670 };
4671
4672 static const int proc_frame_to_pp_frame[3] = {
4673     I965_SURFACE_FLAG_FRAME,
4674     I965_SURFACE_FLAG_TOP_FIELD_FIRST,
4675     I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST
4676 };
4677
4678 void 
4679 i965_proc_picture(VADriverContextP ctx, 
4680                   VAProfile profile, 
4681                   union codec_state *codec_state,
4682                   struct hw_context *hw_context)
4683 {
4684     struct i965_driver_data *i965 = i965_driver_data(ctx);
4685     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
4686     struct proc_state *proc_state = &codec_state->proc;
4687     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
4688     struct object_surface *obj_surface;
4689     struct i965_surface src_surface, dst_surface;
4690     VARectangle src_rect, dst_rect;
4691     VAStatus status;
4692     int i;
4693     VASurfaceID tmp_surfaces[VAProcFilterCount + 4];
4694     int num_tmp_surfaces = 0;
4695     unsigned int tiling = 0, swizzle = 0;
4696     int in_width, in_height;
4697
4698     assert(pipeline_param->surface != VA_INVALID_ID);
4699     assert(proc_state->current_render_target != VA_INVALID_ID);
4700
4701     obj_surface = SURFACE(pipeline_param->surface);
4702     in_width = obj_surface->orig_width;
4703     in_height = obj_surface->orig_height;
4704     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
4705
4706     src_surface.id = pipeline_param->surface;
4707     src_surface.type = I965_SURFACE_TYPE_SURFACE;
4708     src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
4709
4710     VASurfaceID out_surface_id = VA_INVALID_ID;
4711     if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2')) {
4712         src_surface.id = pipeline_param->surface;
4713         src_surface.type = I965_SURFACE_TYPE_SURFACE;
4714         src_surface.flags = I965_SURFACE_FLAG_FRAME;
4715         src_rect.x = 0;
4716         src_rect.y = 0;
4717         src_rect.width = in_width;
4718         src_rect.height = in_height;
4719
4720         status = i965_CreateSurfaces(ctx,
4721                                      in_width,
4722                                      in_height,
4723                                      VA_RT_FORMAT_YUV420,
4724                                      1,
4725                                      &out_surface_id);
4726         assert(status == VA_STATUS_SUCCESS);
4727         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
4728         obj_surface = SURFACE(out_surface_id);
4729         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
4730
4731         dst_surface.id = out_surface_id;
4732         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4733         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4734         dst_rect.x = 0;
4735         dst_rect.y = 0;
4736         dst_rect.width = in_width;
4737         dst_rect.height = in_height;
4738
4739         status = i965_image_processing(ctx,
4740                                        &src_surface,
4741                                        &src_rect,
4742                                        &dst_surface,
4743                                        &dst_rect);
4744         assert(status == VA_STATUS_SUCCESS);
4745
4746         src_surface.id = out_surface_id;
4747         src_surface.type = I965_SURFACE_TYPE_SURFACE;
4748         src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
4749     }
4750
4751     if (pipeline_param->surface_region) {
4752         src_rect.x = pipeline_param->surface_region->x;
4753         src_rect.y = pipeline_param->surface_region->y;
4754         src_rect.width = pipeline_param->surface_region->width;
4755         src_rect.height = pipeline_param->surface_region->height;
4756     } else {
4757         src_rect.x = 0;
4758         src_rect.y = 0;
4759         src_rect.width = in_width;
4760         src_rect.height = in_height;
4761     }
4762
4763     if (pipeline_param->output_region) {
4764         dst_rect.x = pipeline_param->output_region->x;
4765         dst_rect.y = pipeline_param->output_region->y;
4766         dst_rect.width = pipeline_param->output_region->width;
4767         dst_rect.height = pipeline_param->output_region->height;
4768     } else {
4769         dst_rect.x = 0;
4770         dst_rect.y = 0;
4771         dst_rect.width = in_width;
4772         dst_rect.height = in_height;
4773     }
4774
4775     for (i = 0; i < pipeline_param->num_filters; i++) {
4776         struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]);
4777         VAProcFilterParameterBufferBase *filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer;
4778         VAProcFilterType filter_type = filter_param->type;
4779         out_surface_id = VA_INVALID_ID;
4780         int kernel_index = procfilter_to_pp_flag[filter_type];
4781
4782         if (kernel_index != PP_NULL &&
4783             proc_context->pp_context.pp_modules[kernel_index].kernel.bo != NULL) {
4784             status = i965_CreateSurfaces(ctx,
4785                                          in_width,
4786                                          in_height,
4787                                          VA_RT_FORMAT_YUV420,
4788                                          1,
4789                                          &out_surface_id);
4790             assert(status == VA_STATUS_SUCCESS);
4791             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
4792             obj_surface = SURFACE(out_surface_id);
4793             i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4794             dst_surface.id = out_surface_id;
4795             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4796             status = i965_post_processing_internal(ctx, &proc_context->pp_context,
4797                                                    &src_surface,
4798                                                    &src_rect,
4799                                                    &dst_surface,
4800                                                    &src_rect,
4801                                                    kernel_index,
4802                                                    filter_param);
4803
4804             if (status == VA_STATUS_SUCCESS) {
4805                 src_surface.id = dst_surface.id;
4806                 src_surface.type = dst_surface.type;
4807                 src_surface.flags = dst_surface.flags;
4808             }
4809         }
4810     }
4811
4812     obj_surface = SURFACE(proc_state->current_render_target);
4813     int csc_needed = 0;
4814     if (obj_surface->fourcc && obj_surface->fourcc !=  VA_FOURCC('N','V','1','2')){
4815         csc_needed = 1;
4816         out_surface_id = VA_INVALID_ID;
4817         status = i965_CreateSurfaces(ctx,
4818                                      obj_surface->orig_width,
4819                                      obj_surface->orig_height,
4820                                      VA_RT_FORMAT_YUV420, 
4821                                      1,
4822                                      &out_surface_id);
4823         assert(status == VA_STATUS_SUCCESS);
4824         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
4825         struct object_surface *csc_surface = SURFACE(out_surface_id);
4826         i965_check_alloc_surface_bo(ctx, csc_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4827         dst_surface.id = out_surface_id;
4828     } else {
4829         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4830         dst_surface.id = proc_state->current_render_target;
4831     }
4832
4833     dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4834     i965_vpp_clear_surface(ctx, &proc_context->pp_context, proc_state->current_render_target, pipeline_param->output_background_color); 
4835
4836     // load/save doesn't support different origin offset for src and dst surface
4837     if (src_rect.width == dst_rect.width &&
4838         src_rect.height == dst_rect.height &&
4839         src_rect.x == dst_rect.x &&
4840         src_rect.y == dst_rect.y) {
4841         i965_post_processing_internal(ctx, &proc_context->pp_context,
4842                                       &src_surface,
4843                                       &src_rect,
4844                                       &dst_surface,
4845                                       &dst_rect,
4846                                       PP_NV12_LOAD_SAVE_N12,
4847                                       NULL);
4848     } else {
4849
4850         i965_post_processing_internal(ctx, &proc_context->pp_context,
4851                                       &src_surface,
4852                                       &src_rect,
4853                                       &dst_surface,
4854                                       &dst_rect,
4855                                       (pipeline_param->filter_flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC ?
4856                                       PP_NV12_AVS : PP_NV12_SCALING,
4857                                       NULL);
4858     }
4859
4860     if (csc_needed) {
4861         src_surface.id = dst_surface.id;
4862         src_surface.type = dst_surface.type;
4863         src_surface.flags = dst_surface.flags;
4864         dst_surface.id = proc_state->current_render_target;
4865         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4866         i965_image_processing(ctx, &src_surface, &dst_rect, &dst_surface, &dst_rect);
4867     }
4868     
4869     if (num_tmp_surfaces)
4870         i965_DestroySurfaces(ctx,
4871                              tmp_surfaces,
4872                              num_tmp_surfaces);
4873
4874     intel_batchbuffer_flush(hw_context->batch);
4875 }
4876
4877 static void
4878 i965_proc_context_destroy(void *hw_context)
4879 {
4880     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
4881
4882     i965_post_processing_context_finalize(&proc_context->pp_context);
4883     intel_batchbuffer_free(proc_context->base.batch);
4884     free(proc_context);
4885 }
4886
4887 struct hw_context *
4888 i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config)
4889 {
4890     struct intel_driver_data *intel = intel_driver_data(ctx);
4891     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
4892
4893     proc_context->base.destroy = i965_proc_context_destroy;
4894     proc_context->base.run = i965_proc_picture;
4895     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
4896     i965_post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
4897
4898     return (struct hw_context *)proc_context;
4899 }