Add CSC conversion from NV12 to RGBX for VPP on Ivy
[profile/ivi/vaapi-intel-driver.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_post_processing.h"
40 #include "i965_render.h"
41
/*
 * Non-zero when (ctx)->intel.device_id satisfies IS_IRONLAKE(), IS_GEN6()
 * or IS_GEN7().  The argument is expanded up to three times, so callers
 * should pass an expression without side effects.
 */
#define HAS_PP(ctx)                             \
    (IS_IRONLAKE((ctx)->intel.device_id) ||     \
     IS_GEN6((ctx)->intel.device_id) ||         \
     IS_GEN7((ctx)->intel.device_id))
45
/*
 * Surface-state slot sizing.
 *
 * Each of the four surface-state structures is padded to a multiple of
 * 32 bytes; one slot is then sized for the largest of them, so a single
 * layout fits both the i965 and the gen7 structures.
 */
#define SURFACE_STATE_PADDED_SIZE_0_I965        ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_I965        ALIGN(sizeof(struct i965_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_I965          MAX(SURFACE_STATE_PADDED_SIZE_0_I965, SURFACE_STATE_PADDED_SIZE_1_I965)

#define SURFACE_STATE_PADDED_SIZE_0_GEN7        ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_GEN7        ALIGN(sizeof(struct gen7_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN7          MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)

#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)

/*
 * Byte offset of surface-state slot `index`.  The argument is
 * parenthesized so that expressions such as `base + 1` scale as a whole
 * instead of only their first operand.
 */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))

/* Offset immediately after the last of the MAX_PP_SURFACES slots. */
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)
57
/*
 * Constants shared with the GPU assembly kernels.
 * NOTE(review): units are presumably pixels -- confirm against the shaders.
 */
#define GPU_ASM_BLOCK_WIDTH         16  /* block width */
#define GPU_ASM_BLOCK_HEIGHT        8   /* block height */
#define GPU_ASM_X_OFFSET_ALIGNMENT  4   /* alignment applied to x offsets */
61
62 static const uint32_t pp_null_gen5[][4] = {
63 #include "shaders/post_processing/gen5_6/null.g4b.gen5"
64 };
65
66 static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
67 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
68 };
69
70 static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
71 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
72 };
73
74 static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
75 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
76 };
77
78 static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
79 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
80 };
81
82 static const uint32_t pp_nv12_scaling_gen5[][4] = {
83 #include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
84 };
85
86 static const uint32_t pp_nv12_avs_gen5[][4] = {
87 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
88 };
89
90 static const uint32_t pp_nv12_dndi_gen5[][4] = {
91 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
92 };
93
94 static const uint32_t pp_nv12_dn_gen5[][4] = {
95 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
96 };
97
98 static const uint32_t pp_nv12_load_save_pa_gen5[][4] = {
99 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5"
100 };
101
102 static const uint32_t pp_pl3_load_save_pa_gen5[][4] = {
103 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5"
104 };
105
106 static const uint32_t pp_pa_load_save_nv12_gen5[][4] = {
107 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5"
108 };
109
110 static const uint32_t pp_pa_load_save_pl3_gen5[][4] = {
111 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5"
112 };
113
114 static const uint32_t pp_rgbx_load_save_nv12_gen5[][4] = {
115 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g4b.gen5"
116 };
117
118 static const uint32_t pp_nv12_load_save_rgbx_gen5[][4] = {
119 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g4b.gen5"
120 };
121
122 static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
123                                    const struct i965_surface *src_surface,
124                                    const VARectangle *src_rect,
125                                    struct i965_surface *dst_surface,
126                                    const VARectangle *dst_rect,
127                                    void *filter_param);
128 static VAStatus pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
129                                             const struct i965_surface *src_surface,
130                                             const VARectangle *src_rect,
131                                             struct i965_surface *dst_surface,
132                                             const VARectangle *dst_rect,
133                                             void *filter_param);
134 static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
135                                            const struct i965_surface *src_surface,
136                                            const VARectangle *src_rect,
137                                            struct i965_surface *dst_surface,
138                                            const VARectangle *dst_rect,
139                                            void *filter_param);
140 static VAStatus gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
141                                              const struct i965_surface *src_surface,
142                                              const VARectangle *src_rect,
143                                              struct i965_surface *dst_surface,
144                                              const VARectangle *dst_rect,
145                                              void *filter_param);
146 static VAStatus pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
147                                                 const struct i965_surface *src_surface,
148                                                 const VARectangle *src_rect,
149                                                 struct i965_surface *dst_surface,
150                                                 const VARectangle *dst_rect,
151                                                 void *filter_param);
152 static VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
153                                         const struct i965_surface *src_surface,
154                                         const VARectangle *src_rect,
155                                         struct i965_surface *dst_surface,
156                                         const VARectangle *dst_rect,
157                                         void *filter_param);
158 static VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
159                                       const struct i965_surface *src_surface,
160                                       const VARectangle *src_rect,
161                                       struct i965_surface *dst_surface,
162                                       const VARectangle *dst_rect,
163                                       void *filter_param);
164
165 static struct pp_module pp_modules_gen5[] = {
166     {
167         {
168             "NULL module (for testing)",
169             PP_NULL,
170             pp_null_gen5,
171             sizeof(pp_null_gen5),
172             NULL,
173         },
174
175         pp_null_initialize,
176     },
177
178     {
179         {
180             "NV12_NV12",
181             PP_NV12_LOAD_SAVE_N12,
182             pp_nv12_load_save_nv12_gen5,
183             sizeof(pp_nv12_load_save_nv12_gen5),
184             NULL,
185         },
186
187         pp_plx_load_save_plx_initialize,
188     },
189
190     {
191         {
192             "NV12_PL3",
193             PP_NV12_LOAD_SAVE_PL3,
194             pp_nv12_load_save_pl3_gen5,
195             sizeof(pp_nv12_load_save_pl3_gen5),
196             NULL,
197         },
198
199         pp_plx_load_save_plx_initialize,
200     },
201
202     {
203         {
204             "PL3_NV12",
205             PP_PL3_LOAD_SAVE_N12,
206             pp_pl3_load_save_nv12_gen5,
207             sizeof(pp_pl3_load_save_nv12_gen5),
208             NULL,
209         },
210
211         pp_plx_load_save_plx_initialize,
212     },
213
214     {
215         {
216             "PL3_PL3",
217             PP_PL3_LOAD_SAVE_N12,
218             pp_pl3_load_save_pl3_gen5,
219             sizeof(pp_pl3_load_save_pl3_gen5),
220             NULL,
221         },
222
223         pp_plx_load_save_plx_initialize
224     },
225
226     {
227         {
228             "NV12 Scaling module",
229             PP_NV12_SCALING,
230             pp_nv12_scaling_gen5,
231             sizeof(pp_nv12_scaling_gen5),
232             NULL,
233         },
234
235         pp_nv12_scaling_initialize,
236     },
237
238     {
239         {
240             "NV12 AVS module",
241             PP_NV12_AVS,
242             pp_nv12_avs_gen5,
243             sizeof(pp_nv12_avs_gen5),
244             NULL,
245         },
246
247         pp_nv12_avs_initialize_nlas,
248     },
249
250     {
251         {
252             "NV12 DNDI module",
253             PP_NV12_DNDI,
254             pp_nv12_dndi_gen5,
255             sizeof(pp_nv12_dndi_gen5),
256             NULL,
257         },
258
259         pp_nv12_dndi_initialize,
260     },
261
262     {
263         {
264             "NV12 DN module",
265             PP_NV12_DN,
266             pp_nv12_dn_gen5,
267             sizeof(pp_nv12_dn_gen5),
268             NULL,
269         },
270
271         pp_nv12_dn_initialize,
272     },
273
274     {
275         {
276             "NV12_PA module",
277             PP_NV12_LOAD_SAVE_PA,
278             pp_nv12_load_save_pa_gen5,
279             sizeof(pp_nv12_load_save_pa_gen5),
280             NULL,
281         },
282     
283         pp_plx_load_save_plx_initialize,
284     },
285
286     {
287         {
288             "PL3_PA module",
289             PP_PL3_LOAD_SAVE_PA,
290             pp_pl3_load_save_pa_gen5,
291             sizeof(pp_pl3_load_save_pa_gen5),
292             NULL,
293         },
294     
295         pp_plx_load_save_plx_initialize,
296     },
297
298     {
299         {
300             "PA_NV12 module",
301             PP_PA_LOAD_SAVE_NV12,
302             pp_pa_load_save_nv12_gen5,
303             sizeof(pp_pa_load_save_nv12_gen5),
304             NULL,
305         },
306     
307         pp_plx_load_save_plx_initialize,
308     },
309
310     {
311         {
312             "PA_PL3 module",
313             PP_PA_LOAD_SAVE_PL3,
314             pp_pa_load_save_pl3_gen5,
315             sizeof(pp_pa_load_save_pl3_gen5),
316             NULL,
317         },
318     
319         pp_plx_load_save_plx_initialize,
320     },
321
322     {
323         {
324             "RGBX_NV12 module",
325             PP_RGBX_LOAD_SAVE_NV12,
326             pp_rgbx_load_save_nv12_gen5,
327             sizeof(pp_rgbx_load_save_nv12_gen5),
328             NULL,
329         },
330     
331         pp_plx_load_save_plx_initialize,
332     },
333             
334     {
335         {
336             "NV12_RGBX module",
337             PP_NV12_LOAD_SAVE_RGBX,
338             pp_nv12_load_save_rgbx_gen5,
339             sizeof(pp_nv12_load_save_rgbx_gen5),
340             NULL,
341         },
342     
343         pp_plx_load_save_plx_initialize,
344     },
345                     
346 };
347
348 static const uint32_t pp_null_gen6[][4] = {
349 #include "shaders/post_processing/gen5_6/null.g6b"
350 };
351
352 static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
353 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
354 };
355
356 static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
357 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
358 };
359
360 static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
361 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
362 };
363
364 static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
365 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
366 };
367
368 static const uint32_t pp_nv12_scaling_gen6[][4] = {
369 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
370 };
371
372 static const uint32_t pp_nv12_avs_gen6[][4] = {
373 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
374 };
375
376 static const uint32_t pp_nv12_dndi_gen6[][4] = {
377 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
378 };
379
380 static const uint32_t pp_nv12_dn_gen6[][4] = {
381 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
382 };
383
384 static const uint32_t pp_nv12_load_save_pa_gen6[][4] = {
385 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g6b"
386 };
387
388 static const uint32_t pp_pl3_load_save_pa_gen6[][4] = {
389 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g6b"
390 };
391
392 static const uint32_t pp_pa_load_save_nv12_gen6[][4] = {
393 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g6b"
394 };
395
396 static const uint32_t pp_pa_load_save_pl3_gen6[][4] = {
397 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g6b"
398 };
399
400 static const uint32_t pp_rgbx_load_save_nv12_gen6[][4] = {
401 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g6b"
402 };
403
404 static const uint32_t pp_nv12_load_save_rgbx_gen6[][4] = {
405 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g6b"
406 };
407
408 static struct pp_module pp_modules_gen6[] = {
409     {
410         {
411             "NULL module (for testing)",
412             PP_NULL,
413             pp_null_gen6,
414             sizeof(pp_null_gen6),
415             NULL,
416         },
417
418         pp_null_initialize,
419     },
420
421     {
422         {
423             "NV12_NV12",
424             PP_NV12_LOAD_SAVE_N12,
425             pp_nv12_load_save_nv12_gen6,
426             sizeof(pp_nv12_load_save_nv12_gen6),
427             NULL,
428         },
429
430         pp_plx_load_save_plx_initialize,
431     },
432
433     {
434         {
435             "NV12_PL3",
436             PP_NV12_LOAD_SAVE_PL3,
437             pp_nv12_load_save_pl3_gen6,
438             sizeof(pp_nv12_load_save_pl3_gen6),
439             NULL,
440         },
441         
442         pp_plx_load_save_plx_initialize,
443     },
444
445     {
446         {
447             "PL3_NV12",
448             PP_PL3_LOAD_SAVE_N12,
449             pp_pl3_load_save_nv12_gen6,
450             sizeof(pp_pl3_load_save_nv12_gen6),
451             NULL,
452         },
453
454         pp_plx_load_save_plx_initialize,
455     },
456
457     {
458         {
459             "PL3_PL3",
460             PP_PL3_LOAD_SAVE_N12,
461             pp_pl3_load_save_pl3_gen6,
462             sizeof(pp_pl3_load_save_pl3_gen6),
463             NULL,
464         },
465
466         pp_plx_load_save_plx_initialize,
467     },
468
469     {
470         {
471             "NV12 Scaling module",
472             PP_NV12_SCALING,
473             pp_nv12_scaling_gen6,
474             sizeof(pp_nv12_scaling_gen6),
475             NULL,
476         },
477
478         gen6_nv12_scaling_initialize,
479     },
480
481     {
482         {
483             "NV12 AVS module",
484             PP_NV12_AVS,
485             pp_nv12_avs_gen6,
486             sizeof(pp_nv12_avs_gen6),
487             NULL,
488         },
489
490         pp_nv12_avs_initialize_nlas,
491     },
492
493     {
494         {
495             "NV12 DNDI module",
496             PP_NV12_DNDI,
497             pp_nv12_dndi_gen6,
498             sizeof(pp_nv12_dndi_gen6),
499             NULL,
500         },
501
502         pp_nv12_dndi_initialize,
503     },
504
505     {
506         {
507             "NV12 DN module",
508             PP_NV12_DN,
509             pp_nv12_dn_gen6,
510             sizeof(pp_nv12_dn_gen6),
511             NULL,
512         },
513
514         pp_nv12_dn_initialize,
515     },
516     {
517         {
518             "NV12_PA module",
519             PP_NV12_LOAD_SAVE_PA,
520             pp_nv12_load_save_pa_gen6,
521             sizeof(pp_nv12_load_save_pa_gen6),
522             NULL,
523         },
524     
525         pp_plx_load_save_plx_initialize,
526     },
527     
528     {
529         {
530             "PL3_PA module",
531             PP_PL3_LOAD_SAVE_PA,
532             pp_pl3_load_save_pa_gen6,
533             sizeof(pp_pl3_load_save_pa_gen6),
534             NULL,
535         },
536     
537         pp_plx_load_save_plx_initialize,
538     },
539     
540     {
541         {
542             "PA_NV12 module",
543             PP_PA_LOAD_SAVE_NV12,
544             pp_pa_load_save_nv12_gen6,
545             sizeof(pp_pa_load_save_nv12_gen6),
546             NULL,
547         },
548     
549         pp_plx_load_save_plx_initialize,
550     },
551
552     {
553         {
554             "PA_PL3 module",
555             PP_PA_LOAD_SAVE_PL3,
556             pp_pa_load_save_pl3_gen6,
557             sizeof(pp_pa_load_save_pl3_gen6),
558             NULL,
559         },
560     
561         pp_plx_load_save_plx_initialize,
562     },
563     
564     {
565         {
566             "RGBX_NV12 module",
567             PP_RGBX_LOAD_SAVE_NV12,
568             pp_rgbx_load_save_nv12_gen6,
569             sizeof(pp_rgbx_load_save_nv12_gen6),
570             NULL,
571         },
572     
573         pp_plx_load_save_plx_initialize,
574     },
575
576     {
577         {
578             "NV12_RGBX module",
579             PP_NV12_LOAD_SAVE_RGBX,
580             pp_nv12_load_save_rgbx_gen6,
581             sizeof(pp_nv12_load_save_rgbx_gen6),
582             NULL,
583         },
584     
585         pp_plx_load_save_plx_initialize,
586     },
587 };
588
589 static const uint32_t pp_null_gen7[][4] = {
590 };
591
592 static const uint32_t pp_nv12_load_save_nv12_gen7[][4] = {
593 #include "shaders/post_processing/gen7/pl2_to_pl2.g7b"
594 };
595
596 static const uint32_t pp_nv12_load_save_pl3_gen7[][4] = {
597 #include "shaders/post_processing/gen7/pl2_to_pl3.g7b"
598 };
599
600 static const uint32_t pp_pl3_load_save_nv12_gen7[][4] = {
601 #include "shaders/post_processing/gen7/pl3_to_pl2.g7b"
602 };
603
604 static const uint32_t pp_pl3_load_save_pl3_gen7[][4] = {
605 #include "shaders/post_processing/gen7/pl3_to_pl3.g7b"
606 };
607
608 static const uint32_t pp_nv12_scaling_gen7[][4] = {
609 #include "shaders/post_processing/gen7/avs.g7b"
610 };
611
612 static const uint32_t pp_nv12_avs_gen7[][4] = {
613 #include "shaders/post_processing/gen7/avs.g7b"
614 };
615
616 static const uint32_t pp_nv12_dndi_gen7[][4] = {
617 #include "shaders/post_processing/gen7/dndi.g7b"
618 };
619
620 static const uint32_t pp_nv12_dn_gen7[][4] = {
621 #include "shaders/post_processing/gen7/nv12_dn_nv12.g7b"
622 };
623 static const uint32_t pp_nv12_load_save_pa_gen7[][4] = {
624 #include "shaders/post_processing/gen7/pl2_to_pa.g7b"
625 };
626 static const uint32_t pp_pl3_load_save_pa_gen7[][4] = {
627 #include "shaders/post_processing/gen7/pl3_to_pa.g7b"
628 };
629 static const uint32_t pp_pa_load_save_nv12_gen7[][4] = {
630 #include "shaders/post_processing/gen7/pa_to_pl2.g7b"
631 };
632 static const uint32_t pp_pa_load_save_pl3_gen7[][4] = {
633 #include "shaders/post_processing/gen7/pa_to_pl3.g7b"
634 };
635 static const uint32_t pp_rgbx_load_save_nv12_gen7[][4] = {
636 };
637 static const uint32_t pp_nv12_load_save_rgbx_gen7[][4] = {
638 #include "shaders/post_processing/gen7/pl2_to_rgbx.g7b"
639 };
640
641 static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
642                                            const struct i965_surface *src_surface,
643                                            const VARectangle *src_rect,
644                                            struct i965_surface *dst_surface,
645                                            const VARectangle *dst_rect,
646                                            void *filter_param);
647 static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
648                                              const struct i965_surface *src_surface,
649                                              const VARectangle *src_rect,
650                                              struct i965_surface *dst_surface,
651                                              const VARectangle *dst_rect,
652                                              void *filter_param);
653 static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
654                                            const struct i965_surface *src_surface,
655                                            const VARectangle *src_rect,
656                                            struct i965_surface *dst_surface,
657                                            const VARectangle *dst_rect,
658                                            void *filter_param);
659
660 static struct pp_module pp_modules_gen7[] = {
661     {
662         {
663             "NULL module (for testing)",
664             PP_NULL,
665             pp_null_gen7,
666             sizeof(pp_null_gen7),
667             NULL,
668         },
669
670         pp_null_initialize,
671     },
672
673     {
674         {
675             "NV12_NV12",
676             PP_NV12_LOAD_SAVE_N12,
677             pp_nv12_load_save_nv12_gen7,
678             sizeof(pp_nv12_load_save_nv12_gen7),
679             NULL,
680         },
681
682         gen7_pp_plx_avs_initialize,
683     },
684
685     {
686         {
687             "NV12_PL3",
688             PP_NV12_LOAD_SAVE_PL3,
689             pp_nv12_load_save_pl3_gen7,
690             sizeof(pp_nv12_load_save_pl3_gen7),
691             NULL,
692         },
693         
694         gen7_pp_plx_avs_initialize,
695     },
696
697     {
698         {
699             "PL3_NV12",
700             PP_PL3_LOAD_SAVE_N12,
701             pp_pl3_load_save_nv12_gen7,
702             sizeof(pp_pl3_load_save_nv12_gen7),
703             NULL,
704         },
705
706         gen7_pp_plx_avs_initialize,
707     },
708
709     {
710         {
711             "PL3_PL3",
712             PP_PL3_LOAD_SAVE_N12,
713             pp_pl3_load_save_pl3_gen7,
714             sizeof(pp_pl3_load_save_pl3_gen7),
715             NULL,
716         },
717
718         gen7_pp_plx_avs_initialize,
719     },
720
721     {
722         {
723             "NV12 Scaling module",
724             PP_NV12_SCALING,
725             pp_nv12_scaling_gen7,
726             sizeof(pp_nv12_scaling_gen7),
727             NULL,
728         },
729
730         gen7_pp_plx_avs_initialize,
731     },
732
733     {
734         {
735             "NV12 AVS module",
736             PP_NV12_AVS,
737             pp_nv12_avs_gen7,
738             sizeof(pp_nv12_avs_gen7),
739             NULL,
740         },
741
742         gen7_pp_plx_avs_initialize,
743     },
744
745     {
746         {
747             "NV12 DNDI module",
748             PP_NV12_DNDI,
749             pp_nv12_dndi_gen7,
750             sizeof(pp_nv12_dndi_gen7),
751             NULL,
752         },
753
754         gen7_pp_nv12_dndi_initialize,
755     },
756
757     {
758         {
759             "NV12 DN module",
760             PP_NV12_DN,
761             pp_nv12_dn_gen7,
762             sizeof(pp_nv12_dn_gen7),
763             NULL,
764         },
765
766         gen7_pp_nv12_dn_initialize,
767     },
768     {
769         {
770             "NV12_PA module",
771             PP_NV12_LOAD_SAVE_PA,
772             pp_nv12_load_save_pa_gen7,
773             sizeof(pp_nv12_load_save_pa_gen7),
774             NULL,
775         },
776     
777         gen7_pp_plx_avs_initialize,
778     },
779
780     {
781         {
782             "PL3_PA module",
783             PP_PL3_LOAD_SAVE_PA,
784             pp_pl3_load_save_pa_gen7,
785             sizeof(pp_pl3_load_save_pa_gen7),
786             NULL,
787         },
788     
789         gen7_pp_plx_avs_initialize,
790     },
791
792     {
793         {
794             "PA_NV12 module",
795             PP_PA_LOAD_SAVE_NV12,
796             pp_pa_load_save_nv12_gen7,
797             sizeof(pp_pa_load_save_nv12_gen7),
798             NULL,
799         },
800     
801         gen7_pp_plx_avs_initialize,
802     },
803
804     {
805         {
806             "PA_PL3 module",
807             PP_PA_LOAD_SAVE_PL3,
808             pp_pa_load_save_pl3_gen7,
809             sizeof(pp_pa_load_save_pl3_gen7),
810             NULL,
811         },
812     
813         gen7_pp_plx_avs_initialize,
814     },
815     
816     {
817         {
818             "RGBX_NV12 module",
819             PP_RGBX_LOAD_SAVE_NV12,
820             pp_rgbx_load_save_nv12_gen7,
821             sizeof(pp_rgbx_load_save_nv12_gen7),
822             NULL,
823         },
824     
825         pp_plx_load_save_plx_initialize,
826     },
827
828     {
829         {
830             "NV12_RGBX module",
831             PP_NV12_LOAD_SAVE_RGBX,
832             pp_nv12_load_save_rgbx_gen7,
833             sizeof(pp_nv12_load_save_rgbx_gen7),
834             NULL,
835         },
836     
837         gen7_pp_plx_avs_initialize,
838     },
839             
840 };
841
842 static const uint32_t pp_null_gen75[][4] = {
843 };
844
845 static const uint32_t pp_nv12_load_save_nv12_gen75[][4] = {
846 #include "shaders/post_processing/gen7/pl2_to_pl2.g75b"
847 };
848
849 static const uint32_t pp_nv12_load_save_pl3_gen75[][4] = {
850 #include "shaders/post_processing/gen7/pl2_to_pl3.g75b"
851 };
852
853 static const uint32_t pp_pl3_load_save_nv12_gen75[][4] = {
854 #include "shaders/post_processing/gen7/pl3_to_pl2.g75b"
855 };
856
857 static const uint32_t pp_pl3_load_save_pl3_gen75[][4] = {
858 #include "shaders/post_processing/gen7/pl3_to_pl3.g75b"
859 };
860
861 static const uint32_t pp_nv12_scaling_gen75[][4] = {
862 #include "shaders/post_processing/gen7/avs.g75b"
863 };
864
865 static const uint32_t pp_nv12_avs_gen75[][4] = {
866 #include "shaders/post_processing/gen7/avs.g75b"
867 };
868
869 static const uint32_t pp_nv12_dndi_gen75[][4] = {
870 // #include "shaders/post_processing/gen7/dndi.g75b"
871 };
872
873 static const uint32_t pp_nv12_dn_gen75[][4] = {
874 // #include "shaders/post_processing/gen7/nv12_dn_nv12.g75b"
875 };
876 static const uint32_t pp_nv12_load_save_pa_gen75[][4] = {
877 #include "shaders/post_processing/gen7/pl2_to_pa.g75b"
878 };
879 static const uint32_t pp_pl3_load_save_pa_gen75[][4] = {
880 #include "shaders/post_processing/gen7/pl3_to_pa.g75b"
881 };
882 static const uint32_t pp_pa_load_save_nv12_gen75[][4] = {
883 #include "shaders/post_processing/gen7/pa_to_pl2.g75b"
884 };
885 static const uint32_t pp_pa_load_save_pl3_gen75[][4] = {
886 #include "shaders/post_processing/gen7/pa_to_pl3.g75b"
887 };
888 static const uint32_t pp_rgbx_load_save_nv12_gen75[][4] = {
889 };
890 static const uint32_t pp_nv12_load_save_rgbx_gen75[][4] = {
891 };
892
893 static struct pp_module pp_modules_gen75[] = {
894     {
895         {
896             "NULL module (for testing)",
897             PP_NULL,
898             pp_null_gen75,
899             sizeof(pp_null_gen75),
900             NULL,
901         },
902
903         pp_null_initialize,
904     },
905
906     {
907         {
908             "NV12_NV12",
909             PP_NV12_LOAD_SAVE_N12,
910             pp_nv12_load_save_nv12_gen75,
911             sizeof(pp_nv12_load_save_nv12_gen75),
912             NULL,
913         },
914
915         gen7_pp_plx_avs_initialize,
916     },
917
918     {
919         {
920             "NV12_PL3",
921             PP_NV12_LOAD_SAVE_PL3,
922             pp_nv12_load_save_pl3_gen75,
923             sizeof(pp_nv12_load_save_pl3_gen75),
924             NULL,
925         },
926         
927         gen7_pp_plx_avs_initialize,
928     },
929
930     {
931         {
932             "PL3_NV12",
933             PP_PL3_LOAD_SAVE_N12,
934             pp_pl3_load_save_nv12_gen75,
935             sizeof(pp_pl3_load_save_nv12_gen75),
936             NULL,
937         },
938
939         gen7_pp_plx_avs_initialize,
940     },
941
942     {
943         {
944             "PL3_PL3",
945             PP_PL3_LOAD_SAVE_N12,
946             pp_pl3_load_save_pl3_gen75,
947             sizeof(pp_pl3_load_save_pl3_gen75),
948             NULL,
949         },
950
951         gen7_pp_plx_avs_initialize,
952     },
953
954     {
955         {
956             "NV12 Scaling module",
957             PP_NV12_SCALING,
958             pp_nv12_scaling_gen75,
959             sizeof(pp_nv12_scaling_gen75),
960             NULL,
961         },
962
963         gen7_pp_plx_avs_initialize,
964     },
965
966     {
967         {
968             "NV12 AVS module",
969             PP_NV12_AVS,
970             pp_nv12_avs_gen75,
971             sizeof(pp_nv12_avs_gen75),
972             NULL,
973         },
974
975         gen7_pp_plx_avs_initialize,
976     },
977
978     {
979         {
980             "NV12 DNDI module",
981             PP_NV12_DNDI,
982             pp_nv12_dndi_gen75,
983             sizeof(pp_nv12_dndi_gen75),
984             NULL,
985         },
986
987         gen7_pp_nv12_dndi_initialize,
988     },
989
990     {
991         {
992             "NV12 DN module",
993             PP_NV12_DN,
994             pp_nv12_dn_gen75,
995             sizeof(pp_nv12_dn_gen75),
996             NULL,
997         },
998
999         gen7_pp_nv12_dn_initialize,
1000     },
1001     {
1002         {
1003             "NV12_PA module",
1004             PP_NV12_LOAD_SAVE_PA,
1005             pp_nv12_load_save_pa_gen75,
1006             sizeof(pp_nv12_load_save_pa_gen75),
1007             NULL,
1008         },
1009     
1010         gen7_pp_plx_avs_initialize,
1011     },
1012
1013     {
1014         {
1015             "PL3_PA module",
1016             PP_PL3_LOAD_SAVE_PA,
1017             pp_pl3_load_save_pa_gen75,
1018             sizeof(pp_pl3_load_save_pa_gen75),
1019             NULL,
1020         },
1021     
1022         gen7_pp_plx_avs_initialize,
1023     },
1024
1025     {
1026         {
1027             "PA_NV12 module",
1028             PP_PA_LOAD_SAVE_NV12,
1029             pp_pa_load_save_nv12_gen75,
1030             sizeof(pp_pa_load_save_nv12_gen75),
1031             NULL,
1032         },
1033     
1034         gen7_pp_plx_avs_initialize,
1035     },
1036
1037     {
1038         {
1039             "PA_PL3 module",
1040             PP_PA_LOAD_SAVE_PL3,
1041             pp_pa_load_save_pl3_gen75,
1042             sizeof(pp_pa_load_save_pl3_gen75),
1043             NULL,
1044         },
1045     
1046         gen7_pp_plx_avs_initialize,
1047     },
1048     
1049     {
1050         {
1051             "RGBX_NV12 module",
1052             PP_RGBX_LOAD_SAVE_NV12,
1053             pp_rgbx_load_save_nv12_gen75,
1054             sizeof(pp_rgbx_load_save_nv12_gen75),
1055             NULL,
1056         },
1057     
1058         pp_plx_load_save_plx_initialize,
1059     },
1060
1061     {
1062         {
1063             "NV12_RGBX module",
1064             PP_NV12_LOAD_SAVE_RGBX,
1065             pp_nv12_load_save_rgbx_gen75,
1066             sizeof(pp_nv12_load_save_rgbx_gen75),
1067             NULL,
1068         },
1069     
1070         pp_plx_load_save_plx_initialize,
1071     },
1072             
1073 };
1074
1075 static int
1076 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
1077 {
1078     struct i965_driver_data *i965 = i965_driver_data(ctx);
1079     int fourcc;
1080
1081     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
1082         struct object_image *obj_image = IMAGE(surface->id);
1083         fourcc = obj_image->image.format.fourcc;
1084     } else {
1085         struct object_surface *obj_surface = SURFACE(surface->id);
1086         fourcc = obj_surface->fourcc;
1087     }
1088
1089     return fourcc;
1090 }
1091
1092 static void
1093 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
1094 {
1095     switch (tiling) {
1096     case I915_TILING_NONE:
1097         ss->ss3.tiled_surface = 0;
1098         ss->ss3.tile_walk = 0;
1099         break;
1100     case I915_TILING_X:
1101         ss->ss3.tiled_surface = 1;
1102         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
1103         break;
1104     case I915_TILING_Y:
1105         ss->ss3.tiled_surface = 1;
1106         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
1107         break;
1108     }
1109 }
1110
1111 static void
1112 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
1113 {
1114     switch (tiling) {
1115     case I915_TILING_NONE:
1116         ss->ss2.tiled_surface = 0;
1117         ss->ss2.tile_walk = 0;
1118         break;
1119     case I915_TILING_X:
1120         ss->ss2.tiled_surface = 1;
1121         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1122         break;
1123     case I915_TILING_Y:
1124         ss->ss2.tiled_surface = 1;
1125         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1126         break;
1127     }
1128 }
1129
1130 static void
1131 gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
1132 {
1133     switch (tiling) {
1134     case I915_TILING_NONE:
1135         ss->ss0.tiled_surface = 0;
1136         ss->ss0.tile_walk = 0;
1137         break;
1138     case I915_TILING_X:
1139         ss->ss0.tiled_surface = 1;
1140         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
1141         break;
1142     case I915_TILING_Y:
1143         ss->ss0.tiled_surface = 1;
1144         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
1145         break;
1146     }
1147 }
1148
1149 static void
1150 gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
1151 {
1152     switch (tiling) {
1153     case I915_TILING_NONE:
1154         ss->ss2.tiled_surface = 0;
1155         ss->ss2.tile_walk = 0;
1156         break;
1157     case I915_TILING_X:
1158         ss->ss2.tiled_surface = 1;
1159         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1160         break;
1161     case I915_TILING_Y:
1162         ss->ss2.tiled_surface = 1;
1163         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1164         break;
1165     }
1166 }
1167
1168 static void
1169 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
1170 {
1171     struct i965_interface_descriptor *desc;
1172     dri_bo *bo;
1173     int pp_index = pp_context->current_pp;
1174
1175     bo = pp_context->idrt.bo;
1176     dri_bo_map(bo, 1);
1177     assert(bo->virtual);
1178     desc = bo->virtual;
1179     memset(desc, 0, sizeof(*desc));
1180     desc->desc0.grf_reg_blocks = 10;
1181     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
1182     desc->desc1.const_urb_entry_read_offset = 0;
1183     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
1184     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
1185     desc->desc2.sampler_count = 0;
1186     desc->desc3.binding_table_entry_count = 0;
1187     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
1188
1189     dri_bo_emit_reloc(bo,
1190                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1191                       desc->desc0.grf_reg_blocks,
1192                       offsetof(struct i965_interface_descriptor, desc0),
1193                       pp_context->pp_modules[pp_index].kernel.bo);
1194
1195     dri_bo_emit_reloc(bo,
1196                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1197                       desc->desc2.sampler_count << 2,
1198                       offsetof(struct i965_interface_descriptor, desc2),
1199                       pp_context->sampler_state_table.bo);
1200
1201     dri_bo_unmap(bo);
1202     pp_context->idrt.num_interface_descriptors++;
1203 }
1204
1205 static void
1206 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
1207 {
1208     struct i965_vfe_state *vfe_state;
1209     dri_bo *bo;
1210
1211     bo = pp_context->vfe_state.bo;
1212     dri_bo_map(bo, 1);
1213     assert(bo->virtual);
1214     vfe_state = bo->virtual;
1215     memset(vfe_state, 0, sizeof(*vfe_state));
1216     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
1217     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
1218     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
1219     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
1220     vfe_state->vfe1.children_present = 0;
1221     vfe_state->vfe2.interface_descriptor_base = 
1222         pp_context->idrt.bo->offset >> 4; /* reloc */
1223     dri_bo_emit_reloc(bo,
1224                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1225                       0,
1226                       offsetof(struct i965_vfe_state, vfe2),
1227                       pp_context->idrt.bo);
1228     dri_bo_unmap(bo);
1229 }
1230
1231 static void
1232 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
1233 {
1234     unsigned char *constant_buffer;
1235     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1236
1237     assert(sizeof(*pp_static_parameter) == 128);
1238     dri_bo_map(pp_context->curbe.bo, 1);
1239     assert(pp_context->curbe.bo->virtual);
1240     constant_buffer = pp_context->curbe.bo->virtual;
1241     memcpy(constant_buffer, pp_static_parameter, sizeof(*pp_static_parameter));
1242     dri_bo_unmap(pp_context->curbe.bo);
1243 }
1244
1245 static void
1246 ironlake_pp_states_setup(VADriverContextP ctx,
1247                          struct i965_post_processing_context *pp_context)
1248 {
1249     ironlake_pp_interface_descriptor_table(pp_context);
1250     ironlake_pp_vfe_state(pp_context);
1251     ironlake_pp_upload_constants(pp_context);
1252 }
1253
1254 static void
1255 ironlake_pp_pipeline_select(VADriverContextP ctx,
1256                             struct i965_post_processing_context *pp_context)
1257 {
1258     struct intel_batchbuffer *batch = pp_context->batch;
1259
1260     BEGIN_BATCH(batch, 1);
1261     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
1262     ADVANCE_BATCH(batch);
1263 }
1264
1265 static void
1266 ironlake_pp_urb_layout(VADriverContextP ctx,
1267                        struct i965_post_processing_context *pp_context)
1268 {
1269     struct intel_batchbuffer *batch = pp_context->batch;
1270     unsigned int vfe_fence, cs_fence;
1271
1272     vfe_fence = pp_context->urb.cs_start;
1273     cs_fence = pp_context->urb.size;
1274
1275     BEGIN_BATCH(batch, 3);
1276     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
1277     OUT_BATCH(batch, 0);
1278     OUT_BATCH(batch, 
1279               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
1280               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
1281     ADVANCE_BATCH(batch);
1282 }
1283
1284 static void
1285 ironlake_pp_state_base_address(VADriverContextP ctx,
1286                                struct i965_post_processing_context *pp_context)
1287 {
1288     struct intel_batchbuffer *batch = pp_context->batch;
1289
1290     BEGIN_BATCH(batch, 8);
1291     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
1292     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1293     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
1294     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1295     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1296     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1297     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1298     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1299     ADVANCE_BATCH(batch);
1300 }
1301
1302 static void
1303 ironlake_pp_state_pointers(VADriverContextP ctx,
1304                            struct i965_post_processing_context *pp_context)
1305 {
1306     struct intel_batchbuffer *batch = pp_context->batch;
1307
1308     BEGIN_BATCH(batch, 3);
1309     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
1310     OUT_BATCH(batch, 0);
1311     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1312     ADVANCE_BATCH(batch);
1313 }
1314
1315 static void 
1316 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
1317                           struct i965_post_processing_context *pp_context)
1318 {
1319     struct intel_batchbuffer *batch = pp_context->batch;
1320
1321     BEGIN_BATCH(batch, 2);
1322     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
1323     OUT_BATCH(batch,
1324               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
1325               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
1326     ADVANCE_BATCH(batch);
1327 }
1328
1329 static void
1330 ironlake_pp_constant_buffer(VADriverContextP ctx,
1331                             struct i965_post_processing_context *pp_context)
1332 {
1333     struct intel_batchbuffer *batch = pp_context->batch;
1334
1335     BEGIN_BATCH(batch, 2);
1336     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1337     OUT_RELOC(batch, pp_context->curbe.bo,
1338               I915_GEM_DOMAIN_INSTRUCTION, 0,
1339               pp_context->urb.size_cs_entry - 1);
1340     ADVANCE_BATCH(batch);    
1341 }
1342
1343 static void
1344 ironlake_pp_object_walker(VADriverContextP ctx,
1345                           struct i965_post_processing_context *pp_context)
1346 {
1347     struct intel_batchbuffer *batch = pp_context->batch;
1348     int x, x_steps, y, y_steps;
1349     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1350
1351     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
1352     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
1353
1354     for (y = 0; y < y_steps; y++) {
1355         for (x = 0; x < x_steps; x++) {
1356             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
1357                 BEGIN_BATCH(batch, 20);
1358                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
1359                 OUT_BATCH(batch, 0);
1360                 OUT_BATCH(batch, 0); /* no indirect data */
1361                 OUT_BATCH(batch, 0);
1362
1363                 /* inline data grf 5-6 */
1364                 assert(sizeof(*pp_inline_parameter) == 64);
1365                 intel_batchbuffer_data(batch, pp_inline_parameter, sizeof(*pp_inline_parameter));
1366
1367                 ADVANCE_BATCH(batch);
1368             }
1369         }
1370     }
1371 }
1372
1373 static void
1374 ironlake_pp_pipeline_setup(VADriverContextP ctx,
1375                            struct i965_post_processing_context *pp_context)
1376 {
1377     struct intel_batchbuffer *batch = pp_context->batch;
1378
1379     intel_batchbuffer_start_atomic(batch, 0x1000);
1380     intel_batchbuffer_emit_mi_flush(batch);
1381     ironlake_pp_pipeline_select(ctx, pp_context);
1382     ironlake_pp_state_base_address(ctx, pp_context);
1383     ironlake_pp_state_pointers(ctx, pp_context);
1384     ironlake_pp_urb_layout(ctx, pp_context);
1385     ironlake_pp_cs_urb_layout(ctx, pp_context);
1386     ironlake_pp_constant_buffer(ctx, pp_context);
1387     ironlake_pp_object_walker(ctx, pp_context);
1388     intel_batchbuffer_end_atomic(batch);
1389 }
1390
1391 // update u/v offset when the surface format are packed yuv
1392 static void i965_update_src_surface_static_parameter(
1393     VADriverContextP    ctx, 
1394     struct i965_post_processing_context *pp_context,
1395     const struct i965_surface *surface)
1396 {
1397     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1398     int fourcc = pp_get_surface_fourcc(ctx, surface);
1399
1400     switch (fourcc) {
1401     case VA_FOURCC('Y', 'U', 'Y', '2'):
1402         pp_static_parameter->grf1.source_packed_u_offset = 1;
1403         pp_static_parameter->grf1.source_packed_v_offset = 3;
1404         break;
1405     case VA_FOURCC('U', 'Y', 'V', 'Y'):
1406         pp_static_parameter->grf1.source_packed_y_offset = 1;
1407         pp_static_parameter->grf1.source_packed_v_offset = 2;
1408         break;
1409     case VA_FOURCC('B', 'G', 'R', 'X'):
1410     case VA_FOURCC('B', 'G', 'R', 'A'):
1411         pp_static_parameter->grf1.source_rgb_layout = 0;
1412         break;
1413     case VA_FOURCC('R', 'G', 'B', 'X'):
1414     case VA_FOURCC('R', 'G', 'B', 'A'):
1415         pp_static_parameter->grf1.source_rgb_layout = 1;
1416         break;
1417     default:
1418         break;
1419     }
1420     
1421 }
1422
1423 static void i965_update_dst_surface_static_parameter(
1424     VADriverContextP    ctx, 
1425     struct i965_post_processing_context *pp_context,
1426     const struct i965_surface *surface)
1427 {
1428     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1429     int fourcc = pp_get_surface_fourcc(ctx, surface);
1430
1431     switch (fourcc) {
1432     case VA_FOURCC('Y', 'U', 'Y', '2'):
1433         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_u_offset = 1;
1434         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 3;
1435         break;
1436     case VA_FOURCC('U', 'Y', 'V', 'Y'):
1437         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_y_offset = 1;
1438         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 2;
1439         break;
1440     case VA_FOURCC('B', 'G', 'R', 'X'):
1441     case VA_FOURCC('B', 'G', 'R', 'A'):
1442         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 0;
1443         break;
1444     case VA_FOURCC('R', 'G', 'B', 'X'):
1445     case VA_FOURCC('R', 'G', 'B', 'A'):
1446         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 1;
1447         break;
1448     default:
1449         break;
1450     }
1451     
1452 }
1453
1454 static void
1455 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1456                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1457                           int width, int height, int pitch, int format, 
1458                           int index, int is_target)
1459 {
1460     struct i965_surface_state *ss;
1461     dri_bo *ss_bo;
1462     unsigned int tiling;
1463     unsigned int swizzle;
1464
1465     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1466     ss_bo = pp_context->surface_state_binding_table.bo;
1467     assert(ss_bo);
1468
1469     dri_bo_map(ss_bo, True);
1470     assert(ss_bo->virtual);
1471     ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1472     memset(ss, 0, sizeof(*ss));
1473     ss->ss0.surface_type = I965_SURFACE_2D;
1474     ss->ss0.surface_format = format;
1475     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1476     ss->ss2.width = width - 1;
1477     ss->ss2.height = height - 1;
1478     ss->ss3.pitch = pitch - 1;
1479     pp_set_surface_tiling(ss, tiling);
1480     dri_bo_emit_reloc(ss_bo,
1481                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1482                       surf_bo_offset,
1483                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
1484                       surf_bo);
1485     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1486     dri_bo_unmap(ss_bo);
1487 }
1488
1489 static void
1490 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1491                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1492                            int width, int height, int wpitch,
1493                            int xoffset, int yoffset,
1494                            int format, int interleave_chroma,
1495                            int index)
1496 {
1497     struct i965_surface_state2 *ss2;
1498     dri_bo *ss2_bo;
1499     unsigned int tiling;
1500     unsigned int swizzle;
1501
1502     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1503     ss2_bo = pp_context->surface_state_binding_table.bo;
1504     assert(ss2_bo);
1505
1506     dri_bo_map(ss2_bo, True);
1507     assert(ss2_bo->virtual);
1508     ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1509     memset(ss2, 0, sizeof(*ss2));
1510     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1511     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1512     ss2->ss1.width = width - 1;
1513     ss2->ss1.height = height - 1;
1514     ss2->ss2.pitch = wpitch - 1;
1515     ss2->ss2.interleave_chroma = interleave_chroma;
1516     ss2->ss2.surface_format = format;
1517     ss2->ss3.x_offset_for_cb = xoffset;
1518     ss2->ss3.y_offset_for_cb = yoffset;
1519     pp_set_surface2_tiling(ss2, tiling);
1520     dri_bo_emit_reloc(ss2_bo,
1521                       I915_GEM_DOMAIN_RENDER, 0,
1522                       surf_bo_offset,
1523                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
1524                       surf_bo);
1525     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1526     dri_bo_unmap(ss2_bo);
1527 }
1528
1529 static void
1530 gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1531                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1532                           int width, int height, int pitch, int format, 
1533                           int index, int is_target)
1534 {
1535     struct i965_driver_data * const i965 = i965_driver_data(ctx);  
1536     struct gen7_surface_state *ss;
1537     dri_bo *ss_bo;
1538     unsigned int tiling;
1539     unsigned int swizzle;
1540
1541     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1542     ss_bo = pp_context->surface_state_binding_table.bo;
1543     assert(ss_bo);
1544
1545     dri_bo_map(ss_bo, True);
1546     assert(ss_bo->virtual);
1547     ss = (struct gen7_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1548     memset(ss, 0, sizeof(*ss));
1549     ss->ss0.surface_type = I965_SURFACE_2D;
1550     ss->ss0.surface_format = format;
1551     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1552     ss->ss2.width = width - 1;
1553     ss->ss2.height = height - 1;
1554     ss->ss3.pitch = pitch - 1;
1555     gen7_pp_set_surface_tiling(ss, tiling);
1556     if (IS_HASWELL(i965->intel.device_id))
1557         gen7_render_set_surface_scs(ss);
1558     dri_bo_emit_reloc(ss_bo,
1559                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1560                       surf_bo_offset,
1561                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
1562                       surf_bo);
1563     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1564     dri_bo_unmap(ss_bo);
1565 }
1566
1567 static void
1568 gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1569                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1570                            int width, int height, int wpitch,
1571                            int xoffset, int yoffset,
1572                            int format, int interleave_chroma,
1573                            int index)
1574 {
1575     struct gen7_surface_state2 *ss2;
1576     dri_bo *ss2_bo;
1577     unsigned int tiling;
1578     unsigned int swizzle;
1579
1580     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1581     ss2_bo = pp_context->surface_state_binding_table.bo;
1582     assert(ss2_bo);
1583
1584     dri_bo_map(ss2_bo, True);
1585     assert(ss2_bo->virtual);
1586     ss2 = (struct gen7_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1587     memset(ss2, 0, sizeof(*ss2));
1588     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1589     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1590     ss2->ss1.width = width - 1;
1591     ss2->ss1.height = height - 1;
1592     ss2->ss2.pitch = wpitch - 1;
1593     ss2->ss2.interleave_chroma = interleave_chroma;
1594     ss2->ss2.surface_format = format;
1595     ss2->ss3.x_offset_for_cb = xoffset;
1596     ss2->ss3.y_offset_for_cb = yoffset;
1597     gen7_pp_set_surface2_tiling(ss2, tiling);
1598     dri_bo_emit_reloc(ss2_bo,
1599                       I915_GEM_DOMAIN_RENDER, 0,
1600                       surf_bo_offset,
1601                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
1602                       surf_bo);
1603     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1604     dri_bo_unmap(ss2_bo);
1605 }
1606
1607 static void 
1608 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1609                                 const struct i965_surface *surface, 
1610                                 int base_index, int is_target,
1611                                 int *width, int *height, int *pitch, int *offset)
1612 {
1613     struct i965_driver_data *i965 = i965_driver_data(ctx);
1614     struct object_surface *obj_surface;
1615     struct object_image *obj_image;
1616     dri_bo *bo;
1617     int fourcc = pp_get_surface_fourcc(ctx, surface);
1618     const int Y = 0;
1619     const int U = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 2 : 1;
1620     const int V = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 1 : 2;
1621     const int UV = 1;
1622     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1623     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); 
1624     int full_packed_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || 
1625                               fourcc == VA_FOURCC('R', 'G', 'B', 'X') || 
1626                               fourcc == VA_FOURCC('B', 'G', 'R', 'A') || 
1627                               fourcc == VA_FOURCC('B', 'G', 'R', 'X'));
1628     int scale_factor_of_1st_plane_width_in_byte = 1;
1629                               
1630     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1631         obj_surface = SURFACE(surface->id);
1632         bo = obj_surface->bo;
1633         width[0] = obj_surface->orig_width;
1634         height[0] = obj_surface->orig_height;
1635         pitch[0] = obj_surface->width;
1636         offset[0] = 0;
1637
1638         if (full_packed_format) {
1639             scale_factor_of_1st_plane_width_in_byte = 4; 
1640             pitch[0] = obj_surface->width * 4;
1641         }
1642         else if (packed_yuv ) {
1643             scale_factor_of_1st_plane_width_in_byte =  2; 
1644             pitch[0] = obj_surface->width * 2;
1645         }
1646         else if (interleaved_uv) {
1647             width[1] = obj_surface->orig_width;
1648             height[1] = obj_surface->orig_height / 2;
1649             pitch[1] = obj_surface->width;
1650             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1651         } else {
1652             width[1] = obj_surface->orig_width / 2;
1653             height[1] = obj_surface->orig_height / 2;
1654             pitch[1] = obj_surface->width / 2;
1655             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1656             width[2] = obj_surface->orig_width / 2;
1657             height[2] = obj_surface->orig_height / 2;
1658             pitch[2] = obj_surface->width / 2;
1659             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
1660         }
1661     } else {
1662         obj_image = IMAGE(surface->id);
1663         bo = obj_image->bo;
1664         width[0] = obj_image->image.width;
1665         height[0] = obj_image->image.height;
1666         pitch[0] = obj_image->image.pitches[0];
1667         offset[0] = obj_image->image.offsets[0];
1668
1669         if (full_packed_format) {
1670             scale_factor_of_1st_plane_width_in_byte = 4;
1671         }
1672         else if (packed_yuv ) {
1673             scale_factor_of_1st_plane_width_in_byte = 2;
1674         }
1675         else if (interleaved_uv) {
1676             width[1] = obj_image->image.width;
1677             height[1] = obj_image->image.height / 2;
1678             pitch[1] = obj_image->image.pitches[1];
1679             offset[1] = obj_image->image.offsets[1];
1680         } else {
1681             width[1] = obj_image->image.width / 2;
1682             height[1] = obj_image->image.height / 2;
1683             pitch[1] = obj_image->image.pitches[1];
1684             offset[1] = obj_image->image.offsets[1];
1685             width[2] = obj_image->image.width / 2;
1686             height[2] = obj_image->image.height / 2;
1687             pitch[2] = obj_image->image.pitches[2];
1688             offset[2] = obj_image->image.offsets[2];
1689         }
1690     }
1691
1692     /* Y surface */
1693     i965_pp_set_surface_state(ctx, pp_context,
1694                               bo, offset[Y],
1695                               width[Y] *scale_factor_of_1st_plane_width_in_byte / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
1696                               base_index, is_target);
1697
1698     if (!packed_yuv && !full_packed_format) {
1699         if (interleaved_uv) {
1700             i965_pp_set_surface_state(ctx, pp_context,
1701                                       bo, offset[UV],
1702                                       width[UV] / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
1703                                       base_index + 1, is_target);
1704         } else {
1705             /* U surface */
1706             i965_pp_set_surface_state(ctx, pp_context,
1707                                       bo, offset[U],
1708                                       width[U] / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
1709                                       base_index + 1, is_target);
1710
1711             /* V surface */
1712             i965_pp_set_surface_state(ctx, pp_context,
1713                                       bo, offset[V],
1714                                       width[V] / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
1715                                       base_index + 2, is_target);
1716         }
1717     }
1718
1719 }
1720
1721 static void 
1722 gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1723                                      const struct i965_surface *surface, 
1724                                      int base_index, int is_target,
1725                                      int *width, int *height, int *pitch, int *offset)
1726 {
1727     struct i965_driver_data *i965 = i965_driver_data(ctx);
1728     struct object_surface *obj_surface;
1729     struct object_image *obj_image;
1730     dri_bo *bo;
1731     int fourcc = pp_get_surface_fourcc(ctx, surface);
1732     const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1733                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1;
1734     const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1735                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 1 : 2;
1736     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1737     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y'));
1738     int rgbx_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || 
1739                               fourcc == VA_FOURCC('R', 'G', 'B', 'X') || 
1740                               fourcc == VA_FOURCC('B', 'G', 'R', 'A') || 
1741                               fourcc == VA_FOURCC('B', 'G', 'R', 'X'));
1742
1743     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1744         obj_surface = SURFACE(surface->id);
1745         bo = obj_surface->bo;
1746         width[0] = obj_surface->orig_width;
1747         height[0] = obj_surface->orig_height;
1748         pitch[0] = obj_surface->width;
1749         offset[0] = 0;
1750
1751         if (packed_yuv) {
1752             if (is_target)
1753                 width[0] = obj_surface->orig_width * 2; /* surface format is R8, so double the width */
1754             else
1755                 width[0] = obj_surface->orig_width;     /* surface foramt is YCBCR, width is specified in units of pixels */
1756
1757             pitch[0] = obj_surface->width * 2;
1758         } else if (rgbx_format) {
1759             if (is_target)
1760                 width[0] = obj_surface->orig_width * 4; /* surface format is R8, so quad the width */
1761         }
1762
1763         width[1] = obj_surface->cb_cr_width;
1764         height[1] = obj_surface->cb_cr_height;
1765         pitch[1] = obj_surface->cb_cr_pitch;
1766         offset[1] = obj_surface->y_cb_offset * obj_surface->width;
1767
1768         width[2] = obj_surface->cb_cr_width;
1769         height[2] = obj_surface->cb_cr_height;
1770         pitch[2] = obj_surface->cb_cr_pitch;
1771         offset[2] = obj_surface->y_cr_offset * obj_surface->width;
1772     } else {
1773         obj_image = IMAGE(surface->id);
1774         bo = obj_image->bo;
1775         width[0] = obj_image->image.width;
1776         height[0] = obj_image->image.height;
1777         pitch[0] = obj_image->image.pitches[0];
1778         offset[0] = obj_image->image.offsets[0];
1779
1780         if (rgbx_format) {
1781             if (is_target)
1782                 width[0] = obj_image->image.width * 4; /* surface format is R8, so quad the width */
1783         } else if (packed_yuv) {
1784             if (is_target)
1785                 width[0] = obj_image->image.width * 2;  /* surface format is R8, so double the width */
1786             else
1787                 width[0] = obj_image->image.width;      /* surface foramt is YCBCR, width is specified in units of pixels */
1788         } else if (interleaved_uv) {
1789             width[1] = obj_image->image.width / 2;
1790             height[1] = obj_image->image.height / 2;
1791             pitch[1] = obj_image->image.pitches[1];
1792             offset[1] = obj_image->image.offsets[1];
1793         } else {
1794             width[1] = obj_image->image.width / 2;
1795             height[1] = obj_image->image.height / 2;
1796             pitch[1] = obj_image->image.pitches[U];
1797             offset[1] = obj_image->image.offsets[U];
1798             width[2] = obj_image->image.width / 2;
1799             height[2] = obj_image->image.height / 2;
1800             pitch[2] = obj_image->image.pitches[V];
1801             offset[2] = obj_image->image.offsets[V];
1802         }
1803     }
1804
1805     if (is_target) {
1806         gen7_pp_set_surface_state(ctx, pp_context,
1807                                   bo, 0,
1808                                   width[0] / 4, height[0], pitch[0],
1809                                   I965_SURFACEFORMAT_R8_UINT,
1810                                   base_index, 1);
1811         if (rgbx_format) {
1812                 struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1813                 /* the format is MSB: X-B-G-R */
1814                 pp_static_parameter->grf2.save_avs_rgb_swap = 0;
1815                 if ((fourcc == VA_FOURCC('B', 'G', 'R', 'A')) || 
1816                               (fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) {
1817                         /* It is stored as MSB: X-R-G-B */
1818                         pp_static_parameter->grf2.save_avs_rgb_swap = 1;
1819                 }
1820         }
1821         if (!packed_yuv && !rgbx_format) {
1822             if (interleaved_uv) {
1823                 gen7_pp_set_surface_state(ctx, pp_context,
1824                                           bo, offset[1],
1825                                           width[1] / 2, height[1], pitch[1],
1826                                           I965_SURFACEFORMAT_R8G8_SINT,
1827                                           base_index + 1, 1);
1828             } else {
1829                 gen7_pp_set_surface_state(ctx, pp_context,
1830                                           bo, offset[1],
1831                                           width[1] / 4, height[1], pitch[1],
1832                                           I965_SURFACEFORMAT_R8_SINT,
1833                                           base_index + 1, 1);
1834                 gen7_pp_set_surface_state(ctx, pp_context,
1835                                           bo, offset[2],
1836                                           width[2] / 4, height[2], pitch[2],
1837                                           I965_SURFACEFORMAT_R8_SINT,
1838                                           base_index + 2, 1);
1839             }
1840         }
1841     } else {
1842         int format0 = SURFACE_FORMAT_Y8_UNORM;
1843
1844         switch (fourcc) {
1845         case VA_FOURCC('Y', 'U', 'Y', '2'):
1846             format0 = SURFACE_FORMAT_YCRCB_NORMAL;
1847             break;
1848
1849         case VA_FOURCC('U', 'Y', 'V', 'Y'):
1850             format0 = SURFACE_FORMAT_YCRCB_SWAPY;
1851             break;
1852
1853         default:
1854             break;
1855         }
1856
1857         gen7_pp_set_surface2_state(ctx, pp_context,
1858                                    bo, offset[0],
1859                                    width[0], height[0], pitch[0],
1860                                    0, 0,
1861                                    format0, 0,
1862                                    base_index);
1863
1864         if (!packed_yuv) {
1865             if (interleaved_uv) {
1866                 gen7_pp_set_surface2_state(ctx, pp_context,
1867                                            bo, offset[1],
1868                                            width[1], height[1], pitch[1],
1869                                            0, 0,
1870                                            SURFACE_FORMAT_R8B8_UNORM, 0,
1871                                            base_index + 1);
1872             } else {
1873                 gen7_pp_set_surface2_state(ctx, pp_context,
1874                                            bo, offset[1],
1875                                            width[1], height[1], pitch[1],
1876                                            0, 0,
1877                                            SURFACE_FORMAT_R8_UNORM, 0,
1878                                            base_index + 1);
1879                 gen7_pp_set_surface2_state(ctx, pp_context,
1880                                            bo, offset[2],
1881                                            width[2], height[2], pitch[2],
1882                                            0, 0,
1883                                            SURFACE_FORMAT_R8_UNORM, 0,
1884                                            base_index + 2);
1885             }
1886         }
1887     }
1888 }
1889
/*
 * Horizontal step count for the "null" post-processing module.
 * The private context is ignored; the count is always 1.
 */
static int
pp_null_x_steps(void *private_context)
{
    const int steps = 1;

    (void)private_context;      /* not consulted by the null module */

    return steps;
}
1895
/*
 * Vertical step count for the "null" post-processing module.
 * The private context is ignored; the count is always 1.
 */
static int
pp_null_y_steps(void *private_context)
{
    const int steps = 1;

    (void)private_context;      /* not consulted by the null module */

    return steps;
}
1901
/*
 * Per-block parameter hook for the "null" post-processing module.
 * Nothing is written for block (x, y); the function only reports 0.
 */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    /* none of the arguments are consulted */
    (void)pp_context;
    (void)x;
    (void)y;

    return 0;
}
1907
1908 static VAStatus
1909 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1910                    const struct i965_surface *src_surface,
1911                    const VARectangle *src_rect,
1912                    struct i965_surface *dst_surface,
1913                    const VARectangle *dst_rect,
1914                    void *filter_param)
1915 {
1916     /* private function & data */
1917     pp_context->pp_x_steps = pp_null_x_steps;
1918     pp_context->pp_y_steps = pp_null_y_steps;
1919     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
1920
1921     dst_surface->flags = src_surface->flags;
1922
1923     return VA_STATUS_SUCCESS;
1924 }
1925
/*
 * Horizontal step count for the load/save module.
 * The private context is ignored; the count is always 1.
 */
static int
pp_load_save_x_steps(void *private_context)
{
    const int steps = 1;

    (void)private_context;      /* not consulted */

    return steps;
}
1931
1932 static int
1933 pp_load_save_y_steps(void *private_context)
1934 {
1935     struct pp_load_save_context *pp_load_save_context = private_context;
1936
1937     return pp_load_save_context->dest_h / 8;
1938 }
1939
1940 static int
1941 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1942 {
1943     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1944     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
1945
1946     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_load_save_context->dest_x;
1947     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_load_save_context->dest_y;
1948
1949     return 0;
1950 }
1951
1952 static void calculate_boundary_block_mask(struct i965_post_processing_context *pp_context, const VARectangle *dst_rect)
1953 {
1954     int i;
1955     /* x offset of dest surface must be dword aligned.
1956      * so we have to extend dst surface on left edge, and mask out pixels not interested
1957      */
1958     if (dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT) {
1959         pp_context->block_horizontal_mask_left = 0;
1960         for (i=dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; i<GPU_ASM_BLOCK_WIDTH; i++)
1961         {
1962             pp_context->block_horizontal_mask_left |= 1<<i;
1963         }
1964     }
1965     else {
1966         pp_context->block_horizontal_mask_left = 0xffff;
1967     }
1968     
1969     int dst_width_adjust = dst_rect->width + dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; 
1970     if (dst_width_adjust%GPU_ASM_BLOCK_WIDTH){
1971         pp_context->block_horizontal_mask_right = (1 << (dst_width_adjust%GPU_ASM_BLOCK_WIDTH)) - 1;
1972     }
1973     else {
1974         pp_context->block_horizontal_mask_right = 0xffff;
1975     }
1976     
1977     if (dst_rect->height%GPU_ASM_BLOCK_HEIGHT){
1978         pp_context->block_vertical_mask_bottom = (1 << (dst_rect->height%GPU_ASM_BLOCK_HEIGHT)) - 1;
1979     }
1980     else {
1981         pp_context->block_vertical_mask_bottom = 0xff;
1982     }
1983
1984 }
1985 static VAStatus
1986 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1987                                 const struct i965_surface *src_surface,
1988                                 const VARectangle *src_rect,
1989                                 struct i965_surface *dst_surface,
1990                                 const VARectangle *dst_rect,
1991                                 void *filter_param)
1992 {
1993     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
1994     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1995     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1996     int width[3], height[3], pitch[3], offset[3];
1997     const int Y = 0;
1998
1999     /* source surface */
2000     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
2001                                     width, height, pitch, offset);
2002
2003     /* destination surface */
2004     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
2005                                     width, height, pitch, offset);
2006
2007     /* private function & data */
2008     pp_context->pp_x_steps = pp_load_save_x_steps;
2009     pp_context->pp_y_steps = pp_load_save_y_steps;
2010     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
2011
2012     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;;
2013     pp_load_save_context->dest_x = dst_rect->x - dst_left_edge_extend;
2014     pp_load_save_context->dest_y = dst_rect->y;
2015     pp_load_save_context->dest_h = ALIGN(dst_rect->height, 8);
2016     pp_load_save_context->dest_w = ALIGN(dst_rect->width+dst_left_edge_extend, 16);
2017
2018     pp_inline_parameter->grf5.block_count_x = pp_load_save_context->dest_w / 16;   /* 1 x N */
2019     pp_inline_parameter->grf5.number_blocks = pp_load_save_context->dest_w / 16;
2020
2021     pp_static_parameter->grf3.horizontal_origin_offset = src_rect->x;
2022     pp_static_parameter->grf3.vertical_origin_offset = src_rect->y;
2023
2024     // update u/v offset for packed yuv
2025     i965_update_src_surface_static_parameter (ctx, pp_context, src_surface);
2026     i965_update_dst_surface_static_parameter (ctx, pp_context, dst_surface);
2027
2028     dst_surface->flags = src_surface->flags;
2029
2030     return VA_STATUS_SUCCESS;
2031 }
2032
/*
 * Horizontal step count for the scaling module.
 * The private context is ignored; the count is always 1.
 */
static int
pp_scaling_x_steps(void *private_context)
{
    const int steps = 1;

    (void)private_context;      /* not consulted */

    return steps;
}
2038
2039 static int
2040 pp_scaling_y_steps(void *private_context)
2041 {
2042     struct pp_scaling_context *pp_scaling_context = private_context;
2043
2044     return pp_scaling_context->dest_h / 8;
2045 }
2046
2047 static int
2048 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2049 {
2050     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
2051     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2052     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2053     float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2054     float src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2055
2056     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
2057     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
2058     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
2059     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
2060     
2061     return 0;
2062 }
2063
2064 static VAStatus
2065 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2066                            const struct i965_surface *src_surface,
2067                            const VARectangle *src_rect,
2068                            struct i965_surface *dst_surface,
2069                            const VARectangle *dst_rect,
2070                            void *filter_param)
2071 {
2072     struct i965_driver_data *i965 = i965_driver_data(ctx);
2073     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
2074     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2075     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2076     struct object_surface *obj_surface;
2077     struct i965_sampler_state *sampler_state;
2078     int in_w, in_h, in_wpitch, in_hpitch;
2079     int out_w, out_h, out_wpitch, out_hpitch;
2080
2081     /* source surface */
2082     obj_surface = SURFACE(src_surface->id);
2083     in_w = obj_surface->orig_width;
2084     in_h = obj_surface->orig_height;
2085     in_wpitch = obj_surface->width;
2086     in_hpitch = obj_surface->height;
2087
2088     /* source Y surface index 1 */
2089     i965_pp_set_surface_state(ctx, pp_context,
2090                               obj_surface->bo, 0,
2091                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2092                               1, 0);
2093
2094     /* source UV surface index 2 */
2095     i965_pp_set_surface_state(ctx, pp_context,
2096                               obj_surface->bo, in_wpitch * in_hpitch,
2097                               in_w / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2098                               2, 0);
2099
2100     /* destination surface */
2101     obj_surface = SURFACE(dst_surface->id);
2102     out_w = obj_surface->orig_width;
2103     out_h = obj_surface->orig_height;
2104     out_wpitch = obj_surface->width;
2105     out_hpitch = obj_surface->height;
2106
2107     /* destination Y surface index 7 */
2108     i965_pp_set_surface_state(ctx, pp_context,
2109                               obj_surface->bo, 0,
2110                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2111                               7, 1);
2112
2113     /* destination UV surface index 8 */
2114     i965_pp_set_surface_state(ctx, pp_context,
2115                               obj_surface->bo, out_wpitch * out_hpitch,
2116                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2117                               8, 1);
2118
2119     /* sampler state */
2120     dri_bo_map(pp_context->sampler_state_table.bo, True);
2121     assert(pp_context->sampler_state_table.bo->virtual);
2122     sampler_state = pp_context->sampler_state_table.bo->virtual;
2123
2124     /* SIMD16 Y index 1 */
2125     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
2126     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2127     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2128     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2129     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2130
2131     /* SIMD16 UV index 2 */
2132     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
2133     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2134     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2135     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2136     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2137
2138     dri_bo_unmap(pp_context->sampler_state_table.bo);
2139
2140     /* private function & data */
2141     pp_context->pp_x_steps = pp_scaling_x_steps;
2142     pp_context->pp_y_steps = pp_scaling_y_steps;
2143     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
2144
2145     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
2146     float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
2147     pp_scaling_context->dest_x = dst_rect->x - dst_left_edge_extend;
2148     pp_scaling_context->dest_y = dst_rect->y;
2149     pp_scaling_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2150     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 8);
2151     pp_scaling_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
2152     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
2153
2154     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2155
2156     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
2157     pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
2158     pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
2159
2160     dst_surface->flags = src_surface->flags;
2161
2162     return VA_STATUS_SUCCESS;
2163 }
2164
2165 static int
2166 pp_avs_x_steps(void *private_context)
2167 {
2168     struct pp_avs_context *pp_avs_context = private_context;
2169
2170     return pp_avs_context->dest_w / 16;
2171 }
2172
/*
 * Vertical step count for the AVS module.
 * The private context is ignored; the count is always 1.
 */
static int
pp_avs_y_steps(void *private_context)
{
    const int steps = 1;

    (void)private_context;      /* not consulted */

    return steps;
}
2178
2179 static int
2180 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2181 {
2182     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2183     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2184     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2185     float src_x_steping, src_y_steping, video_step_delta;
2186     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
2187
2188     if (pp_static_parameter->grf4.r4_2.avs.nlas == 0) {
2189         src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2190         pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_avs_context->src_normalized_x;
2191     } else if (tmp_w >= pp_avs_context->dest_w) {
2192         pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
2193         pp_inline_parameter->grf6.video_step_delta = 0;
2194         
2195         if (x == 0) {
2196             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
2197                 pp_avs_context->src_normalized_x;
2198         } else {
2199             src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2200             video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2201             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2202                 16 * 15 * video_step_delta / 2;
2203         }
2204     } else {
2205         int n0, n1, n2, nls_left, nls_right;
2206         int factor_a = 5, factor_b = 4;
2207         float f;
2208
2209         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
2210         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
2211         n2 = tmp_w / (16 * factor_a);
2212         nls_left = n0 + n2;
2213         nls_right = n1 + n2;
2214         f = (float) n2 * 16 / tmp_w;
2215         
2216         if (n0 < 5) {
2217             pp_inline_parameter->grf6.video_step_delta = 0.0;
2218
2219             if (x == 0) {
2220                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
2221                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
2222             } else {
2223                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2224                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2225                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2226                     16 * 15 * video_step_delta / 2;
2227             }
2228         } else {
2229             if (x < nls_left) {
2230                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
2231                 float a = f / (nls_left * 16 * factor_b);
2232                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
2233                 
2234                 pp_inline_parameter->grf6.video_step_delta = b;
2235
2236                 if (x == 0) {
2237                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
2238                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a;
2239                 } else {
2240                     src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2241                     video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2242                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2243                         16 * 15 * video_step_delta / 2;
2244                     pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
2245                 }
2246             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
2247                 /* scale the center linearly */
2248                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2249                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2250                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2251                     16 * 15 * video_step_delta / 2;
2252                 pp_inline_parameter->grf6.video_step_delta = 0.0;
2253                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
2254             } else {
2255                 float a = f / (nls_right * 16 * factor_b);
2256                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
2257
2258                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2259                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2260                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2261                     16 * 15 * video_step_delta / 2;
2262                 pp_inline_parameter->grf6.video_step_delta = -b;
2263
2264                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
2265                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
2266                 else
2267                     pp_inline_parameter->grf5.normalized_video_x_scaling_step -= b * 16;
2268             }
2269         }
2270     }
2271
2272     src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2273     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
2274     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2275     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
2276
2277     return 0;
2278 }
2279
2280 static VAStatus
2281 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2282                        const struct i965_surface *src_surface,
2283                        const VARectangle *src_rect,
2284                        struct i965_surface *dst_surface,
2285                        const VARectangle *dst_rect,
2286                        void *filter_param,
2287                        int nlas)
2288 {
2289     struct i965_driver_data *i965 = i965_driver_data(ctx);
2290     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2291     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2292     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2293     struct object_surface *obj_surface;
2294     struct i965_sampler_8x8 *sampler_8x8;
2295     struct i965_sampler_8x8_state *sampler_8x8_state;
2296     int index;
2297     int in_w, in_h, in_wpitch, in_hpitch;
2298     int out_w, out_h, out_wpitch, out_hpitch;
2299     int i;
2300
2301     /* surface */
2302     obj_surface = SURFACE(src_surface->id);
2303     in_w = obj_surface->orig_width;
2304     in_h = obj_surface->orig_height;
2305     in_wpitch = obj_surface->width;
2306     in_hpitch = obj_surface->height;
2307
2308     /* source Y surface index 1 */
2309     i965_pp_set_surface2_state(ctx, pp_context,
2310                                obj_surface->bo, 0,
2311                                in_w, in_h, in_wpitch,
2312                                0, 0,
2313                                SURFACE_FORMAT_Y8_UNORM, 0,
2314                                1);
2315
2316     /* source UV surface index 2 */
2317     i965_pp_set_surface2_state(ctx, pp_context,
2318                                obj_surface->bo, in_wpitch * in_hpitch,
2319                                in_w / 2, in_h / 2, in_wpitch,
2320                                0, 0,
2321                                SURFACE_FORMAT_R8B8_UNORM, 0,
2322                                2);
2323
2324     /* destination surface */
2325     obj_surface = SURFACE(dst_surface->id);
2326     out_w = obj_surface->orig_width;
2327     out_h = obj_surface->orig_height;
2328     out_wpitch = obj_surface->width;
2329     out_hpitch = obj_surface->height;
2330     assert(out_w <= out_wpitch && out_h <= out_hpitch);
2331
2332     /* destination Y surface index 7 */
2333     i965_pp_set_surface_state(ctx, pp_context,
2334                               obj_surface->bo, 0,
2335                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2336                               7, 1);
2337
2338     /* destination UV surface index 8 */
2339     i965_pp_set_surface_state(ctx, pp_context,
2340                               obj_surface->bo, out_wpitch * out_hpitch,
2341                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2342                               8, 1);
2343
2344     /* sampler 8x8 state */
2345     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2346     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2347     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2348     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2349     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2350
2351     for (i = 0; i < 17; i++) {
2352         /* for Y channel, currently ignore */
2353         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x00;
2354         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x00;
2355         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x08;
2356         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x18;
2357         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x18;
2358         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x08;
2359         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x00;
2360         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x00;
2361         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x00;
2362         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x00;
2363         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x10;
2364         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x10;
2365         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x10;
2366         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x10;
2367         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x00;
2368         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x00;
2369         /* for U/V channel, 0.25 */
2370         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2371         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2372         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
2373         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
2374         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
2375         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
2376         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2377         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2378         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2379         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2380         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
2381         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
2382         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
2383         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
2384         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2385         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2386     }
2387
2388     sampler_8x8_state->dw136.default_sharpness_level = 0;
2389     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
2390     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2391     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2392     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2393
2394     /* sampler 8x8 */
2395     dri_bo_map(pp_context->sampler_state_table.bo, True);
2396     assert(pp_context->sampler_state_table.bo->virtual);
2397     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
2398     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2399
2400     /* sample_8x8 Y index 1 */
2401     index = 1;
2402     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2403     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2404     sampler_8x8[index].dw0.ief_bypass = 1;
2405     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2406     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2407     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2408     sampler_8x8[index].dw2.global_noise_estimation = 22;
2409     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2410     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2411     sampler_8x8[index].dw3.strong_edge_weight = 7;
2412     sampler_8x8[index].dw3.regular_weight = 2;
2413     sampler_8x8[index].dw3.non_edge_weight = 0;
2414     sampler_8x8[index].dw3.gain_factor = 40;
2415     sampler_8x8[index].dw4.steepness_boost = 0;
2416     sampler_8x8[index].dw4.steepness_threshold = 0;
2417     sampler_8x8[index].dw4.mr_boost = 0;
2418     sampler_8x8[index].dw4.mr_threshold = 5;
2419     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2420     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2421     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2422     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2423     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2424     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2425     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2426     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2427     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2428     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2429     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2430     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2431     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2432     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2433     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2434     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2435     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2436     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2437     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2438     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2439     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2440     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2441     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2442     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2443     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2444     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2445     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2446     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2447     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2448     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2449     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2450     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2451     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2452     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2453     sampler_8x8[index].dw13.limiter_boost = 0;
2454     sampler_8x8[index].dw13.minimum_limiter = 10;
2455     sampler_8x8[index].dw13.maximum_limiter = 11;
2456     sampler_8x8[index].dw14.clip_limiter = 130;
2457     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2458                       I915_GEM_DOMAIN_RENDER, 
2459                       0,
2460                       0,
2461                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2462                       pp_context->sampler_state_table.bo_8x8);
2463
2464     /* sample_8x8 UV index 2 */
2465     index = 2;
2466     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2467     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2468     sampler_8x8[index].dw0.ief_bypass = 1;
2469     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2470     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2471     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2472     sampler_8x8[index].dw2.global_noise_estimation = 22;
2473     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2474     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2475     sampler_8x8[index].dw3.strong_edge_weight = 7;
2476     sampler_8x8[index].dw3.regular_weight = 2;
2477     sampler_8x8[index].dw3.non_edge_weight = 0;
2478     sampler_8x8[index].dw3.gain_factor = 40;
2479     sampler_8x8[index].dw4.steepness_boost = 0;
2480     sampler_8x8[index].dw4.steepness_threshold = 0;
2481     sampler_8x8[index].dw4.mr_boost = 0;
2482     sampler_8x8[index].dw4.mr_threshold = 5;
2483     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2484     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2485     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2486     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2487     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2488     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2489     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2490     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2491     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2492     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2493     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2494     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2495     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2496     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2497     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2498     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2499     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2500     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2501     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2502     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2503     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2504     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2505     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2506     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2507     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2508     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2509     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2510     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2511     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2512     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2513     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2514     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2515     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2516     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2517     sampler_8x8[index].dw13.limiter_boost = 0;
2518     sampler_8x8[index].dw13.minimum_limiter = 10;
2519     sampler_8x8[index].dw13.maximum_limiter = 11;
2520     sampler_8x8[index].dw14.clip_limiter = 130;
2521     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2522                       I915_GEM_DOMAIN_RENDER, 
2523                       0,
2524                       0,
2525                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2526                       pp_context->sampler_state_table.bo_8x8);
2527
2528     dri_bo_unmap(pp_context->sampler_state_table.bo);
2529
2530     /* private function & data */
2531     pp_context->pp_x_steps = pp_avs_x_steps;
2532     pp_context->pp_y_steps = pp_avs_y_steps;
2533     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
2534
2535     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
2536     float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
2537     pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
2538     pp_avs_context->dest_y = dst_rect->y;
2539     pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2540     pp_avs_context->dest_h = ALIGN(dst_rect->height, 8);
2541     pp_avs_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
2542     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
2543     pp_avs_context->src_w = src_rect->width + src_left_edge_extend;
2544     pp_avs_context->src_h = src_rect->height;
2545
2546     pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
2547     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2548
2549     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
2550     pp_inline_parameter->grf5.block_count_x = 1;        /* M x 1 */
2551     pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
2552     pp_inline_parameter->grf6.video_step_delta = 0.0;
2553
2554     dst_surface->flags = src_surface->flags;
2555
2556     return VA_STATUS_SUCCESS;
2557 }
2558
2559 static VAStatus
2560 pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2561                             const struct i965_surface *src_surface,
2562                             const VARectangle *src_rect,
2563                             struct i965_surface *dst_surface,
2564                             const VARectangle *dst_rect,
2565                             void *filter_param)
2566 {
2567     return pp_nv12_avs_initialize(ctx, pp_context,
2568                                   src_surface,
2569                                   src_rect,
2570                                   dst_surface,
2571                                   dst_rect,
2572                                   filter_param,
2573                                   1);
2574 }
2575
2576 static VAStatus
2577 gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2578                              const struct i965_surface *src_surface,
2579                              const VARectangle *src_rect,
2580                              struct i965_surface *dst_surface,
2581                              const VARectangle *dst_rect,
2582                              void *filter_param)
2583 {
2584     return pp_nv12_avs_initialize(ctx, pp_context,
2585                                   src_surface,
2586                                   src_rect,
2587                                   dst_surface,
2588                                   dst_rect,
2589                                   filter_param,
2590                                   0);    
2591 }
2592
2593 static int
2594 gen7_pp_avs_x_steps(void *private_context)
2595 {
2596     struct pp_avs_context *pp_avs_context = private_context;
2597
2598     return pp_avs_context->dest_w / 16;
2599 }
2600
2601 static int
2602 gen7_pp_avs_y_steps(void *private_context)
2603 {
2604     struct pp_avs_context *pp_avs_context = private_context;
2605
2606     return pp_avs_context->dest_h / 16;
2607 }
2608
2609 static int
2610 gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2611 {
2612     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2613     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2614
2615     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2616     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
2617     pp_inline_parameter->grf7.constant_0 = 0xffffffff;
2618     pp_inline_parameter->grf7.sampler_load_main_video_x_scaling_step = 1.0 / pp_avs_context->src_w;
2619
2620     return 0;
2621 }
2622
2623 static void gen7_update_src_surface_uv_offset(VADriverContextP    ctx, 
2624                                               struct i965_post_processing_context *pp_context,
2625                                               const struct i965_surface *surface)
2626 {
2627     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2628     int fourcc = pp_get_surface_fourcc(ctx, surface);
2629     
2630     if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) {
2631         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
2632         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
2633         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
2634     } else if (fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
2635         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 1;
2636         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 0;
2637         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 2;
2638     }
2639 }
2640
2641 static VAStatus
2642 gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2643                            const struct i965_surface *src_surface,
2644                            const VARectangle *src_rect,
2645                            struct i965_surface *dst_surface,
2646                            const VARectangle *dst_rect,
2647                            void *filter_param)
2648 {
2649     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2650     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2651     struct gen7_sampler_8x8 *sampler_8x8;
2652     struct i965_sampler_8x8_state *sampler_8x8_state;
2653     int index, i;
2654     int width[3], height[3], pitch[3], offset[3];
2655     int src_width, src_height;
2656
2657     /* source surface */
2658     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
2659                                          width, height, pitch, offset);
2660     src_width = width[0];
2661     src_height = height[0];
2662
2663     /* destination surface */
2664     gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
2665                                          width, height, pitch, offset);
2666
2667     /* sampler 8x8 state */
2668     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2669     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2670     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2671     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2672     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2673
2674     for (i = 0; i < 17; i++) {
2675         /* for Y channel, currently ignore */
2676         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0;
2677         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0;
2678         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0;
2679         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x0;
2680         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x0;
2681         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0;
2682         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0;
2683         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0;
2684         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0;
2685         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0;
2686         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0;
2687         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x0;
2688         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x0;
2689         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0;
2690         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0;
2691         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0;
2692         /* for U/V channel, 0.25 */
2693         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2694         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2695         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
2696         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
2697         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
2698         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
2699         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2700         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2701         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2702         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2703         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
2704         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
2705         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
2706         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
2707         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2708         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2709     }
2710
2711     sampler_8x8_state->dw136.default_sharpness_level = 0;
2712     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
2713     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2714     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2715     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2716
2717     /* sampler 8x8 */
2718     dri_bo_map(pp_context->sampler_state_table.bo, True);
2719     assert(pp_context->sampler_state_table.bo->virtual);
2720     assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
2721     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2722
2723     /* sample_8x8 Y index 4 */
2724     index = 4;
2725     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2726     sampler_8x8[index].dw0.global_noise_estimation = 255;
2727     sampler_8x8[index].dw0.ief_bypass = 1;
2728
2729     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2730
2731     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2732     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2733     sampler_8x8[index].dw2.r5x_coefficient = 9;
2734     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2735     sampler_8x8[index].dw2.r5c_coefficient = 3;
2736
2737     sampler_8x8[index].dw3.r3x_coefficient = 27;
2738     sampler_8x8[index].dw3.r3c_coefficient = 5;
2739     sampler_8x8[index].dw3.gain_factor = 40;
2740     sampler_8x8[index].dw3.non_edge_weight = 1;
2741     sampler_8x8[index].dw3.regular_weight = 2;
2742     sampler_8x8[index].dw3.strong_edge_weight = 7;
2743     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2744
2745     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2746                       I915_GEM_DOMAIN_RENDER, 
2747                       0,
2748                       0,
2749                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2750                       pp_context->sampler_state_table.bo_8x8);
2751
2752     /* sample_8x8 UV index 8 */
2753     index = 8;
2754     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2755     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2756     sampler_8x8[index].dw0.global_noise_estimation = 255;
2757     sampler_8x8[index].dw0.ief_bypass = 1;
2758     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2759     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2760     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2761     sampler_8x8[index].dw2.r5x_coefficient = 9;
2762     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2763     sampler_8x8[index].dw2.r5c_coefficient = 3;
2764     sampler_8x8[index].dw3.r3x_coefficient = 27;
2765     sampler_8x8[index].dw3.r3c_coefficient = 5;
2766     sampler_8x8[index].dw3.gain_factor = 40;
2767     sampler_8x8[index].dw3.non_edge_weight = 1;
2768     sampler_8x8[index].dw3.regular_weight = 2;
2769     sampler_8x8[index].dw3.strong_edge_weight = 7;
2770     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2771
2772     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2773                       I915_GEM_DOMAIN_RENDER, 
2774                       0,
2775                       0,
2776                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2777                       pp_context->sampler_state_table.bo_8x8);
2778
2779     /* sampler_8x8 V, index 12 */
2780     index = 12;
2781     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2782     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2783     sampler_8x8[index].dw0.global_noise_estimation = 255;
2784     sampler_8x8[index].dw0.ief_bypass = 1;
2785     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2786     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2787     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2788     sampler_8x8[index].dw2.r5x_coefficient = 9;
2789     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2790     sampler_8x8[index].dw2.r5c_coefficient = 3;
2791     sampler_8x8[index].dw3.r3x_coefficient = 27;
2792     sampler_8x8[index].dw3.r3c_coefficient = 5;
2793     sampler_8x8[index].dw3.gain_factor = 40;
2794     sampler_8x8[index].dw3.non_edge_weight = 1;
2795     sampler_8x8[index].dw3.regular_weight = 2;
2796     sampler_8x8[index].dw3.strong_edge_weight = 7;
2797     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2798
2799     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2800                       I915_GEM_DOMAIN_RENDER, 
2801                       0,
2802                       0,
2803                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2804                       pp_context->sampler_state_table.bo_8x8);
2805
2806     dri_bo_unmap(pp_context->sampler_state_table.bo);
2807
2808     /* private function & data */
2809     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
2810     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
2811     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
2812
2813     pp_avs_context->dest_x = dst_rect->x;
2814     pp_avs_context->dest_y = dst_rect->y;
2815     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
2816     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
2817     pp_avs_context->src_w = src_rect->width;
2818     pp_avs_context->src_h = src_rect->height;
2819
2820     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
2821     dw = MAX(dw, pp_avs_context->dest_w);
2822
2823     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
2824     pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */
2825     pp_static_parameter->grf2.avs_wa_width = dw;
2826     pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * dw);
2827     pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * dw);
2828
2829     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
2830     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / pp_avs_context->dest_h;
2831     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = -(float)pp_avs_context->dest_y / pp_avs_context->dest_h;
2832     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = -(float)pp_avs_context->dest_x / dw;
2833
2834     gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
2835
2836     dst_surface->flags = src_surface->flags;
2837
2838     return VA_STATUS_SUCCESS;
2839 }
2840
/*
 * Number of horizontal steps for the DNDI kernel.
 *
 * Always 1; the private context is not consulted.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2846
2847 static int
2848 pp_dndi_y_steps(void *private_context)
2849 {
2850     struct pp_dndi_context *pp_dndi_context = private_context;
2851
2852     return pp_dndi_context->dest_h / 4;
2853 }
2854
2855 static int
2856 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2857 {
2858     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2859
2860     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2861     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
2862
2863     return 0;
2864 }
2865
2866 static VAStatus
2867 pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2868                         const struct i965_surface *src_surface,
2869                         const VARectangle *src_rect,
2870                         struct i965_surface *dst_surface,
2871                         const VARectangle *dst_rect,
2872                         void *filter_param)
2873 {
2874     struct i965_driver_data *i965 = i965_driver_data(ctx);
2875     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
2876     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2877     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2878     struct object_surface *obj_surface;
2879     struct i965_sampler_dndi *sampler_dndi;
2880     int index;
2881     int w, h;
2882     int orig_w, orig_h;
2883     int dndi_top_first = 1;
2884
2885     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
2886         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
2887
2888     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
2889         dndi_top_first = 1;
2890     else
2891         dndi_top_first = 0;
2892
2893     /* surface */
2894     obj_surface = SURFACE(src_surface->id);
2895     orig_w = obj_surface->orig_width;
2896     orig_h = obj_surface->orig_height;
2897     w = obj_surface->width;
2898     h = obj_surface->height;
2899
2900     if (pp_context->stmm.bo == NULL) {
2901         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2902                                            "STMM surface",
2903                                            w * h,
2904                                            4096);
2905         assert(pp_context->stmm.bo);
2906     }
2907
2908     /* source UV surface index 2 */
2909     i965_pp_set_surface_state(ctx, pp_context,
2910                               obj_surface->bo, w * h,
2911                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2912                               2, 0);
2913
2914     /* source YUV surface index 4 */
2915     i965_pp_set_surface2_state(ctx, pp_context,
2916                                obj_surface->bo, 0,
2917                                orig_w, orig_h, w,
2918                                0, h,
2919                                SURFACE_FORMAT_PLANAR_420_8, 1,
2920                                4);
2921
2922     /* source STMM surface index 20 */
2923     i965_pp_set_surface_state(ctx, pp_context,
2924                               pp_context->stmm.bo, 0,
2925                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2926                               20, 1);
2927
2928     /* destination surface */
2929     obj_surface = SURFACE(dst_surface->id);
2930     orig_w = obj_surface->orig_width;
2931     orig_h = obj_surface->orig_height;
2932     w = obj_surface->width;
2933     h = obj_surface->height;
2934
2935     /* destination Y surface index 7 */
2936     i965_pp_set_surface_state(ctx, pp_context,
2937                               obj_surface->bo, 0,
2938                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2939                               7, 1);
2940
2941     /* destination UV surface index 8 */
2942     i965_pp_set_surface_state(ctx, pp_context,
2943                               obj_surface->bo, w * h,
2944                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2945                               8, 1);
2946     /* sampler dndi */
2947     dri_bo_map(pp_context->sampler_state_table.bo, True);
2948     assert(pp_context->sampler_state_table.bo->virtual);
2949     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2950     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2951
2952     /* sample dndi index 1 */
2953     index = 0;
2954     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2955     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
2956     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2957     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2958
2959     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2960     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 4;
2961     sampler_dndi[index].dw1.stmm_c2 = 1;
2962     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2963     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2964
2965     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
2966     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
2967     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
2968     sampler_dndi[index].dw2.good_neighbor_threshold = 4;                // 0-63
2969
2970     sampler_dndi[index].dw3.maximum_stmm = 128;
2971     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2972     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2973     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2974     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2975
2976     sampler_dndi[index].dw4.sdi_delta = 8;
2977     sampler_dndi[index].dw4.sdi_threshold = 128;
2978     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2979     sampler_dndi[index].dw4.stmm_shift_up = 0;
2980     sampler_dndi[index].dw4.stmm_shift_down = 0;
2981     sampler_dndi[index].dw4.minimum_stmm = 0;
2982
2983     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 8;
2984     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 32;
2985     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 64;
2986     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 32;
2987
2988     sampler_dndi[index].dw6.dn_enable = 1;
2989     sampler_dndi[index].dw6.di_enable = 1;
2990     sampler_dndi[index].dw6.di_partial = 0;
2991     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2992     sampler_dndi[index].dw6.dndi_stream_id = 0;
2993     sampler_dndi[index].dw6.dndi_first_frame = 1;
2994     sampler_dndi[index].dw6.progressive_dn = 0;
2995     sampler_dndi[index].dw6.fmd_tear_threshold = 63;
2996     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2997     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2998
2999     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3000     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3001     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3002     sampler_dndi[index].dw7.column_width_minus1 = 0;
3003
3004     dri_bo_unmap(pp_context->sampler_state_table.bo);
3005
3006     /* private function & data */
3007     pp_context->pp_x_steps = pp_dndi_x_steps;
3008     pp_context->pp_y_steps = pp_dndi_y_steps;
3009     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
3010
3011     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
3012     pp_static_parameter->grf1.r1_6.di.top_field_first = dndi_top_first;
3013     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 0;
3014     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 0;
3015
3016     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
3017     pp_inline_parameter->grf5.number_blocks = w / 16;
3018     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3019     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3020
3021     pp_dndi_context->dest_w = w;
3022     pp_dndi_context->dest_h = h;
3023
3024     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3025
3026     return VA_STATUS_SUCCESS;
3027 }
3028
/*
 * Number of horizontal steps for the DN (denoise) kernel.
 *
 * Always 1; the private context is not consulted.
 */
static int
pp_dn_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
3034
3035 static int
3036 pp_dn_y_steps(void *private_context)
3037 {
3038     struct pp_dn_context *pp_dn_context = private_context;
3039
3040     return pp_dn_context->dest_h / 8;
3041 }
3042
3043 static int
3044 pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3045 {
3046     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3047
3048     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3049     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
3050
3051     return 0;
3052 }
3053
3054 static VAStatus
3055 pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3056                       const struct i965_surface *src_surface,
3057                       const VARectangle *src_rect,
3058                       struct i965_surface *dst_surface,
3059                       const VARectangle *dst_rect,
3060                       void *filter_param)
3061 {
3062     struct i965_driver_data *i965 = i965_driver_data(ctx);
3063     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
3064     struct object_surface *obj_surface;
3065     struct i965_sampler_dndi *sampler_dndi;
3066     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3067     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3068     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3069     int index;
3070     int w, h;
3071     int orig_w, orig_h;
3072     int dn_strength = 15;
3073     int dndi_top_first = 1;
3074     int dn_progressive = 0;
3075
3076     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3077         dndi_top_first = 1;
3078         dn_progressive = 1;
3079     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3080         dndi_top_first = 1;
3081         dn_progressive = 0;
3082     } else {
3083         dndi_top_first = 0;
3084         dn_progressive = 0;
3085     }
3086
3087     if (dn_filter_param) {
3088         float value = dn_filter_param->value;
3089         
3090         if (value > 1.0)
3091             value = 1.0;
3092         
3093         if (value < 0.0)
3094             value = 0.0;
3095
3096         dn_strength = (int)(value * 31.0F);
3097     }
3098
3099     /* surface */
3100     obj_surface = SURFACE(src_surface->id);
3101     orig_w = obj_surface->orig_width;
3102     orig_h = obj_surface->orig_height;
3103     w = obj_surface->width;
3104     h = obj_surface->height;
3105
3106     if (pp_context->stmm.bo == NULL) {
3107         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
3108                                            "STMM surface",
3109                                            w * h,
3110                                            4096);
3111         assert(pp_context->stmm.bo);
3112     }
3113
3114     /* source UV surface index 2 */
3115     i965_pp_set_surface_state(ctx, pp_context,
3116                               obj_surface->bo, w * h,
3117                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3118                               2, 0);
3119
3120     /* source YUV surface index 4 */
3121     i965_pp_set_surface2_state(ctx, pp_context,
3122                                obj_surface->bo, 0,
3123                                orig_w, orig_h, w,
3124                                0, h,
3125                                SURFACE_FORMAT_PLANAR_420_8, 1,
3126                                4);
3127
3128     /* source STMM surface index 20 */
3129     i965_pp_set_surface_state(ctx, pp_context,
3130                               pp_context->stmm.bo, 0,
3131                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3132                               20, 1);
3133
3134     /* destination surface */
3135     obj_surface = SURFACE(dst_surface->id);
3136     orig_w = obj_surface->orig_width;
3137     orig_h = obj_surface->orig_height;
3138     w = obj_surface->width;
3139     h = obj_surface->height;
3140
3141     /* destination Y surface index 7 */
3142     i965_pp_set_surface_state(ctx, pp_context,
3143                               obj_surface->bo, 0,
3144                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3145                               7, 1);
3146
3147     /* destination UV surface index 8 */
3148     i965_pp_set_surface_state(ctx, pp_context,
3149                               obj_surface->bo, w * h,
3150                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3151                               8, 1);
3152     /* sampler dn */
3153     dri_bo_map(pp_context->sampler_state_table.bo, True);
3154     assert(pp_context->sampler_state_table.bo->virtual);
3155     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3156     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3157
3158     /* sample dndi index 1 */
3159     index = 0;
3160     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
3161     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
3162     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
3163     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
3164
3165     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3166     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
3167     sampler_dndi[index].dw1.stmm_c2 = 0;
3168     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
3169     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
3170
3171     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
3172     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
3173     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
3174     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
3175
3176     sampler_dndi[index].dw3.maximum_stmm = 128;
3177     sampler_dndi[index].dw3.multipler_for_vecm = 2;
3178     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3179     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3180     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3181
3182     sampler_dndi[index].dw4.sdi_delta = 8;
3183     sampler_dndi[index].dw4.sdi_threshold = 128;
3184     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3185     sampler_dndi[index].dw4.stmm_shift_up = 0;
3186     sampler_dndi[index].dw4.stmm_shift_down = 0;
3187     sampler_dndi[index].dw4.minimum_stmm = 0;
3188
3189     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
3190     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
3191     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3192     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3193
3194     sampler_dndi[index].dw6.dn_enable = 1;
3195     sampler_dndi[index].dw6.di_enable = 0;
3196     sampler_dndi[index].dw6.di_partial = 0;
3197     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3198     sampler_dndi[index].dw6.dndi_stream_id = 1;
3199     sampler_dndi[index].dw6.dndi_first_frame = 1;
3200     sampler_dndi[index].dw6.progressive_dn = dn_progressive;
3201     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
3202     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
3203     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
3204
3205     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
3206     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
3207     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3208     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3209
3210     dri_bo_unmap(pp_context->sampler_state_table.bo);
3211
3212     /* private function & data */
3213     pp_context->pp_x_steps = pp_dn_x_steps;
3214     pp_context->pp_y_steps = pp_dn_y_steps;
3215     pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
3216
3217     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
3218     pp_static_parameter->grf1.r1_6.di.top_field_first = 0;
3219     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 64;
3220     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 192;
3221
3222     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
3223     pp_inline_parameter->grf5.number_blocks = w / 16;
3224     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3225     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3226
3227     pp_dn_context->dest_w = w;
3228     pp_dn_context->dest_h = h;
3229
3230     dst_surface->flags = src_surface->flags;
3231     
3232     return VA_STATUS_SUCCESS;
3233 }
3234
3235 static int
3236 gen7_pp_dndi_x_steps(void *private_context)
3237 {
3238     struct pp_dndi_context *pp_dndi_context = private_context;
3239
3240     return pp_dndi_context->dest_w / 16;
3241 }
3242
3243 static int
3244 gen7_pp_dndi_y_steps(void *private_context)
3245 {
3246     struct pp_dndi_context *pp_dndi_context = private_context;
3247
3248     return pp_dndi_context->dest_h / 4;
3249 }
3250
3251 static int
3252 gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3253 {
3254     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3255
3256     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16;
3257     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 4;
3258
3259     return 0;
3260 }
3261
3262 static VAStatus
3263 gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3264                              const struct i965_surface *src_surface,
3265                              const VARectangle *src_rect,
3266                              struct i965_surface *dst_surface,
3267                              const VARectangle *dst_rect,
3268                              void *filter_param)
3269 {
3270     struct i965_driver_data *i965 = i965_driver_data(ctx);
3271     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
3272     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3273     struct object_surface *obj_surface;
3274     struct gen7_sampler_dndi *sampler_dndi;
3275     int index;
3276     int w, h;
3277     int orig_w, orig_h;
3278     int dndi_top_first = 1;
3279
3280     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
3281         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
3282
3283     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
3284         dndi_top_first = 1;
3285     else
3286         dndi_top_first = 0;
3287
3288     /* surface */
3289     obj_surface = SURFACE(src_surface->id);
3290     orig_w = obj_surface->orig_width;
3291     orig_h = obj_surface->orig_height;
3292     w = obj_surface->width;
3293     h = obj_surface->height;
3294
3295     if (pp_context->stmm.bo == NULL) {
3296         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
3297                                            "STMM surface",
3298                                            w * h,
3299                                            4096);
3300         assert(pp_context->stmm.bo);
3301     }
3302
3303     /* source UV surface index 1 */
3304     gen7_pp_set_surface_state(ctx, pp_context,
3305                               obj_surface->bo, w * h,
3306                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3307                               1, 0);
3308
3309     /* source YUV surface index 3 */
3310     gen7_pp_set_surface2_state(ctx, pp_context,
3311                                obj_surface->bo, 0,
3312                                orig_w, orig_h, w,
3313                                0, h,
3314                                SURFACE_FORMAT_PLANAR_420_8, 1,
3315                                3);
3316
3317     /* source (temporal reference) YUV surface index 4 */
3318     gen7_pp_set_surface2_state(ctx, pp_context,
3319                                obj_surface->bo, 0,
3320                                orig_w, orig_h, w,
3321                                0, h,
3322                                SURFACE_FORMAT_PLANAR_420_8, 1,
3323                                4);
3324
3325     /* STMM / History Statistics input surface, index 5 */
3326     gen7_pp_set_surface_state(ctx, pp_context,
3327                               pp_context->stmm.bo, 0,
3328                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3329                               5, 1);
3330
3331     /* destination surface */
3332     obj_surface = SURFACE(dst_surface->id);
3333     orig_w = obj_surface->orig_width;
3334     orig_h = obj_surface->orig_height;
3335     w = obj_surface->width;
3336     h = obj_surface->height;
3337
3338     /* destination(Previous frame) Y surface index 27 */
3339     gen7_pp_set_surface_state(ctx, pp_context,
3340                               obj_surface->bo, 0,
3341                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3342                               27, 1);
3343
3344     /* destination(Previous frame) UV surface index 28 */
3345     gen7_pp_set_surface_state(ctx, pp_context,
3346                               obj_surface->bo, w * h,
3347                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3348                               28, 1);
3349
3350     /* destination(Current frame) Y surface index 30 */
3351     gen7_pp_set_surface_state(ctx, pp_context,
3352                               obj_surface->bo, 0,
3353                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3354                               30, 1);
3355
3356     /* destination(Current frame) UV surface index 31 */
3357     gen7_pp_set_surface_state(ctx, pp_context,
3358                               obj_surface->bo, w * h,
3359                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3360                               31, 1);
3361
3362     /* STMM output surface, index 33 */
3363     gen7_pp_set_surface_state(ctx, pp_context,
3364                               pp_context->stmm.bo, 0,
3365                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3366                               33, 1);
3367
3368
3369     /* sampler dndi */
3370     dri_bo_map(pp_context->sampler_state_table.bo, True);
3371     assert(pp_context->sampler_state_table.bo->virtual);
3372     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3373     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3374
3375     /* sample dndi index 0 */
3376     index = 0;
3377     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
3378     sampler_dndi[index].dw0.dnmh_delt = 8;
3379     sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
3380     sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
3381     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
3382     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
3383
3384     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3385     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
3386     sampler_dndi[index].dw1.stmm_c2 = 0;
3387     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
3388     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
3389
3390     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
3391     sampler_dndi[index].dw2.bne_edge_th = 1;
3392     sampler_dndi[index].dw2.smooth_mv_th = 0;
3393     sampler_dndi[index].dw2.sad_tight_th = 5;
3394     sampler_dndi[index].dw2.cat_slope_minus1 = 9;
3395     sampler_dndi[index].dw2.good_neighbor_th = 4;
3396
3397     sampler_dndi[index].dw3.maximum_stmm = 128;
3398     sampler_dndi[index].dw3.multipler_for_vecm = 2;
3399     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3400     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3401     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3402
3403     sampler_dndi[index].dw4.sdi_delta = 8;
3404     sampler_dndi[index].dw4.sdi_threshold = 128;
3405     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3406     sampler_dndi[index].dw4.stmm_shift_up = 0;
3407     sampler_dndi[index].dw4.stmm_shift_down = 0;
3408     sampler_dndi[index].dw4.minimum_stmm = 0;
3409
3410     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
3411     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
3412     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3413     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3414
3415     sampler_dndi[index].dw6.dn_enable = 0;
3416     sampler_dndi[index].dw6.di_enable = 1;
3417     sampler_dndi[index].dw6.di_partial = 0;
3418     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3419     sampler_dndi[index].dw6.dndi_stream_id = 1;
3420     sampler_dndi[index].dw6.dndi_first_frame = 1;
3421     sampler_dndi[index].dw6.progressive_dn = 0;
3422     sampler_dndi[index].dw6.mcdi_enable = 0;
3423     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
3424     sampler_dndi[index].dw6.cat_th1 = 0;
3425     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
3426     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
3427
3428     sampler_dndi[index].dw7.sad_tha = 5;
3429     sampler_dndi[index].dw7.sad_thb = 10;
3430     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3431     sampler_dndi[index].dw7.mc_pixel_consistency_th = 25;
3432     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3433     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3434     sampler_dndi[index].dw7.neighborpixel_th = 10;
3435     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3436
3437     dri_bo_unmap(pp_context->sampler_state_table.bo);
3438
3439     /* private function & data */
3440     pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
3441     pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
3442     pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
3443
3444     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3445     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3446     pp_static_parameter->grf1.di_top_field_first = 0;
3447     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3448
3449     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3450     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3451     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3452
3453     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3454     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3455
3456     pp_dndi_context->dest_w = w;
3457     pp_dndi_context->dest_h = h;
3458
3459     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3460
3461     return VA_STATUS_SUCCESS;
3462 }
3463
3464 static int
3465 gen7_pp_dn_x_steps(void *private_context)
3466 {
3467     struct pp_dn_context *pp_dn_context = private_context;
3468
3469     return pp_dn_context->dest_w / 16;
3470 }
3471
3472 static int
3473 gen7_pp_dn_y_steps(void *private_context)
3474 {
3475     struct pp_dn_context *pp_dn_context = private_context;
3476
3477     return pp_dn_context->dest_h / 4;
3478 }
3479
3480 static int
3481 gen7_pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3482 {
3483     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3484
3485     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3486     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
3487
3488     return 0;
3489 }
3490
3491 static VAStatus
3492 gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3493                            const struct i965_surface *src_surface,
3494                            const VARectangle *src_rect,
3495                            struct i965_surface *dst_surface,
3496                            const VARectangle *dst_rect,
3497                            void *filter_param)
3498 {
3499     struct i965_driver_data *i965 = i965_driver_data(ctx);
3500     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
3501     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3502     struct object_surface *obj_surface;
3503     struct gen7_sampler_dndi *sampler_dn;
3504     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3505     int index;
3506     int w, h;
3507     int orig_w, orig_h;
3508     int dn_strength = 15;
3509     int dndi_top_first = 1;
3510     int dn_progressive = 0;
3511
3512     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3513         dndi_top_first = 1;
3514         dn_progressive = 1;
3515     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3516         dndi_top_first = 1;
3517         dn_progressive = 0;
3518     } else {
3519         dndi_top_first = 0;
3520         dn_progressive = 0;
3521     }
3522
3523     if (dn_filter_param) {
3524         float value = dn_filter_param->value;
3525         
3526         if (value > 1.0)
3527             value = 1.0;
3528         
3529         if (value < 0.0)
3530             value = 0.0;
3531
3532         dn_strength = (int)(value * 31.0F);
3533     }
3534
3535     /* surface */
3536     obj_surface = SURFACE(src_surface->id);
3537     orig_w = obj_surface->orig_width;
3538     orig_h = obj_surface->orig_height;
3539     w = obj_surface->width;
3540     h = obj_surface->height;
3541
3542     if (pp_context->stmm.bo == NULL) {
3543         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
3544                                            "STMM surface",
3545                                            w * h,
3546                                            4096);
3547         assert(pp_context->stmm.bo);
3548     }
3549
3550     /* source UV surface index 1 */
3551     gen7_pp_set_surface_state(ctx, pp_context,
3552                               obj_surface->bo, w * h,
3553                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3554                               1, 0);
3555
3556     /* source YUV surface index 3 */
3557     gen7_pp_set_surface2_state(ctx, pp_context,
3558                                obj_surface->bo, 0,
3559                                orig_w, orig_h, w,
3560                                0, h,
3561                                SURFACE_FORMAT_PLANAR_420_8, 1,
3562                                3);
3563
3564     /* source (temporal reference) YUV surface index 4 */
3565     gen7_pp_set_surface2_state(ctx, pp_context,
3566                                obj_surface->bo, 0,
3567                                orig_w, orig_h, w,
3568                                0, h,
3569                                SURFACE_FORMAT_PLANAR_420_8, 1,
3570                                4);
3571
3572     /* STMM / History Statistics input surface, index 5 */
3573     gen7_pp_set_surface_state(ctx, pp_context,
3574                               pp_context->stmm.bo, 0,
3575                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3576                               5, 1);
3577
3578     /* destination surface */
3579     obj_surface = SURFACE(dst_surface->id);
3580     orig_w = obj_surface->orig_width;
3581     orig_h = obj_surface->orig_height;
3582     w = obj_surface->width;
3583     h = obj_surface->height;
3584
3585     /* destination Y surface index 24 */
3586     gen7_pp_set_surface_state(ctx, pp_context,
3587                               obj_surface->bo, 0,
3588                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3589                               24, 1);
3590
3591     /* destination UV surface index 25 */
3592     gen7_pp_set_surface_state(ctx, pp_context,
3593                               obj_surface->bo, w * h,
3594                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3595                               25, 1);
3596
3597     /* sampler dn */
3598     dri_bo_map(pp_context->sampler_state_table.bo, True);
3599     assert(pp_context->sampler_state_table.bo->virtual);
3600     assert(sizeof(*sampler_dn) == sizeof(int) * 8);
3601     sampler_dn = pp_context->sampler_state_table.bo->virtual;
3602
3603     /* sample dn index 1 */
3604     index = 0;
3605     sampler_dn[index].dw0.denoise_asd_threshold = 0;
3606     sampler_dn[index].dw0.dnmh_delt = 8;
3607     sampler_dn[index].dw0.vdi_walker_y_stride = 0;
3608     sampler_dn[index].dw0.vdi_walker_frame_sharing_enable = 0;
3609     sampler_dn[index].dw0.denoise_maximum_history = 128;      // 128-240
3610     sampler_dn[index].dw0.denoise_stad_threshold = 0;
3611
3612     sampler_dn[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3613     sampler_dn[index].dw1.denoise_moving_pixel_threshold = 0;
3614     sampler_dn[index].dw1.stmm_c2 = 0;
3615     sampler_dn[index].dw1.low_temporal_difference_threshold = 8;
3616     sampler_dn[index].dw1.temporal_difference_threshold = 16;
3617
3618     sampler_dn[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
3619     sampler_dn[index].dw2.bne_edge_th = 1;
3620     sampler_dn[index].dw2.smooth_mv_th = 0;
3621     sampler_dn[index].dw2.sad_tight_th = 5;
3622     sampler_dn[index].dw2.cat_slope_minus1 = 9;
3623     sampler_dn[index].dw2.good_neighbor_th = 4;
3624
3625     sampler_dn[index].dw3.maximum_stmm = 128;
3626     sampler_dn[index].dw3.multipler_for_vecm = 2;
3627     sampler_dn[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3628     sampler_dn[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3629     sampler_dn[index].dw3.stmm_blending_constant_select = 0;
3630
3631     sampler_dn[index].dw4.sdi_delta = 8;
3632     sampler_dn[index].dw4.sdi_threshold = 128;
3633     sampler_dn[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3634     sampler_dn[index].dw4.stmm_shift_up = 0;
3635     sampler_dn[index].dw4.stmm_shift_down = 0;
3636     sampler_dn[index].dw4.minimum_stmm = 0;
3637
3638     sampler_dn[index].dw5.fmd_temporal_difference_threshold = 0;
3639     sampler_dn[index].dw5.sdi_fallback_mode_2_constant = 0;
3640     sampler_dn[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3641     sampler_dn[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3642
3643     sampler_dn[index].dw6.dn_enable = 1;
3644     sampler_dn[index].dw6.di_enable = 0;
3645     sampler_dn[index].dw6.di_partial = 0;
3646     sampler_dn[index].dw6.dndi_top_first = dndi_top_first;
3647     sampler_dn[index].dw6.dndi_stream_id = 1;
3648     sampler_dn[index].dw6.dndi_first_frame = 1;
3649     sampler_dn[index].dw6.progressive_dn = dn_progressive;
3650     sampler_dn[index].dw6.mcdi_enable = 0;
3651     sampler_dn[index].dw6.fmd_tear_threshold = 32;
3652     sampler_dn[index].dw6.cat_th1 = 0;
3653     sampler_dn[index].dw6.fmd2_vertical_difference_threshold = 32;
3654     sampler_dn[index].dw6.fmd1_vertical_difference_threshold = 32;
3655
3656     sampler_dn[index].dw7.sad_tha = 5;
3657     sampler_dn[index].dw7.sad_thb = 10;
3658     sampler_dn[index].dw7.fmd_for_1st_field_of_current_frame = 2;
3659     sampler_dn[index].dw7.mc_pixel_consistency_th = 25;
3660     sampler_dn[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
3661     sampler_dn[index].dw7.vdi_walker_enable = 0;
3662     sampler_dn[index].dw7.neighborpixel_th = 10;
3663     sampler_dn[index].dw7.column_width_minus1 = w / 16;
3664
3665     dri_bo_unmap(pp_context->sampler_state_table.bo);
3666
3667     /* private function & data */
3668     pp_context->pp_x_steps = gen7_pp_dn_x_steps;
3669     pp_context->pp_y_steps = gen7_pp_dn_y_steps;
3670     pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;
3671
3672     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3673     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3674     pp_static_parameter->grf1.di_top_field_first = 0;
3675     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3676
3677     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3678     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3679     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3680
3681     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3682     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3683
3684     pp_dn_context->dest_w = w;
3685     pp_dn_context->dest_h = h;
3686
3687     dst_surface->flags = src_surface->flags;
3688
3689     return VA_STATUS_SUCCESS;
3690 }
3691
3692 static VAStatus
3693 ironlake_pp_initialize(
3694     VADriverContextP   ctx,
3695     struct i965_post_processing_context *pp_context,
3696     const struct i965_surface *src_surface,
3697     const VARectangle *src_rect,
3698     struct i965_surface *dst_surface,
3699     const VARectangle *dst_rect,
3700     int                pp_index,
3701     void *filter_param
3702 )
3703 {
3704     VAStatus va_status;
3705     struct i965_driver_data *i965 = i965_driver_data(ctx);
3706     struct pp_module *pp_module;
3707     dri_bo *bo;
3708     int static_param_size, inline_param_size;
3709
3710     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3711     bo = dri_bo_alloc(i965->intel.bufmgr,
3712                       "surface state & binding table",
3713                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
3714                       4096);
3715     assert(bo);
3716     pp_context->surface_state_binding_table.bo = bo;
3717
3718     dri_bo_unreference(pp_context->curbe.bo);
3719     bo = dri_bo_alloc(i965->intel.bufmgr,
3720                       "constant buffer",
3721                       4096, 
3722                       4096);
3723     assert(bo);
3724     pp_context->curbe.bo = bo;
3725
3726     dri_bo_unreference(pp_context->idrt.bo);
3727     bo = dri_bo_alloc(i965->intel.bufmgr, 
3728                       "interface discriptor", 
3729                       sizeof(struct i965_interface_descriptor), 
3730                       4096);
3731     assert(bo);
3732     pp_context->idrt.bo = bo;
3733     pp_context->idrt.num_interface_descriptors = 0;
3734
3735     dri_bo_unreference(pp_context->sampler_state_table.bo);
3736     bo = dri_bo_alloc(i965->intel.bufmgr, 
3737                       "sampler state table", 
3738                       4096,
3739                       4096);
3740     assert(bo);
3741     dri_bo_map(bo, True);
3742     memset(bo->virtual, 0, bo->size);
3743     dri_bo_unmap(bo);
3744     pp_context->sampler_state_table.bo = bo;
3745
3746     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3747     bo = dri_bo_alloc(i965->intel.bufmgr, 
3748                       "sampler 8x8 state ",
3749                       4096,
3750                       4096);
3751     assert(bo);
3752     pp_context->sampler_state_table.bo_8x8 = bo;
3753
3754     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3755     bo = dri_bo_alloc(i965->intel.bufmgr, 
3756                       "sampler 8x8 state ",
3757                       4096,
3758                       4096);
3759     assert(bo);
3760     pp_context->sampler_state_table.bo_8x8_uv = bo;
3761
3762     dri_bo_unreference(pp_context->vfe_state.bo);
3763     bo = dri_bo_alloc(i965->intel.bufmgr, 
3764                       "vfe state", 
3765                       sizeof(struct i965_vfe_state), 
3766                       4096);
3767     assert(bo);
3768     pp_context->vfe_state.bo = bo;
3769
3770     static_param_size = sizeof(struct pp_static_parameter);
3771     inline_param_size = sizeof(struct pp_inline_parameter);
3772
3773     memset(pp_context->pp_static_parameter, 0, static_param_size);
3774     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
3775     
3776     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
3777     pp_context->current_pp = pp_index;
3778     pp_module = &pp_context->pp_modules[pp_index];
3779     
3780     if (pp_module->initialize)
3781         va_status = pp_module->initialize(ctx, pp_context,
3782                                           src_surface,
3783                                           src_rect,
3784                                           dst_surface,
3785                                           dst_rect,
3786                                           filter_param);
3787     else
3788         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
3789
3790     return va_status;
3791 }
3792
3793 static VAStatus
3794 ironlake_post_processing(
3795     VADriverContextP   ctx,
3796     struct i965_post_processing_context *pp_context,
3797     const struct i965_surface *src_surface,
3798     const VARectangle *src_rect,
3799     struct i965_surface *dst_surface,
3800     const VARectangle *dst_rect,
3801     int                pp_index,
3802     void *filter_param
3803 )
3804 {
3805     VAStatus va_status;
3806
3807     va_status = ironlake_pp_initialize(ctx, pp_context,
3808                                        src_surface,
3809                                        src_rect,
3810                                        dst_surface,
3811                                        dst_rect,
3812                                        pp_index,
3813                                        filter_param);
3814
3815     if (va_status == VA_STATUS_SUCCESS) {
3816         ironlake_pp_states_setup(ctx, pp_context);
3817         ironlake_pp_pipeline_setup(ctx, pp_context);
3818     }
3819
3820     return va_status;
3821 }
3822
3823 static VAStatus
3824 gen6_pp_initialize(
3825     VADriverContextP   ctx,
3826     struct i965_post_processing_context *pp_context,
3827     const struct i965_surface *src_surface,
3828     const VARectangle *src_rect,
3829     struct i965_surface *dst_surface,
3830     const VARectangle *dst_rect,
3831     int                pp_index,
3832     void *filter_param
3833 )
3834 {
3835     VAStatus va_status;
3836     struct i965_driver_data *i965 = i965_driver_data(ctx);
3837     struct pp_module *pp_module;
3838     dri_bo *bo;
3839     int static_param_size, inline_param_size;
3840
3841     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3842     bo = dri_bo_alloc(i965->intel.bufmgr,
3843                       "surface state & binding table",
3844                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
3845                       4096);
3846     assert(bo);
3847     pp_context->surface_state_binding_table.bo = bo;
3848
3849     dri_bo_unreference(pp_context->curbe.bo);
3850     bo = dri_bo_alloc(i965->intel.bufmgr,
3851                       "constant buffer",
3852                       4096, 
3853                       4096);
3854     assert(bo);
3855     pp_context->curbe.bo = bo;
3856
3857     dri_bo_unreference(pp_context->idrt.bo);
3858     bo = dri_bo_alloc(i965->intel.bufmgr, 
3859                       "interface discriptor", 
3860                       sizeof(struct gen6_interface_descriptor_data), 
3861                       4096);
3862     assert(bo);
3863     pp_context->idrt.bo = bo;
3864     pp_context->idrt.num_interface_descriptors = 0;
3865
3866     dri_bo_unreference(pp_context->sampler_state_table.bo);
3867     bo = dri_bo_alloc(i965->intel.bufmgr, 
3868                       "sampler state table", 
3869                       4096,
3870                       4096);
3871     assert(bo);
3872     dri_bo_map(bo, True);
3873     memset(bo->virtual, 0, bo->size);
3874     dri_bo_unmap(bo);
3875     pp_context->sampler_state_table.bo = bo;
3876
3877     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3878     bo = dri_bo_alloc(i965->intel.bufmgr, 
3879                       "sampler 8x8 state ",
3880                       4096,
3881                       4096);
3882     assert(bo);
3883     pp_context->sampler_state_table.bo_8x8 = bo;
3884
3885     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3886     bo = dri_bo_alloc(i965->intel.bufmgr, 
3887                       "sampler 8x8 state ",
3888                       4096,
3889                       4096);
3890     assert(bo);
3891     pp_context->sampler_state_table.bo_8x8_uv = bo;
3892
3893     dri_bo_unreference(pp_context->vfe_state.bo);
3894     bo = dri_bo_alloc(i965->intel.bufmgr, 
3895                       "vfe state", 
3896                       sizeof(struct i965_vfe_state), 
3897                       4096);
3898     assert(bo);
3899     pp_context->vfe_state.bo = bo;
3900     
3901     if (IS_GEN7(i965->intel.device_id)) {
3902         static_param_size = sizeof(struct gen7_pp_static_parameter);
3903         inline_param_size = sizeof(struct gen7_pp_inline_parameter);
3904     } else {
3905         static_param_size = sizeof(struct pp_static_parameter);
3906         inline_param_size = sizeof(struct pp_inline_parameter);
3907     }
3908
3909     memset(pp_context->pp_static_parameter, 0, static_param_size);
3910     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
3911
3912     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
3913     pp_context->current_pp = pp_index;
3914     pp_module = &pp_context->pp_modules[pp_index];
3915     
3916     if (pp_module->initialize)
3917         va_status = pp_module->initialize(ctx, pp_context,
3918                                           src_surface,
3919                                           src_rect,
3920                                           dst_surface,
3921                                           dst_rect,
3922                                           filter_param);
3923     else
3924         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
3925
3926     calculate_boundary_block_mask(pp_context, dst_rect);
3927     
3928     return va_status;
3929 }
3930
3931 static void
3932 gen6_pp_interface_descriptor_table(VADriverContextP   ctx,
3933                                    struct i965_post_processing_context *pp_context)
3934 {
3935     struct i965_driver_data *i965 = i965_driver_data(ctx);
3936     struct gen6_interface_descriptor_data *desc;
3937     dri_bo *bo;
3938     int pp_index = pp_context->current_pp;
3939
3940     bo = pp_context->idrt.bo;
3941     dri_bo_map(bo, True);
3942     assert(bo->virtual);
3943     desc = bo->virtual;
3944     memset(desc, 0, sizeof(*desc));
3945     desc->desc0.kernel_start_pointer = 
3946         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
3947     desc->desc1.single_program_flow = 1;
3948     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
3949     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
3950     desc->desc2.sampler_state_pointer = 
3951         pp_context->sampler_state_table.bo->offset >> 5;
3952     desc->desc3.binding_table_entry_count = 0;
3953     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
3954     desc->desc4.constant_urb_entry_read_offset = 0;
3955
3956     if (IS_GEN7(i965->intel.device_id))
3957         desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */
3958     else
3959         desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
3960
3961     dri_bo_emit_reloc(bo,
3962                       I915_GEM_DOMAIN_INSTRUCTION, 0,
3963                       0,
3964                       offsetof(struct gen6_interface_descriptor_data, desc0),
3965                       pp_context->pp_modules[pp_index].kernel.bo);
3966
3967     dri_bo_emit_reloc(bo,
3968                       I915_GEM_DOMAIN_INSTRUCTION, 0,
3969                       desc->desc2.sampler_count << 2,
3970                       offsetof(struct gen6_interface_descriptor_data, desc2),
3971                       pp_context->sampler_state_table.bo);
3972
3973     dri_bo_unmap(bo);
3974     pp_context->idrt.num_interface_descriptors++;
3975 }
3976
3977 static void
3978 gen6_pp_upload_constants(VADriverContextP ctx,
3979                          struct i965_post_processing_context *pp_context)
3980 {
3981     struct i965_driver_data *i965 = i965_driver_data(ctx);
3982     unsigned char *constant_buffer;
3983     int param_size;
3984
3985     assert(sizeof(struct pp_static_parameter) == 128);
3986     assert(sizeof(struct gen7_pp_static_parameter) == 192);
3987
3988     if (IS_GEN7(i965->intel.device_id))
3989         param_size = sizeof(struct gen7_pp_static_parameter);
3990     else
3991         param_size = sizeof(struct pp_static_parameter);
3992
3993     dri_bo_map(pp_context->curbe.bo, 1);
3994     assert(pp_context->curbe.bo->virtual);
3995     constant_buffer = pp_context->curbe.bo->virtual;
3996     memcpy(constant_buffer, pp_context->pp_static_parameter, param_size);
3997     dri_bo_unmap(pp_context->curbe.bo);
3998 }
3999
4000 static void
4001 gen6_pp_states_setup(VADriverContextP ctx,
4002                      struct i965_post_processing_context *pp_context)
4003 {
4004     gen6_pp_interface_descriptor_table(ctx, pp_context);
4005     gen6_pp_upload_constants(ctx, pp_context);
4006 }
4007
4008 static void
4009 gen6_pp_pipeline_select(VADriverContextP ctx,
4010                         struct i965_post_processing_context *pp_context)
4011 {
4012     struct intel_batchbuffer *batch = pp_context->batch;
4013
4014     BEGIN_BATCH(batch, 1);
4015     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
4016     ADVANCE_BATCH(batch);
4017 }
4018
4019 static void
4020 gen6_pp_state_base_address(VADriverContextP ctx,
4021                            struct i965_post_processing_context *pp_context)
4022 {
4023     struct intel_batchbuffer *batch = pp_context->batch;
4024
4025     BEGIN_BATCH(batch, 10);
4026     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
4027     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4028     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
4029     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4030     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4031     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4032     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4033     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4034     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4035     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4036     ADVANCE_BATCH(batch);
4037 }
4038
4039 static void
4040 gen6_pp_vfe_state(VADriverContextP ctx,
4041                   struct i965_post_processing_context *pp_context)
4042 {
4043     struct intel_batchbuffer *batch = pp_context->batch;
4044
4045     BEGIN_BATCH(batch, 8);
4046     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
4047     OUT_BATCH(batch, 0);
4048     OUT_BATCH(batch,
4049               (pp_context->urb.num_vfe_entries - 1) << 16 |
4050               pp_context->urb.num_vfe_entries << 8);
4051     OUT_BATCH(batch, 0);
4052     OUT_BATCH(batch,
4053               (pp_context->urb.size_vfe_entry * 2) << 16 |  /* URB Entry Allocation Size, in 256 bits unit */
4054               (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2)); /* CURBE Allocation Size, in 256 bits unit */
4055     OUT_BATCH(batch, 0);
4056     OUT_BATCH(batch, 0);
4057     OUT_BATCH(batch, 0);
4058     ADVANCE_BATCH(batch);
4059 }
4060
4061 static void
4062 gen6_pp_curbe_load(VADriverContextP ctx,
4063                    struct i965_post_processing_context *pp_context)
4064 {
4065     struct intel_batchbuffer *batch = pp_context->batch;
4066
4067     assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32 <= pp_context->curbe.bo->size);
4068
4069     BEGIN_BATCH(batch, 4);
4070     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
4071     OUT_BATCH(batch, 0);
4072     OUT_BATCH(batch,
4073               pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32);
4074     OUT_RELOC(batch, 
4075               pp_context->curbe.bo,
4076               I915_GEM_DOMAIN_INSTRUCTION, 0,
4077               0);
4078     ADVANCE_BATCH(batch);
4079 }
4080
4081 static void
4082 gen6_interface_descriptor_load(VADriverContextP ctx,
4083                                struct i965_post_processing_context *pp_context)
4084 {
4085     struct intel_batchbuffer *batch = pp_context->batch;
4086
4087     BEGIN_BATCH(batch, 4);
4088     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
4089     OUT_BATCH(batch, 0);
4090     OUT_BATCH(batch,
4091               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
4092     OUT_RELOC(batch, 
4093               pp_context->idrt.bo,
4094               I915_GEM_DOMAIN_INSTRUCTION, 0,
4095               0);
4096     ADVANCE_BATCH(batch);
4097 }
4098
4099 static void update_block_mask_parameter(struct i965_post_processing_context *pp_context, int x, int y, int x_steps, int y_steps) 
4100 {
4101     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
4102
4103     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
4104     pp_inline_parameter->grf6.block_vertical_mask_bottom = pp_context->block_vertical_mask_bottom;
4105     // for the first block, it always on the left edge. the second block will reload horizontal_mask from grf6.block_horizontal_mask_middle
4106     pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_left;
4107     pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
4108     pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_right;
4109
4110     /* 1 x N */
4111     if (x_steps == 1) {
4112         if (y == y_steps-1) {
4113             pp_inline_parameter->grf5.block_vertical_mask = pp_context->block_vertical_mask_bottom;
4114         }
4115         else {
4116             pp_inline_parameter->grf6.block_vertical_mask_bottom = 0xff;
4117         }
4118     }
4119
4120     /* M x 1 */
4121     if (y_steps == 1) {
4122         if (x == 0) { // all blocks in this group are on the left edge
4123             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_left;
4124             pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_left; 
4125         }
4126         else if (x == x_steps-1) {
4127             pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_right;
4128             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_right;
4129         }
4130         else {
4131             pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
4132             pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
4133             pp_inline_parameter->grf6.block_horizontal_mask_right = 0xffff;
4134         }
4135     }
4136
4137 }
4138
4139 static void
4140 gen6_pp_object_walker(VADriverContextP ctx,
4141                       struct i965_post_processing_context *pp_context)
4142 {
4143     struct i965_driver_data *i965 = i965_driver_data(ctx);
4144     struct intel_batchbuffer *batch = pp_context->batch;
4145     int x, x_steps, y, y_steps;
4146     int param_size, command_length_in_dws;
4147     dri_bo *command_buffer;
4148     unsigned int *command_ptr;
4149
4150     if (IS_GEN7(i965->intel.device_id))
4151         param_size = sizeof(struct gen7_pp_inline_parameter);
4152     else
4153         param_size = sizeof(struct pp_inline_parameter);
4154
4155     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
4156     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
4157     command_length_in_dws = 6 + (param_size >> 2);
4158     command_buffer = dri_bo_alloc(i965->intel.bufmgr,
4159                                   "command objects buffer",
4160                                   command_length_in_dws * 4 * x_steps * y_steps + 8,
4161                                   4096);
4162
4163     dri_bo_map(command_buffer, 1);
4164     command_ptr = command_buffer->virtual;
4165
4166     for (y = 0; y < y_steps; y++) {
4167         for (x = 0; x < x_steps; x++) {
4168             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
4169                 // some common block parameter update goes here, apply to all pp functions
4170                 if (IS_GEN6(i965->intel.device_id))
4171                     update_block_mask_parameter (pp_context, x, y, x_steps, y_steps);
4172                 
4173                 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
4174                 *command_ptr++ = 0;
4175                 *command_ptr++ = 0;
4176                 *command_ptr++ = 0;
4177                 *command_ptr++ = 0;
4178                 *command_ptr++ = 0;
4179                 memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
4180                 command_ptr += (param_size >> 2);
4181             }
4182         }
4183     }
4184
4185     if (command_length_in_dws * x_steps * y_steps % 2 == 0)
4186         *command_ptr++ = 0;
4187
4188     *command_ptr = MI_BATCH_BUFFER_END;
4189
4190     dri_bo_unmap(command_buffer);
4191
4192     BEGIN_BATCH(batch, 2);
4193     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
4194     OUT_RELOC(batch, command_buffer, 
4195               I915_GEM_DOMAIN_COMMAND, 0, 
4196               0);
4197     ADVANCE_BATCH(batch);
4198     
4199     dri_bo_unreference(command_buffer);
4200
4201     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
4202      * will cause control to pass back to ring buffer 
4203      */
4204     intel_batchbuffer_end_atomic(batch);
4205     intel_batchbuffer_flush(batch);
4206     intel_batchbuffer_start_atomic(batch, 0x1000);
4207 }
4208
4209 static void
4210 gen6_pp_pipeline_setup(VADriverContextP ctx,
4211                        struct i965_post_processing_context *pp_context)
4212 {
4213     struct intel_batchbuffer *batch = pp_context->batch;
4214
4215     intel_batchbuffer_start_atomic(batch, 0x1000);
4216     intel_batchbuffer_emit_mi_flush(batch);
4217     gen6_pp_pipeline_select(ctx, pp_context);
4218     gen6_pp_state_base_address(ctx, pp_context);
4219     gen6_pp_vfe_state(ctx, pp_context);
4220     gen6_pp_curbe_load(ctx, pp_context);
4221     gen6_interface_descriptor_load(ctx, pp_context);
4222     gen6_pp_object_walker(ctx, pp_context);
4223     intel_batchbuffer_end_atomic(batch);
4224 }
4225
4226 static VAStatus
4227 gen6_post_processing(
4228     VADriverContextP   ctx,
4229     struct i965_post_processing_context *pp_context,
4230     const struct i965_surface *src_surface,
4231     const VARectangle *src_rect,
4232     struct i965_surface *dst_surface,
4233     const VARectangle *dst_rect,
4234     int                pp_index,
4235     void * filter_param
4236 )
4237 {
4238     VAStatus va_status;
4239     
4240     va_status = gen6_pp_initialize(ctx, pp_context,
4241                                    src_surface,
4242                                    src_rect,
4243                                    dst_surface,
4244                                    dst_rect,
4245                                    pp_index,
4246                                    filter_param);
4247
4248     if (va_status == VA_STATUS_SUCCESS) {
4249         gen6_pp_states_setup(ctx, pp_context);
4250         gen6_pp_pipeline_setup(ctx, pp_context);
4251     }
4252
4253     return va_status;
4254 }
4255
4256 static VAStatus
4257 i965_post_processing_internal(
4258     VADriverContextP   ctx,
4259     struct i965_post_processing_context *pp_context,
4260     const struct i965_surface *src_surface,
4261     const VARectangle *src_rect,
4262     struct i965_surface *dst_surface,
4263     const VARectangle *dst_rect,
4264     int                pp_index,
4265     void *filter_param
4266 )
4267 {
4268     struct i965_driver_data *i965 = i965_driver_data(ctx);
4269     VAStatus va_status;
4270
4271     if (IS_GEN6(i965->intel.device_id) ||
4272         IS_GEN7(i965->intel.device_id))
4273         va_status = gen6_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
4274     else
4275         va_status = ironlake_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
4276     
4277     return va_status;
4278 }
4279
4280 VAStatus 
4281 i965_DestroySurfaces(VADriverContextP ctx,
4282                      VASurfaceID *surface_list,
4283                      int num_surfaces);
4284 VAStatus 
4285 i965_CreateSurfaces(VADriverContextP ctx,
4286                     int width,
4287                     int height,
4288                     int format,
4289                     int num_surfaces,
4290                     VASurfaceID *surfaces);
4291
/*
 * Convert a packed 32-bit ARGB value (alpha in bits 31..24, red in
 * 23..16, green in 15..8, blue in 7..0) to Y, U, V and alpha bytes.
 *
 * The conversion uses integer coefficients scaled by 1000, with an
 * offset of 16 for Y and 128 for U and V. The alpha byte is passed
 * through unchanged.
 */
static void
rgb_to_yuv(unsigned int argb,
           unsigned char *y,
           unsigned char *u,
           unsigned char *v,
           unsigned char *a)
{
    const int red   = (int)((argb >> 16) & 0xff);
    const int green = (int)((argb >> 8) & 0xff);
    const int blue  = (int)(argb & 0xff);

    /* Weighted sums, still scaled by 1000. */
    const int luma_x1000 = 257 * red + 504 * green + 98 * blue;
    const int cr_x1000   = 439 * red - 368 * green - 71 * blue;
    const int cb_x1000   = -148 * red - 291 * green + 439 * blue;

    *y = luma_x1000 / 1000 + 16;
    *v = cr_x1000 / 1000 + 128;
    *u = cb_x1000 / 1000 + 128;
    *a = (argb >> 24) & 0xff;
}
4308
4309 static void 
4310 i965_vpp_clear_surface(VADriverContextP ctx,
4311                        struct i965_post_processing_context *pp_context,
4312                        VASurfaceID surface,
4313                        unsigned int color)
4314 {
4315     struct i965_driver_data *i965 = i965_driver_data(ctx);
4316     struct intel_batchbuffer *batch = pp_context->batch;
4317     struct object_surface *obj_surface = SURFACE(surface);
4318     unsigned int blt_cmd, br13;
4319     unsigned int tiling = 0, swizzle = 0;
4320     int pitch;
4321     unsigned char y, u, v, a = 0;
4322
4323     /* Currently only support NV12 surface */
4324     if (!obj_surface || obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
4325         return;
4326
4327     rgb_to_yuv(color, &y, &u, &v, &a);
4328
4329     if (a == 0)
4330         return;
4331
4332     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
4333     blt_cmd = XY_COLOR_BLT_CMD;
4334     pitch = obj_surface->width;
4335
4336     if (tiling != I915_TILING_NONE) {
4337         blt_cmd |= XY_COLOR_BLT_DST_TILED;
4338         pitch >>= 2;
4339     }
4340
4341     br13 = 0xf0 << 16;
4342     br13 |= BR13_8;
4343     br13 |= pitch;
4344
4345     if (IS_GEN6(i965->intel.device_id) ||
4346         IS_GEN7(i965->intel.device_id)) {
4347         intel_batchbuffer_start_atomic_blt(batch, 48);
4348         BEGIN_BLT_BATCH(batch, 12);
4349     } else {
4350         intel_batchbuffer_start_atomic(batch, 48);
4351         BEGIN_BATCH(batch, 12);
4352     }
4353
4354     OUT_BATCH(batch, blt_cmd);
4355     OUT_BATCH(batch, br13);
4356     OUT_BATCH(batch,
4357               0 << 16 |
4358               0);
4359     OUT_BATCH(batch,
4360               obj_surface->height << 16 |
4361               obj_surface->width);
4362     OUT_RELOC(batch, obj_surface->bo, 
4363               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4364               0);
4365     OUT_BATCH(batch, y);
4366
4367     br13 = 0xf0 << 16;
4368     br13 |= BR13_565;
4369     br13 |= pitch;
4370
4371     OUT_BATCH(batch, blt_cmd);
4372     OUT_BATCH(batch, br13);
4373     OUT_BATCH(batch,
4374               0 << 16 |
4375               0);
4376     OUT_BATCH(batch,
4377               obj_surface->height / 2 << 16 |
4378               obj_surface->width / 2);
4379     OUT_RELOC(batch, obj_surface->bo, 
4380               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4381               obj_surface->width * obj_surface->y_cb_offset);
4382     OUT_BATCH(batch, v << 8 | u);
4383
4384     ADVANCE_BATCH(batch);
4385     intel_batchbuffer_end_atomic(batch);
4386 }
4387
4388 VAStatus
4389 i965_scaling_processing(
4390     VADriverContextP   ctx,
4391     VASurfaceID        src_surface_id,
4392     const VARectangle *src_rect,
4393     VASurfaceID        dst_surface_id,
4394     const VARectangle *dst_rect,
4395     unsigned int       flags)
4396 {
4397     VAStatus va_status = VA_STATUS_SUCCESS;
4398     struct i965_driver_data *i965 = i965_driver_data(ctx);
4399     struct object_surface *src_surface_obj = SURFACE(src_surface_id);
4400     struct object_surface *dst_surface_obj = SURFACE(dst_surface_id);
4401  
4402     assert(src_surface_obj->fourcc == VA_FOURCC('N', 'V', '1', '2'));
4403     assert(dst_surface_obj->fourcc == VA_FOURCC('N', 'V', '1', '2'));
4404
4405     if (HAS_PP(i965) && (flags & I965_PP_FLAG_AVS)) {
4406         struct i965_surface src_surface;
4407         struct i965_surface dst_surface;
4408
4409          _i965LockMutex(&i965->pp_mutex);
4410
4411          src_surface.id = src_surface_id;
4412          src_surface.type = I965_SURFACE_TYPE_SURFACE;
4413          src_surface.flags = I965_SURFACE_FLAG_FRAME;
4414          dst_surface.id = dst_surface_id;
4415          dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4416          dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4417
4418          va_status = i965_post_processing_internal(ctx, i965->pp_context,
4419                                                    &src_surface,
4420                                                    src_rect,
4421                                                    &dst_surface,
4422                                                    dst_rect,
4423                                                    PP_NV12_AVS,
4424                                                    NULL);
4425
4426          _i965UnlockMutex(&i965->pp_mutex);
4427     }
4428
4429     return va_status;
4430 }
4431
4432 VASurfaceID
4433 i965_post_processing(
4434     VADriverContextP   ctx,
4435     VASurfaceID        surface,
4436     const VARectangle *src_rect,
4437     const VARectangle *dst_rect,
4438     unsigned int       flags,
4439     int               *has_done_scaling  
4440 )
4441 {
4442     struct i965_driver_data *i965 = i965_driver_data(ctx);
4443     VASurfaceID in_surface_id = surface;
4444     VASurfaceID out_surface_id = VA_INVALID_ID;
4445     
4446     *has_done_scaling = 0;
4447
4448     if (HAS_PP(i965)) {
4449         struct object_surface *obj_surface;
4450         VAStatus status;
4451         struct i965_surface src_surface;
4452         struct i965_surface dst_surface;
4453
4454         obj_surface = SURFACE(in_surface_id);
4455
4456         /* Currently only support post processing for NV12 surface */
4457         if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
4458             return out_surface_id;
4459
4460         _i965LockMutex(&i965->pp_mutex);
4461
4462         if (flags & I965_PP_FLAG_MCDI) {
4463             status = i965_CreateSurfaces(ctx,
4464                                          obj_surface->orig_width,
4465                                          obj_surface->orig_height,
4466                                          VA_RT_FORMAT_YUV420,
4467                                          1,
4468                                          &out_surface_id);
4469             assert(status == VA_STATUS_SUCCESS);
4470             obj_surface = SURFACE(out_surface_id);
4471             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4472             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
4473             src_surface.id = in_surface_id;
4474             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4475             src_surface.flags = (flags & I965_PP_FLAG_TOP_FIELD) ? 
4476                 I965_SURFACE_FLAG_TOP_FIELD_FIRST : I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST;
4477             dst_surface.id = out_surface_id;
4478             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4479             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4480
4481             i965_post_processing_internal(ctx, i965->pp_context,
4482                                           &src_surface,
4483                                           src_rect,
4484                                           &dst_surface,
4485                                           dst_rect,
4486                                           PP_NV12_DNDI,
4487                                           NULL);
4488         }
4489
4490         if (flags & I965_PP_FLAG_AVS) {
4491             struct i965_render_state *render_state = &i965->render_state;
4492             struct intel_region *dest_region = render_state->draw_region;
4493
4494             if (out_surface_id != VA_INVALID_ID)
4495                 in_surface_id = out_surface_id;
4496
4497             status = i965_CreateSurfaces(ctx,
4498                                          dest_region->width,
4499                                          dest_region->height,
4500                                          VA_RT_FORMAT_YUV420,
4501                                          1,
4502                                          &out_surface_id);
4503             assert(status == VA_STATUS_SUCCESS);
4504             obj_surface = SURFACE(out_surface_id);
4505             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4506             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
4507             src_surface.id = in_surface_id;
4508             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4509             src_surface.flags = I965_SURFACE_FLAG_FRAME;
4510             dst_surface.id = out_surface_id;
4511             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4512             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4513
4514             i965_post_processing_internal(ctx, i965->pp_context,
4515                                           &src_surface,
4516                                           src_rect,
4517                                           &dst_surface,
4518                                           dst_rect,
4519                                           PP_NV12_AVS,
4520                                           NULL);
4521
4522             if (in_surface_id != surface)
4523                 i965_DestroySurfaces(ctx, &in_surface_id, 1);
4524                 
4525             *has_done_scaling = 1;
4526         }
4527
4528         _i965UnlockMutex(&i965->pp_mutex);
4529     }
4530
4531     return out_surface_id;
4532 }       
4533
4534 static VAStatus
4535 i965_image_pl1_rgbx_processing(VADriverContextP ctx,
4536                           const struct i965_surface *src_surface,
4537                           const VARectangle *src_rect,
4538                           struct i965_surface *dst_surface,
4539                           const VARectangle *dst_rect)
4540 {
4541     struct i965_driver_data *i965 = i965_driver_data(ctx);
4542     struct i965_post_processing_context *pp_context = i965->pp_context;
4543     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4544
4545     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4546         i965_post_processing_internal(ctx, i965->pp_context,
4547                                       src_surface,
4548                                       src_rect,
4549                                       dst_surface,
4550                                       dst_rect,
4551                                       PP_RGBX_LOAD_SAVE_NV12,
4552                                       NULL);
4553     } else {
4554         assert(0);
4555         return VA_STATUS_ERROR_UNKNOWN;
4556     }
4557
4558     intel_batchbuffer_flush(pp_context->batch);
4559
4560     return VA_STATUS_SUCCESS;
4561 }
4562
4563 static VAStatus
4564 i965_image_pl3_processing(VADriverContextP ctx,
4565                           const struct i965_surface *src_surface,
4566                           const VARectangle *src_rect,
4567                           struct i965_surface *dst_surface,
4568                           const VARectangle *dst_rect)
4569 {
4570     struct i965_driver_data *i965 = i965_driver_data(ctx);
4571     struct i965_post_processing_context *pp_context = i965->pp_context;
4572     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4573     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
4574
4575     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4576         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4577                                                  src_surface,
4578                                                  src_rect,
4579                                                  dst_surface,
4580                                                  dst_rect,
4581                                                  PP_PL3_LOAD_SAVE_N12,
4582                                                  NULL);
4583     } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || 
4584                fourcc == VA_FOURCC('I', 'M', 'C', '3') || 
4585                fourcc == VA_FOURCC('Y', 'V', '1', '2') || 
4586                fourcc == VA_FOURCC('I', '4', '2', '0')) {
4587         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4588                                                  src_surface,
4589                                                  src_rect,
4590                                                  dst_surface,
4591                                                  dst_rect,
4592                                                  PP_PL3_LOAD_SAVE_PL3,
4593                                                  NULL);
4594     } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') ||
4595                fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
4596         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4597                                                  src_surface,
4598                                                  src_rect,
4599                                                  dst_surface,
4600                                                  dst_rect,
4601                                                  PP_PL3_LOAD_SAVE_PA,
4602                                                  NULL);
4603     }
4604     else {
4605         assert(0);
4606     }
4607
4608     intel_batchbuffer_flush(pp_context->batch);
4609
4610     return vaStatus;
4611 }
4612
4613 static VAStatus
4614 i965_image_pl2_processing(VADriverContextP ctx,
4615                           const struct i965_surface *src_surface,
4616                           const VARectangle *src_rect,
4617                           struct i965_surface *dst_surface,
4618                           const VARectangle *dst_rect)
4619 {
4620     struct i965_driver_data *i965 = i965_driver_data(ctx);
4621     struct i965_post_processing_context *pp_context = i965->pp_context;
4622     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4623     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
4624
4625     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4626         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4627                                                  src_surface,
4628                                                  src_rect,
4629                                                  dst_surface,
4630                                                  dst_rect,
4631                                                  PP_NV12_LOAD_SAVE_N12,
4632                                                  NULL);
4633     } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || 
4634                fourcc == VA_FOURCC('I', 'M', 'C', '3') || 
4635                fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
4636                fourcc == VA_FOURCC('I', '4', '2', '0') ) {
4637         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4638                                                  src_surface,
4639                                                  src_rect,
4640                                                  dst_surface,
4641                                                  dst_rect,
4642                                                  PP_NV12_LOAD_SAVE_PL3,
4643                                                  NULL);
4644     } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') ||
4645                fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
4646         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4647                                                  src_surface,
4648                                                  src_rect,
4649                                                  dst_surface,
4650                                                  dst_rect,
4651                                                  PP_NV12_LOAD_SAVE_PA,
4652                                                      NULL);
4653     } else if (fourcc == VA_FOURCC('B', 'G', 'R', 'X') || 
4654                fourcc == VA_FOURCC('B', 'G', 'R', 'A') ||
4655                fourcc == VA_FOURCC('R', 'G', 'B', 'X') ||
4656                fourcc == VA_FOURCC('R', 'G', 'B', 'A') ) {
4657         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4658                                       src_surface,
4659                                       src_rect,
4660                                       dst_surface,
4661                                       dst_rect,
4662                                       PP_NV12_LOAD_SAVE_RGBX,
4663                                       NULL);
4664     } else {
4665         assert(0);
4666         return VA_STATUS_ERROR_UNKNOWN;
4667     }
4668
4669     intel_batchbuffer_flush(pp_context->batch);
4670
4671     return vaStatus;
4672 }
4673
4674 static VAStatus
4675 i965_image_pl1_processing(VADriverContextP ctx,
4676                           const struct i965_surface *src_surface,
4677                           const VARectangle *src_rect,
4678                           struct i965_surface *dst_surface,
4679                           const VARectangle *dst_rect)
4680 {
4681     struct i965_driver_data *i965 = i965_driver_data(ctx);
4682     struct i965_post_processing_context *pp_context = i965->pp_context;
4683     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4684
4685     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4686         i965_post_processing_internal(ctx, i965->pp_context,
4687                                       src_surface,
4688                                       src_rect,
4689                                       dst_surface,
4690                                       dst_rect,
4691                                       PP_PA_LOAD_SAVE_NV12,
4692                                       NULL);
4693     }
4694     else if (fourcc == VA_FOURCC_YV12) {
4695         i965_post_processing_internal(ctx, i965->pp_context,
4696                                       src_surface,
4697                                       src_rect,
4698                                       dst_surface,
4699                                       dst_rect,
4700                                       PP_PA_LOAD_SAVE_PL3,
4701                                       NULL);
4702
4703     }
4704     else {
4705         return VA_STATUS_ERROR_UNKNOWN;
4706     }
4707
4708     intel_batchbuffer_flush(pp_context->batch);
4709
4710     return VA_STATUS_SUCCESS;
4711 }
4712
4713 VAStatus
4714 i965_image_processing(VADriverContextP ctx,
4715                       const struct i965_surface *src_surface,
4716                       const VARectangle *src_rect,
4717                       struct i965_surface *dst_surface,
4718                       const VARectangle *dst_rect)
4719 {
4720     struct i965_driver_data *i965 = i965_driver_data(ctx);
4721     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
4722
4723     if (HAS_PP(i965)) {
4724         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
4725
4726         _i965LockMutex(&i965->pp_mutex);
4727
4728         switch (fourcc) {
4729         case VA_FOURCC('Y', 'V', '1', '2'):
4730         case VA_FOURCC('I', '4', '2', '0'):
4731         case VA_FOURCC('I', 'M', 'C', '1'):
4732         case VA_FOURCC('I', 'M', 'C', '3'):
4733             status = i965_image_pl3_processing(ctx,
4734                                                src_surface,
4735                                                src_rect,
4736                                                dst_surface,
4737                                                dst_rect);
4738             break;
4739
4740         case  VA_FOURCC('N', 'V', '1', '2'):
4741             status = i965_image_pl2_processing(ctx,
4742                                                src_surface,
4743                                                src_rect,
4744                                                dst_surface,
4745                                                dst_rect);
4746             break;
4747         case  VA_FOURCC('Y', 'U', 'Y', '2'):
4748         case VA_FOURCC('U', 'Y', 'V', 'Y'):
4749             status = i965_image_pl1_processing(ctx,
4750                                                src_surface,
4751                                                src_rect,
4752                                                dst_surface,
4753                                                dst_rect);
4754             break;
4755         case VA_FOURCC('B', 'G', 'R', 'A'):
4756         case VA_FOURCC('B', 'G', 'R', 'X'):
4757         case VA_FOURCC('R', 'G', 'B', 'A'):
4758         case VA_FOURCC('R', 'G', 'B', 'X'):
4759             status = i965_image_pl1_rgbx_processing(ctx,
4760                                                src_surface,
4761                                                src_rect,
4762                                                dst_surface,
4763                                                dst_rect);
4764             break;
4765         default:
4766             status = VA_STATUS_ERROR_UNIMPLEMENTED;
4767             break;
4768         }
4769         
4770         _i965UnlockMutex(&i965->pp_mutex);
4771     }
4772
4773     return status;
4774 }       
4775
4776 static void
4777 i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
4778 {
4779     int i;
4780
4781     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4782     pp_context->surface_state_binding_table.bo = NULL;
4783
4784     dri_bo_unreference(pp_context->curbe.bo);
4785     pp_context->curbe.bo = NULL;
4786
4787     dri_bo_unreference(pp_context->sampler_state_table.bo);
4788     pp_context->sampler_state_table.bo = NULL;
4789
4790     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4791     pp_context->sampler_state_table.bo_8x8 = NULL;
4792
4793     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4794     pp_context->sampler_state_table.bo_8x8_uv = NULL;
4795
4796     dri_bo_unreference(pp_context->idrt.bo);
4797     pp_context->idrt.bo = NULL;
4798     pp_context->idrt.num_interface_descriptors = 0;
4799
4800     dri_bo_unreference(pp_context->vfe_state.bo);
4801     pp_context->vfe_state.bo = NULL;
4802
4803     dri_bo_unreference(pp_context->stmm.bo);
4804     pp_context->stmm.bo = NULL;
4805
4806     for (i = 0; i < NUM_PP_MODULES; i++) {
4807         struct pp_module *pp_module = &pp_context->pp_modules[i];
4808
4809         dri_bo_unreference(pp_module->kernel.bo);
4810         pp_module->kernel.bo = NULL;
4811     }
4812
4813     free(pp_context->pp_static_parameter);
4814     free(pp_context->pp_inline_parameter);
4815     pp_context->pp_static_parameter = NULL;
4816     pp_context->pp_inline_parameter = NULL;
4817 }
4818
4819 Bool
4820 i965_post_processing_terminate(VADriverContextP ctx)
4821 {
4822     struct i965_driver_data *i965 = i965_driver_data(ctx);
4823     struct i965_post_processing_context *pp_context = i965->pp_context;
4824
4825     if (pp_context) {
4826         i965_post_processing_context_finalize(pp_context);
4827         free(pp_context);
4828     }
4829
4830     i965->pp_context = NULL;
4831
4832     return True;
4833 }
4834
4835 static void
4836 i965_post_processing_context_init(VADriverContextP ctx,
4837                                   struct i965_post_processing_context *pp_context,
4838                                   struct intel_batchbuffer *batch)
4839 {
4840     struct i965_driver_data *i965 = i965_driver_data(ctx);
4841     int i;
4842
4843     pp_context->urb.size = URB_SIZE((&i965->intel));
4844     pp_context->urb.num_vfe_entries = 32;
4845     pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
4846     pp_context->urb.num_cs_entries = 1;
4847     
4848     if (IS_GEN7(i965->intel.device_id))
4849         pp_context->urb.size_cs_entry = 4;      /* in 512 bits unit */
4850     else
4851         pp_context->urb.size_cs_entry = 2;
4852
4853     pp_context->urb.vfe_start = 0;
4854     pp_context->urb.cs_start = pp_context->urb.vfe_start + 
4855         pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
4856     assert(pp_context->urb.cs_start + 
4857            pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
4858
4859     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
4860     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
4861     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
4862     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen75));
4863
4864     if (IS_HASWELL(i965->intel.device_id))
4865         memcpy(pp_context->pp_modules, pp_modules_gen75, sizeof(pp_context->pp_modules));
4866     else if (IS_GEN7(i965->intel.device_id))
4867         memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
4868     else if (IS_GEN6(i965->intel.device_id))
4869         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
4870     else if (IS_IRONLAKE(i965->intel.device_id))
4871         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
4872
4873     for (i = 0; i < NUM_PP_MODULES; i++) {
4874         struct pp_module *pp_module = &pp_context->pp_modules[i];
4875         dri_bo_unreference(pp_module->kernel.bo);
4876         if (pp_module->kernel.bin && pp_module->kernel.size) {
4877             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
4878                                                 pp_module->kernel.name,
4879                                                 pp_module->kernel.size,
4880                                                 4096);
4881             assert(pp_module->kernel.bo);
4882             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
4883         } else {
4884             pp_module->kernel.bo = NULL;
4885         }
4886     }
4887
4888     /* static & inline parameters */
4889     if (IS_GEN7(i965->intel.device_id)) {
4890         pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
4891         pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
4892     } else {
4893         pp_context->pp_static_parameter = calloc(sizeof(struct pp_static_parameter), 1);
4894         pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
4895     }
4896
4897     pp_context->batch = batch;
4898 }
4899
4900 Bool
4901 i965_post_processing_init(VADriverContextP ctx)
4902 {
4903     struct i965_driver_data *i965 = i965_driver_data(ctx);
4904     struct i965_post_processing_context *pp_context = i965->pp_context;
4905
4906     if (HAS_PP(i965)) {
4907         if (pp_context == NULL) {
4908             pp_context = calloc(1, sizeof(*pp_context));
4909             i965_post_processing_context_init(ctx, pp_context, i965->batch);
4910             i965->pp_context = pp_context;
4911         }
4912     }
4913
4914     return True;
4915 }
4916
4917 static const int procfilter_to_pp_flag[VAProcFilterCount] = {
4918     PP_NULL,    /* VAProcFilterNone */
4919     PP_NV12_DN, /* VAProcFilterNoiseReduction */
4920     PP_NULL,    /* VAProcFilterDeblocking */
4921     PP_NV12_DNDI, /* VAProcFilterDeinterlacing */
4922     PP_NULL,    /* VAProcFilterSharpening */
4923     PP_NULL,    /* VAProcFilterColorBalance */
4924     PP_NULL,    /* VAProcFilterColorStandard */
4925     PP_NULL,    /* VAProcFilterFrameRateConversion */
4926 };
4927
4928 static const int proc_frame_to_pp_frame[3] = {
4929     I965_SURFACE_FLAG_FRAME,
4930     I965_SURFACE_FLAG_TOP_FIELD_FIRST,
4931     I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST
4932 };
4933
4934 void 
4935 i965_proc_picture(VADriverContextP ctx, 
4936                   VAProfile profile, 
4937                   union codec_state *codec_state,
4938                   struct hw_context *hw_context)
4939 {
4940     struct i965_driver_data *i965 = i965_driver_data(ctx);
4941     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
4942     struct proc_state *proc_state = &codec_state->proc;
4943     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
4944     struct object_surface *obj_surface;
4945     struct i965_surface src_surface, dst_surface;
4946     VARectangle src_rect, dst_rect;
4947     VAStatus status;
4948     int i;
4949     VASurfaceID tmp_surfaces[VAProcFilterCount + 4];
4950     int num_tmp_surfaces = 0;
4951     unsigned int tiling = 0, swizzle = 0;
4952     int in_width, in_height;
4953
4954     assert(pipeline_param->surface != VA_INVALID_ID);
4955     assert(proc_state->current_render_target != VA_INVALID_ID);
4956
4957     obj_surface = SURFACE(pipeline_param->surface);
4958     in_width = obj_surface->orig_width;
4959     in_height = obj_surface->orig_height;
4960     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
4961
4962     src_surface.id = pipeline_param->surface;
4963     src_surface.type = I965_SURFACE_TYPE_SURFACE;
4964     src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
4965
4966     VASurfaceID out_surface_id = VA_INVALID_ID;
4967     if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2')) {
4968         src_surface.id = pipeline_param->surface;
4969         src_surface.type = I965_SURFACE_TYPE_SURFACE;
4970         src_surface.flags = I965_SURFACE_FLAG_FRAME;
4971         src_rect.x = 0;
4972         src_rect.y = 0;
4973         src_rect.width = in_width;
4974         src_rect.height = in_height;
4975
4976         status = i965_CreateSurfaces(ctx,
4977                                      in_width,
4978                                      in_height,
4979                                      VA_RT_FORMAT_YUV420,
4980                                      1,
4981                                      &out_surface_id);
4982         assert(status == VA_STATUS_SUCCESS);
4983         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
4984         obj_surface = SURFACE(out_surface_id);
4985         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
4986
4987         dst_surface.id = out_surface_id;
4988         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4989         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4990         dst_rect.x = 0;
4991         dst_rect.y = 0;
4992         dst_rect.width = in_width;
4993         dst_rect.height = in_height;
4994
4995         status = i965_image_processing(ctx,
4996                                        &src_surface,
4997                                        &src_rect,
4998                                        &dst_surface,
4999                                        &dst_rect);
5000         assert(status == VA_STATUS_SUCCESS);
5001
5002         src_surface.id = out_surface_id;
5003         src_surface.type = I965_SURFACE_TYPE_SURFACE;
5004         src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
5005     }
5006
5007     if (pipeline_param->surface_region) {
5008         src_rect.x = pipeline_param->surface_region->x;
5009         src_rect.y = pipeline_param->surface_region->y;
5010         src_rect.width = pipeline_param->surface_region->width;
5011         src_rect.height = pipeline_param->surface_region->height;
5012     } else {
5013         src_rect.x = 0;
5014         src_rect.y = 0;
5015         src_rect.width = in_width;
5016         src_rect.height = in_height;
5017     }
5018
5019     if (pipeline_param->output_region) {
5020         dst_rect.x = pipeline_param->output_region->x;
5021         dst_rect.y = pipeline_param->output_region->y;
5022         dst_rect.width = pipeline_param->output_region->width;
5023         dst_rect.height = pipeline_param->output_region->height;
5024     } else {
5025         dst_rect.x = 0;
5026         dst_rect.y = 0;
5027         dst_rect.width = in_width;
5028         dst_rect.height = in_height;
5029     }
5030
5031     for (i = 0; i < pipeline_param->num_filters; i++) {
5032         struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]);
5033         VAProcFilterParameterBufferBase *filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer;
5034         VAProcFilterType filter_type = filter_param->type;
5035         out_surface_id = VA_INVALID_ID;
5036         int kernel_index = procfilter_to_pp_flag[filter_type];
5037
5038         if (kernel_index != PP_NULL &&
5039             proc_context->pp_context.pp_modules[kernel_index].kernel.bo != NULL) {
5040             status = i965_CreateSurfaces(ctx,
5041                                          in_width,
5042                                          in_height,
5043                                          VA_RT_FORMAT_YUV420,
5044                                          1,
5045                                          &out_surface_id);
5046             assert(status == VA_STATUS_SUCCESS);
5047             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
5048             obj_surface = SURFACE(out_surface_id);
5049             i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
5050             dst_surface.id = out_surface_id;
5051             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5052             status = i965_post_processing_internal(ctx, &proc_context->pp_context,
5053                                                    &src_surface,
5054                                                    &src_rect,
5055                                                    &dst_surface,
5056                                                    &src_rect,
5057                                                    kernel_index,
5058                                                    filter_param);
5059
5060             if (status == VA_STATUS_SUCCESS) {
5061                 src_surface.id = dst_surface.id;
5062                 src_surface.type = dst_surface.type;
5063                 src_surface.flags = dst_surface.flags;
5064             }
5065         }
5066     }
5067
5068     obj_surface = SURFACE(proc_state->current_render_target);
5069     int csc_needed = 0;
5070     if (obj_surface->fourcc && obj_surface->fourcc !=  VA_FOURCC('N','V','1','2')){
5071         csc_needed = 1;
5072         out_surface_id = VA_INVALID_ID;
5073         status = i965_CreateSurfaces(ctx,
5074                                      obj_surface->orig_width,
5075                                      obj_surface->orig_height,
5076                                      VA_RT_FORMAT_YUV420, 
5077                                      1,
5078                                      &out_surface_id);
5079         assert(status == VA_STATUS_SUCCESS);
5080         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
5081         struct object_surface *csc_surface = SURFACE(out_surface_id);
5082         i965_check_alloc_surface_bo(ctx, csc_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
5083         dst_surface.id = out_surface_id;
5084     } else {
5085         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
5086         dst_surface.id = proc_state->current_render_target;
5087     }
5088
5089     dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5090     i965_vpp_clear_surface(ctx, &proc_context->pp_context, proc_state->current_render_target, pipeline_param->output_background_color); 
5091
5092     // load/save doesn't support different origin offset for src and dst surface
5093     if (src_rect.width == dst_rect.width &&
5094         src_rect.height == dst_rect.height &&
5095         src_rect.x == dst_rect.x &&
5096         src_rect.y == dst_rect.y) {
5097         i965_post_processing_internal(ctx, &proc_context->pp_context,
5098                                       &src_surface,
5099                                       &src_rect,
5100                                       &dst_surface,
5101                                       &dst_rect,
5102                                       PP_NV12_LOAD_SAVE_N12,
5103                                       NULL);
5104     } else {
5105
5106         i965_post_processing_internal(ctx, &proc_context->pp_context,
5107                                       &src_surface,
5108                                       &src_rect,
5109                                       &dst_surface,
5110                                       &dst_rect,
5111                                       (pipeline_param->filter_flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC ?
5112                                       PP_NV12_AVS : PP_NV12_SCALING,
5113                                       NULL);
5114     }
5115
5116     if (csc_needed) {
5117         src_surface.id = dst_surface.id;
5118         src_surface.type = dst_surface.type;
5119         src_surface.flags = dst_surface.flags;
5120         dst_surface.id = proc_state->current_render_target;
5121         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5122         i965_image_processing(ctx, &src_surface, &dst_rect, &dst_surface, &dst_rect);
5123     }
5124     
5125     if (num_tmp_surfaces)
5126         i965_DestroySurfaces(ctx,
5127                              tmp_surfaces,
5128                              num_tmp_surfaces);
5129
5130     intel_batchbuffer_flush(hw_context->batch);
5131 }
5132
5133 static void
5134 i965_proc_context_destroy(void *hw_context)
5135 {
5136     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
5137
5138     i965_post_processing_context_finalize(&proc_context->pp_context);
5139     intel_batchbuffer_free(proc_context->base.batch);
5140     free(proc_context);
5141 }
5142
5143 struct hw_context *
5144 i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config)
5145 {
5146     struct intel_driver_data *intel = intel_driver_data(ctx);
5147     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
5148
5149     proc_context->base.destroy = i965_proc_context_destroy;
5150     proc_context->base.run = i965_proc_picture;
5151     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
5152     i965_post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
5153
5154     return (struct hw_context *)proc_context;
5155 }