Allow to create batchbuffer based on the expected buffer size
[profile/ivi/vaapi-intel-driver.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_post_processing.h"
40 #include "i965_render.h"
41
/*
 * Non-zero when the device id stored in the driver context matches one of
 * IS_IRONLAKE, IS_GEN6 or IS_GEN7.
 */
#define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) ||     \
                     IS_GEN6((ctx)->intel.device_id) ||         \
                     IS_GEN7((ctx)->intel.device_id))

/*
 * Surface-state slot sizes.  Every surface-state layout is padded to a
 * multiple of 32 bytes, and the slot size is the largest padded size across
 * the i965 and gen7 layouts, so one slot can hold any of them.
 */
#define SURFACE_STATE_PADDED_SIZE_0_I965        ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_I965        ALIGN(sizeof(struct i965_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_I965          MAX(SURFACE_STATE_PADDED_SIZE_0_I965, SURFACE_STATE_PADDED_SIZE_1_I965)

#define SURFACE_STATE_PADDED_SIZE_0_GEN7        ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_GEN7        ALIGN(sizeof(struct gen7_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN7          MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)

#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)

/*
 * Byte offset of surface-state slot `index`.  The argument is parenthesized
 * so that an expression such as SURFACE_STATE_OFFSET(i + 1) multiplies the
 * whole expression instead of only its first operand.
 */
#define SURFACE_STATE_OFFSET(index)             ((SURFACE_STATE_PADDED_SIZE) * (index))

/* The binding table starts right after MAX_PP_SURFACES surface-state slots. */
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)

/*
 * NOTE(review): presumably the pixel block size handled per kernel thread and
 * the required alignment of the x offset - confirm against the users of these
 * constants.
 */
#define GPU_ASM_BLOCK_WIDTH         16
#define GPU_ASM_BLOCK_HEIGHT        8
#define GPU_ASM_X_OFFSET_ALIGNMENT  4
61
62 static const uint32_t pp_null_gen5[][4] = {
63 #include "shaders/post_processing/gen5_6/null.g4b.gen5"
64 };
65
66 static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
67 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
68 };
69
70 static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
71 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
72 };
73
74 static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
75 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
76 };
77
78 static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
79 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
80 };
81
82 static const uint32_t pp_nv12_scaling_gen5[][4] = {
83 #include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
84 };
85
86 static const uint32_t pp_nv12_avs_gen5[][4] = {
87 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
88 };
89
90 static const uint32_t pp_nv12_dndi_gen5[][4] = {
91 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
92 };
93
94 static const uint32_t pp_nv12_dn_gen5[][4] = {
95 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
96 };
97
98 static const uint32_t pp_nv12_load_save_pa_gen5[][4] = {
99 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5"
100 };
101
102 static const uint32_t pp_pl3_load_save_pa_gen5[][4] = {
103 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5"
104 };
105
106 static const uint32_t pp_pa_load_save_nv12_gen5[][4] = {
107 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5"
108 };
109
110 static const uint32_t pp_pa_load_save_pl3_gen5[][4] = {
111 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5"
112 };
113
114 static const uint32_t pp_rgbx_load_save_nv12_gen5[][4] = {
115 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g4b.gen5"
116 };
117
118 static const uint32_t pp_nv12_load_save_rgbx_gen5[][4] = {
119 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g4b.gen5"
120 };
121
122 static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
123                                    const struct i965_surface *src_surface,
124                                    const VARectangle *src_rect,
125                                    struct i965_surface *dst_surface,
126                                    const VARectangle *dst_rect,
127                                    void *filter_param);
128 static VAStatus pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
129                                             const struct i965_surface *src_surface,
130                                             const VARectangle *src_rect,
131                                             struct i965_surface *dst_surface,
132                                             const VARectangle *dst_rect,
133                                             void *filter_param);
134 static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
135                                            const struct i965_surface *src_surface,
136                                            const VARectangle *src_rect,
137                                            struct i965_surface *dst_surface,
138                                            const VARectangle *dst_rect,
139                                            void *filter_param);
140 static VAStatus gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
141                                              const struct i965_surface *src_surface,
142                                              const VARectangle *src_rect,
143                                              struct i965_surface *dst_surface,
144                                              const VARectangle *dst_rect,
145                                              void *filter_param);
146 static VAStatus pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
147                                                 const struct i965_surface *src_surface,
148                                                 const VARectangle *src_rect,
149                                                 struct i965_surface *dst_surface,
150                                                 const VARectangle *dst_rect,
151                                                 void *filter_param);
152 static VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
153                                         const struct i965_surface *src_surface,
154                                         const VARectangle *src_rect,
155                                         struct i965_surface *dst_surface,
156                                         const VARectangle *dst_rect,
157                                         void *filter_param);
158 static VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
159                                       const struct i965_surface *src_surface,
160                                       const VARectangle *src_rect,
161                                       struct i965_surface *dst_surface,
162                                       const VARectangle *dst_rect,
163                                       void *filter_param);
164
165 static struct pp_module pp_modules_gen5[] = {
166     {
167         {
168             "NULL module (for testing)",
169             PP_NULL,
170             pp_null_gen5,
171             sizeof(pp_null_gen5),
172             NULL,
173         },
174
175         pp_null_initialize,
176     },
177
178     {
179         {
180             "NV12_NV12",
181             PP_NV12_LOAD_SAVE_N12,
182             pp_nv12_load_save_nv12_gen5,
183             sizeof(pp_nv12_load_save_nv12_gen5),
184             NULL,
185         },
186
187         pp_plx_load_save_plx_initialize,
188     },
189
190     {
191         {
192             "NV12_PL3",
193             PP_NV12_LOAD_SAVE_PL3,
194             pp_nv12_load_save_pl3_gen5,
195             sizeof(pp_nv12_load_save_pl3_gen5),
196             NULL,
197         },
198
199         pp_plx_load_save_plx_initialize,
200     },
201
202     {
203         {
204             "PL3_NV12",
205             PP_PL3_LOAD_SAVE_N12,
206             pp_pl3_load_save_nv12_gen5,
207             sizeof(pp_pl3_load_save_nv12_gen5),
208             NULL,
209         },
210
211         pp_plx_load_save_plx_initialize,
212     },
213
214     {
215         {
216             "PL3_PL3",
217             PP_PL3_LOAD_SAVE_N12,
218             pp_pl3_load_save_pl3_gen5,
219             sizeof(pp_pl3_load_save_pl3_gen5),
220             NULL,
221         },
222
223         pp_plx_load_save_plx_initialize
224     },
225
226     {
227         {
228             "NV12 Scaling module",
229             PP_NV12_SCALING,
230             pp_nv12_scaling_gen5,
231             sizeof(pp_nv12_scaling_gen5),
232             NULL,
233         },
234
235         pp_nv12_scaling_initialize,
236     },
237
238     {
239         {
240             "NV12 AVS module",
241             PP_NV12_AVS,
242             pp_nv12_avs_gen5,
243             sizeof(pp_nv12_avs_gen5),
244             NULL,
245         },
246
247         pp_nv12_avs_initialize_nlas,
248     },
249
250     {
251         {
252             "NV12 DNDI module",
253             PP_NV12_DNDI,
254             pp_nv12_dndi_gen5,
255             sizeof(pp_nv12_dndi_gen5),
256             NULL,
257         },
258
259         pp_nv12_dndi_initialize,
260     },
261
262     {
263         {
264             "NV12 DN module",
265             PP_NV12_DN,
266             pp_nv12_dn_gen5,
267             sizeof(pp_nv12_dn_gen5),
268             NULL,
269         },
270
271         pp_nv12_dn_initialize,
272     },
273
274     {
275         {
276             "NV12_PA module",
277             PP_NV12_LOAD_SAVE_PA,
278             pp_nv12_load_save_pa_gen5,
279             sizeof(pp_nv12_load_save_pa_gen5),
280             NULL,
281         },
282     
283         pp_plx_load_save_plx_initialize,
284     },
285
286     {
287         {
288             "PL3_PA module",
289             PP_PL3_LOAD_SAVE_PA,
290             pp_pl3_load_save_pa_gen5,
291             sizeof(pp_pl3_load_save_pa_gen5),
292             NULL,
293         },
294     
295         pp_plx_load_save_plx_initialize,
296     },
297
298     {
299         {
300             "PA_NV12 module",
301             PP_PA_LOAD_SAVE_NV12,
302             pp_pa_load_save_nv12_gen5,
303             sizeof(pp_pa_load_save_nv12_gen5),
304             NULL,
305         },
306     
307         pp_plx_load_save_plx_initialize,
308     },
309
310     {
311         {
312             "PA_PL3 module",
313             PP_PA_LOAD_SAVE_PL3,
314             pp_pa_load_save_pl3_gen5,
315             sizeof(pp_pa_load_save_pl3_gen5),
316             NULL,
317         },
318     
319         pp_plx_load_save_plx_initialize,
320     },
321
322     {
323         {
324             "RGBX_NV12 module",
325             PP_RGBX_LOAD_SAVE_NV12,
326             pp_rgbx_load_save_nv12_gen5,
327             sizeof(pp_rgbx_load_save_nv12_gen5),
328             NULL,
329         },
330     
331         pp_plx_load_save_plx_initialize,
332     },
333             
334     {
335         {
336             "NV12_RGBX module",
337             PP_NV12_LOAD_SAVE_RGBX,
338             pp_nv12_load_save_rgbx_gen5,
339             sizeof(pp_nv12_load_save_rgbx_gen5),
340             NULL,
341         },
342     
343         pp_plx_load_save_plx_initialize,
344     },
345                     
346 };
347
348 static const uint32_t pp_null_gen6[][4] = {
349 #include "shaders/post_processing/gen5_6/null.g6b"
350 };
351
352 static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
353 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
354 };
355
356 static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
357 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
358 };
359
360 static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
361 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
362 };
363
364 static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
365 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
366 };
367
368 static const uint32_t pp_nv12_scaling_gen6[][4] = {
369 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
370 };
371
372 static const uint32_t pp_nv12_avs_gen6[][4] = {
373 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
374 };
375
376 static const uint32_t pp_nv12_dndi_gen6[][4] = {
377 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
378 };
379
380 static const uint32_t pp_nv12_dn_gen6[][4] = {
381 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
382 };
383
384 static const uint32_t pp_nv12_load_save_pa_gen6[][4] = {
385 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g6b"
386 };
387
388 static const uint32_t pp_pl3_load_save_pa_gen6[][4] = {
389 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g6b"
390 };
391
392 static const uint32_t pp_pa_load_save_nv12_gen6[][4] = {
393 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g6b"
394 };
395
396 static const uint32_t pp_pa_load_save_pl3_gen6[][4] = {
397 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g6b"
398 };
399
400 static const uint32_t pp_rgbx_load_save_nv12_gen6[][4] = {
401 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g6b"
402 };
403
404 static const uint32_t pp_nv12_load_save_rgbx_gen6[][4] = {
405 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g6b"
406 };
407
408 static struct pp_module pp_modules_gen6[] = {
409     {
410         {
411             "NULL module (for testing)",
412             PP_NULL,
413             pp_null_gen6,
414             sizeof(pp_null_gen6),
415             NULL,
416         },
417
418         pp_null_initialize,
419     },
420
421     {
422         {
423             "NV12_NV12",
424             PP_NV12_LOAD_SAVE_N12,
425             pp_nv12_load_save_nv12_gen6,
426             sizeof(pp_nv12_load_save_nv12_gen6),
427             NULL,
428         },
429
430         pp_plx_load_save_plx_initialize,
431     },
432
433     {
434         {
435             "NV12_PL3",
436             PP_NV12_LOAD_SAVE_PL3,
437             pp_nv12_load_save_pl3_gen6,
438             sizeof(pp_nv12_load_save_pl3_gen6),
439             NULL,
440         },
441         
442         pp_plx_load_save_plx_initialize,
443     },
444
445     {
446         {
447             "PL3_NV12",
448             PP_PL3_LOAD_SAVE_N12,
449             pp_pl3_load_save_nv12_gen6,
450             sizeof(pp_pl3_load_save_nv12_gen6),
451             NULL,
452         },
453
454         pp_plx_load_save_plx_initialize,
455     },
456
457     {
458         {
459             "PL3_PL3",
460             PP_PL3_LOAD_SAVE_N12,
461             pp_pl3_load_save_pl3_gen6,
462             sizeof(pp_pl3_load_save_pl3_gen6),
463             NULL,
464         },
465
466         pp_plx_load_save_plx_initialize,
467     },
468
469     {
470         {
471             "NV12 Scaling module",
472             PP_NV12_SCALING,
473             pp_nv12_scaling_gen6,
474             sizeof(pp_nv12_scaling_gen6),
475             NULL,
476         },
477
478         gen6_nv12_scaling_initialize,
479     },
480
481     {
482         {
483             "NV12 AVS module",
484             PP_NV12_AVS,
485             pp_nv12_avs_gen6,
486             sizeof(pp_nv12_avs_gen6),
487             NULL,
488         },
489
490         pp_nv12_avs_initialize_nlas,
491     },
492
493     {
494         {
495             "NV12 DNDI module",
496             PP_NV12_DNDI,
497             pp_nv12_dndi_gen6,
498             sizeof(pp_nv12_dndi_gen6),
499             NULL,
500         },
501
502         pp_nv12_dndi_initialize,
503     },
504
505     {
506         {
507             "NV12 DN module",
508             PP_NV12_DN,
509             pp_nv12_dn_gen6,
510             sizeof(pp_nv12_dn_gen6),
511             NULL,
512         },
513
514         pp_nv12_dn_initialize,
515     },
516     {
517         {
518             "NV12_PA module",
519             PP_NV12_LOAD_SAVE_PA,
520             pp_nv12_load_save_pa_gen6,
521             sizeof(pp_nv12_load_save_pa_gen6),
522             NULL,
523         },
524     
525         pp_plx_load_save_plx_initialize,
526     },
527     
528     {
529         {
530             "PL3_PA module",
531             PP_PL3_LOAD_SAVE_PA,
532             pp_pl3_load_save_pa_gen6,
533             sizeof(pp_pl3_load_save_pa_gen6),
534             NULL,
535         },
536     
537         pp_plx_load_save_plx_initialize,
538     },
539     
540     {
541         {
542             "PA_NV12 module",
543             PP_PA_LOAD_SAVE_NV12,
544             pp_pa_load_save_nv12_gen6,
545             sizeof(pp_pa_load_save_nv12_gen6),
546             NULL,
547         },
548     
549         pp_plx_load_save_plx_initialize,
550     },
551
552     {
553         {
554             "PA_PL3 module",
555             PP_PA_LOAD_SAVE_PL3,
556             pp_pa_load_save_pl3_gen6,
557             sizeof(pp_pa_load_save_pl3_gen6),
558             NULL,
559         },
560     
561         pp_plx_load_save_plx_initialize,
562     },
563     
564     {
565         {
566             "RGBX_NV12 module",
567             PP_RGBX_LOAD_SAVE_NV12,
568             pp_rgbx_load_save_nv12_gen6,
569             sizeof(pp_rgbx_load_save_nv12_gen6),
570             NULL,
571         },
572     
573         pp_plx_load_save_plx_initialize,
574     },
575
576     {
577         {
578             "NV12_RGBX module",
579             PP_NV12_LOAD_SAVE_RGBX,
580             pp_nv12_load_save_rgbx_gen6,
581             sizeof(pp_nv12_load_save_rgbx_gen6),
582             NULL,
583         },
584     
585         pp_plx_load_save_plx_initialize,
586     },
587 };
588
589 static const uint32_t pp_null_gen7[][4] = {
590 };
591
592 static const uint32_t pp_nv12_load_save_nv12_gen7[][4] = {
593 #include "shaders/post_processing/gen7/pl2_to_pl2.g7b"
594 };
595
596 static const uint32_t pp_nv12_load_save_pl3_gen7[][4] = {
597 #include "shaders/post_processing/gen7/pl2_to_pl3.g7b"
598 };
599
600 static const uint32_t pp_pl3_load_save_nv12_gen7[][4] = {
601 #include "shaders/post_processing/gen7/pl3_to_pl2.g7b"
602 };
603
604 static const uint32_t pp_pl3_load_save_pl3_gen7[][4] = {
605 #include "shaders/post_processing/gen7/pl3_to_pl3.g7b"
606 };
607
608 static const uint32_t pp_nv12_scaling_gen7[][4] = {
609 #include "shaders/post_processing/gen7/avs.g7b"
610 };
611
612 static const uint32_t pp_nv12_avs_gen7[][4] = {
613 #include "shaders/post_processing/gen7/avs.g7b"
614 };
615
616 static const uint32_t pp_nv12_dndi_gen7[][4] = {
617 #include "shaders/post_processing/gen7/dndi.g7b"
618 };
619
620 static const uint32_t pp_nv12_dn_gen7[][4] = {
621 #include "shaders/post_processing/gen7/nv12_dn_nv12.g7b"
622 };
623 static const uint32_t pp_nv12_load_save_pa_gen7[][4] = {
624 #include "shaders/post_processing/gen7/pl2_to_pa.g7b"
625 };
626 static const uint32_t pp_pl3_load_save_pa_gen7[][4] = {
627 #include "shaders/post_processing/gen7/pl3_to_pa.g7b"
628 };
629 static const uint32_t pp_pa_load_save_nv12_gen7[][4] = {
630 #include "shaders/post_processing/gen7/pa_to_pl2.g7b"
631 };
632 static const uint32_t pp_pa_load_save_pl3_gen7[][4] = {
633 #include "shaders/post_processing/gen7/pa_to_pl3.g7b"
634 };
635 static const uint32_t pp_rgbx_load_save_nv12_gen7[][4] = {
636 };
637 static const uint32_t pp_nv12_load_save_rgbx_gen7[][4] = {
638 };
639
640 static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
641                                            const struct i965_surface *src_surface,
642                                            const VARectangle *src_rect,
643                                            struct i965_surface *dst_surface,
644                                            const VARectangle *dst_rect,
645                                            void *filter_param);
646 static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
647                                              const struct i965_surface *src_surface,
648                                              const VARectangle *src_rect,
649                                              struct i965_surface *dst_surface,
650                                              const VARectangle *dst_rect,
651                                              void *filter_param);
652 static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
653                                            const struct i965_surface *src_surface,
654                                            const VARectangle *src_rect,
655                                            struct i965_surface *dst_surface,
656                                            const VARectangle *dst_rect,
657                                            void *filter_param);
658
659 static struct pp_module pp_modules_gen7[] = {
660     {
661         {
662             "NULL module (for testing)",
663             PP_NULL,
664             pp_null_gen7,
665             sizeof(pp_null_gen7),
666             NULL,
667         },
668
669         pp_null_initialize,
670     },
671
672     {
673         {
674             "NV12_NV12",
675             PP_NV12_LOAD_SAVE_N12,
676             pp_nv12_load_save_nv12_gen7,
677             sizeof(pp_nv12_load_save_nv12_gen7),
678             NULL,
679         },
680
681         gen7_pp_plx_avs_initialize,
682     },
683
684     {
685         {
686             "NV12_PL3",
687             PP_NV12_LOAD_SAVE_PL3,
688             pp_nv12_load_save_pl3_gen7,
689             sizeof(pp_nv12_load_save_pl3_gen7),
690             NULL,
691         },
692         
693         gen7_pp_plx_avs_initialize,
694     },
695
696     {
697         {
698             "PL3_NV12",
699             PP_PL3_LOAD_SAVE_N12,
700             pp_pl3_load_save_nv12_gen7,
701             sizeof(pp_pl3_load_save_nv12_gen7),
702             NULL,
703         },
704
705         gen7_pp_plx_avs_initialize,
706     },
707
708     {
709         {
710             "PL3_PL3",
711             PP_PL3_LOAD_SAVE_N12,
712             pp_pl3_load_save_pl3_gen7,
713             sizeof(pp_pl3_load_save_pl3_gen7),
714             NULL,
715         },
716
717         gen7_pp_plx_avs_initialize,
718     },
719
720     {
721         {
722             "NV12 Scaling module",
723             PP_NV12_SCALING,
724             pp_nv12_scaling_gen7,
725             sizeof(pp_nv12_scaling_gen7),
726             NULL,
727         },
728
729         gen7_pp_plx_avs_initialize,
730     },
731
732     {
733         {
734             "NV12 AVS module",
735             PP_NV12_AVS,
736             pp_nv12_avs_gen7,
737             sizeof(pp_nv12_avs_gen7),
738             NULL,
739         },
740
741         gen7_pp_plx_avs_initialize,
742     },
743
744     {
745         {
746             "NV12 DNDI module",
747             PP_NV12_DNDI,
748             pp_nv12_dndi_gen7,
749             sizeof(pp_nv12_dndi_gen7),
750             NULL,
751         },
752
753         gen7_pp_nv12_dndi_initialize,
754     },
755
756     {
757         {
758             "NV12 DN module",
759             PP_NV12_DN,
760             pp_nv12_dn_gen7,
761             sizeof(pp_nv12_dn_gen7),
762             NULL,
763         },
764
765         gen7_pp_nv12_dn_initialize,
766     },
767     {
768         {
769             "NV12_PA module",
770             PP_NV12_LOAD_SAVE_PA,
771             pp_nv12_load_save_pa_gen7,
772             sizeof(pp_nv12_load_save_pa_gen7),
773             NULL,
774         },
775     
776         gen7_pp_plx_avs_initialize,
777     },
778
779     {
780         {
781             "PL3_PA module",
782             PP_PL3_LOAD_SAVE_PA,
783             pp_pl3_load_save_pa_gen7,
784             sizeof(pp_pl3_load_save_pa_gen7),
785             NULL,
786         },
787     
788         gen7_pp_plx_avs_initialize,
789     },
790
791     {
792         {
793             "PA_NV12 module",
794             PP_PA_LOAD_SAVE_NV12,
795             pp_pa_load_save_nv12_gen7,
796             sizeof(pp_pa_load_save_nv12_gen7),
797             NULL,
798         },
799     
800         gen7_pp_plx_avs_initialize,
801     },
802
803     {
804         {
805             "PA_PL3 module",
806             PP_PA_LOAD_SAVE_PL3,
807             pp_pa_load_save_pl3_gen7,
808             sizeof(pp_pa_load_save_pl3_gen7),
809             NULL,
810         },
811     
812         gen7_pp_plx_avs_initialize,
813     },
814     
815     {
816         {
817             "RGBX_NV12 module",
818             PP_RGBX_LOAD_SAVE_NV12,
819             pp_rgbx_load_save_nv12_gen7,
820             sizeof(pp_rgbx_load_save_nv12_gen7),
821             NULL,
822         },
823     
824         pp_plx_load_save_plx_initialize,
825     },
826
827     {
828         {
829             "NV12_RGBX module",
830             PP_NV12_LOAD_SAVE_RGBX,
831             pp_nv12_load_save_rgbx_gen7,
832             sizeof(pp_nv12_load_save_rgbx_gen7),
833             NULL,
834         },
835     
836         pp_plx_load_save_plx_initialize,
837     },
838             
839 };
840
841 static const uint32_t pp_null_gen75[][4] = {
842 };
843
844 static const uint32_t pp_nv12_load_save_nv12_gen75[][4] = {
845 #include "shaders/post_processing/gen7/pl2_to_pl2.g75b"
846 };
847
848 static const uint32_t pp_nv12_load_save_pl3_gen75[][4] = {
849 #include "shaders/post_processing/gen7/pl2_to_pl3.g75b"
850 };
851
852 static const uint32_t pp_pl3_load_save_nv12_gen75[][4] = {
853 #include "shaders/post_processing/gen7/pl3_to_pl2.g75b"
854 };
855
856 static const uint32_t pp_pl3_load_save_pl3_gen75[][4] = {
857 #include "shaders/post_processing/gen7/pl3_to_pl3.g75b"
858 };
859
860 static const uint32_t pp_nv12_scaling_gen75[][4] = {
861 #include "shaders/post_processing/gen7/avs.g75b"
862 };
863
864 static const uint32_t pp_nv12_avs_gen75[][4] = {
865 #include "shaders/post_processing/gen7/avs.g75b"
866 };
867
868 static const uint32_t pp_nv12_dndi_gen75[][4] = {
869 // #include "shaders/post_processing/gen7/dndi.g75b"
870 };
871
872 static const uint32_t pp_nv12_dn_gen75[][4] = {
873 // #include "shaders/post_processing/gen7/nv12_dn_nv12.g75b"
874 };
875 static const uint32_t pp_nv12_load_save_pa_gen75[][4] = {
876 #include "shaders/post_processing/gen7/pl2_to_pa.g75b"
877 };
878 static const uint32_t pp_pl3_load_save_pa_gen75[][4] = {
879 #include "shaders/post_processing/gen7/pl3_to_pa.g75b"
880 };
881 static const uint32_t pp_pa_load_save_nv12_gen75[][4] = {
882 #include "shaders/post_processing/gen7/pa_to_pl2.g75b"
883 };
884 static const uint32_t pp_pa_load_save_pl3_gen75[][4] = {
885 #include "shaders/post_processing/gen7/pa_to_pl3.g75b"
886 };
887 static const uint32_t pp_rgbx_load_save_nv12_gen75[][4] = {
888 };
889 static const uint32_t pp_nv12_load_save_rgbx_gen75[][4] = {
890 };
891
892 static struct pp_module pp_modules_gen75[] = {
893     {
894         {
895             "NULL module (for testing)",
896             PP_NULL,
897             pp_null_gen75,
898             sizeof(pp_null_gen75),
899             NULL,
900         },
901
902         pp_null_initialize,
903     },
904
905     {
906         {
907             "NV12_NV12",
908             PP_NV12_LOAD_SAVE_N12,
909             pp_nv12_load_save_nv12_gen75,
910             sizeof(pp_nv12_load_save_nv12_gen75),
911             NULL,
912         },
913
914         gen7_pp_plx_avs_initialize,
915     },
916
917     {
918         {
919             "NV12_PL3",
920             PP_NV12_LOAD_SAVE_PL3,
921             pp_nv12_load_save_pl3_gen75,
922             sizeof(pp_nv12_load_save_pl3_gen75),
923             NULL,
924         },
925         
926         gen7_pp_plx_avs_initialize,
927     },
928
929     {
930         {
931             "PL3_NV12",
932             PP_PL3_LOAD_SAVE_N12,
933             pp_pl3_load_save_nv12_gen75,
934             sizeof(pp_pl3_load_save_nv12_gen75),
935             NULL,
936         },
937
938         gen7_pp_plx_avs_initialize,
939     },
940
941     {
942         {
943             "PL3_PL3",
944             PP_PL3_LOAD_SAVE_N12,
945             pp_pl3_load_save_pl3_gen75,
946             sizeof(pp_pl3_load_save_pl3_gen75),
947             NULL,
948         },
949
950         gen7_pp_plx_avs_initialize,
951     },
952
953     {
954         {
955             "NV12 Scaling module",
956             PP_NV12_SCALING,
957             pp_nv12_scaling_gen75,
958             sizeof(pp_nv12_scaling_gen75),
959             NULL,
960         },
961
962         gen7_pp_plx_avs_initialize,
963     },
964
965     {
966         {
967             "NV12 AVS module",
968             PP_NV12_AVS,
969             pp_nv12_avs_gen75,
970             sizeof(pp_nv12_avs_gen75),
971             NULL,
972         },
973
974         gen7_pp_plx_avs_initialize,
975     },
976
977     {
978         {
979             "NV12 DNDI module",
980             PP_NV12_DNDI,
981             pp_nv12_dndi_gen75,
982             sizeof(pp_nv12_dndi_gen75),
983             NULL,
984         },
985
986         gen7_pp_nv12_dndi_initialize,
987     },
988
989     {
990         {
991             "NV12 DN module",
992             PP_NV12_DN,
993             pp_nv12_dn_gen75,
994             sizeof(pp_nv12_dn_gen75),
995             NULL,
996         },
997
998         gen7_pp_nv12_dn_initialize,
999     },
1000     {
1001         {
1002             "NV12_PA module",
1003             PP_NV12_LOAD_SAVE_PA,
1004             pp_nv12_load_save_pa_gen75,
1005             sizeof(pp_nv12_load_save_pa_gen75),
1006             NULL,
1007         },
1008     
1009         gen7_pp_plx_avs_initialize,
1010     },
1011
1012     {
1013         {
1014             "PL3_PA module",
1015             PP_PL3_LOAD_SAVE_PA,
1016             pp_pl3_load_save_pa_gen75,
1017             sizeof(pp_pl3_load_save_pa_gen75),
1018             NULL,
1019         },
1020     
1021         gen7_pp_plx_avs_initialize,
1022     },
1023
1024     {
1025         {
1026             "PA_NV12 module",
1027             PP_PA_LOAD_SAVE_NV12,
1028             pp_pa_load_save_nv12_gen75,
1029             sizeof(pp_pa_load_save_nv12_gen75),
1030             NULL,
1031         },
1032     
1033         gen7_pp_plx_avs_initialize,
1034     },
1035
1036     {
1037         {
1038             "PA_PL3 module",
1039             PP_PA_LOAD_SAVE_PL3,
1040             pp_pa_load_save_pl3_gen75,
1041             sizeof(pp_pa_load_save_pl3_gen75),
1042             NULL,
1043         },
1044     
1045         gen7_pp_plx_avs_initialize,
1046     },
1047     
1048     {
1049         {
1050             "RGBX_NV12 module",
1051             PP_RGBX_LOAD_SAVE_NV12,
1052             pp_rgbx_load_save_nv12_gen75,
1053             sizeof(pp_rgbx_load_save_nv12_gen75),
1054             NULL,
1055         },
1056     
1057         pp_plx_load_save_plx_initialize,
1058     },
1059
1060     {
1061         {
1062             "NV12_RGBX module",
1063             PP_NV12_LOAD_SAVE_RGBX,
1064             pp_nv12_load_save_rgbx_gen75,
1065             sizeof(pp_nv12_load_save_rgbx_gen75),
1066             NULL,
1067         },
1068     
1069         pp_plx_load_save_plx_initialize,
1070     },
1071             
1072 };
1073
1074 static int
1075 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
1076 {
1077     struct i965_driver_data *i965 = i965_driver_data(ctx);
1078     int fourcc;
1079
1080     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
1081         struct object_image *obj_image = IMAGE(surface->id);
1082         fourcc = obj_image->image.format.fourcc;
1083     } else {
1084         struct object_surface *obj_surface = SURFACE(surface->id);
1085         fourcc = obj_surface->fourcc;
1086     }
1087
1088     return fourcc;
1089 }
1090
1091 static void
1092 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
1093 {
1094     switch (tiling) {
1095     case I915_TILING_NONE:
1096         ss->ss3.tiled_surface = 0;
1097         ss->ss3.tile_walk = 0;
1098         break;
1099     case I915_TILING_X:
1100         ss->ss3.tiled_surface = 1;
1101         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
1102         break;
1103     case I915_TILING_Y:
1104         ss->ss3.tiled_surface = 1;
1105         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
1106         break;
1107     }
1108 }
1109
1110 static void
1111 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
1112 {
1113     switch (tiling) {
1114     case I915_TILING_NONE:
1115         ss->ss2.tiled_surface = 0;
1116         ss->ss2.tile_walk = 0;
1117         break;
1118     case I915_TILING_X:
1119         ss->ss2.tiled_surface = 1;
1120         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1121         break;
1122     case I915_TILING_Y:
1123         ss->ss2.tiled_surface = 1;
1124         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1125         break;
1126     }
1127 }
1128
1129 static void
1130 gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
1131 {
1132     switch (tiling) {
1133     case I915_TILING_NONE:
1134         ss->ss0.tiled_surface = 0;
1135         ss->ss0.tile_walk = 0;
1136         break;
1137     case I915_TILING_X:
1138         ss->ss0.tiled_surface = 1;
1139         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
1140         break;
1141     case I915_TILING_Y:
1142         ss->ss0.tiled_surface = 1;
1143         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
1144         break;
1145     }
1146 }
1147
1148 static void
1149 gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
1150 {
1151     switch (tiling) {
1152     case I915_TILING_NONE:
1153         ss->ss2.tiled_surface = 0;
1154         ss->ss2.tile_walk = 0;
1155         break;
1156     case I915_TILING_X:
1157         ss->ss2.tiled_surface = 1;
1158         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1159         break;
1160     case I915_TILING_Y:
1161         ss->ss2.tiled_surface = 1;
1162         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1163         break;
1164     }
1165 }
1166
1167 static void
1168 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
1169 {
1170     struct i965_interface_descriptor *desc;
1171     dri_bo *bo;
1172     int pp_index = pp_context->current_pp;
1173
1174     bo = pp_context->idrt.bo;
1175     dri_bo_map(bo, 1);
1176     assert(bo->virtual);
1177     desc = bo->virtual;
1178     memset(desc, 0, sizeof(*desc));
1179     desc->desc0.grf_reg_blocks = 10;
1180     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
1181     desc->desc1.const_urb_entry_read_offset = 0;
1182     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
1183     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
1184     desc->desc2.sampler_count = 0;
1185     desc->desc3.binding_table_entry_count = 0;
1186     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
1187
1188     dri_bo_emit_reloc(bo,
1189                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1190                       desc->desc0.grf_reg_blocks,
1191                       offsetof(struct i965_interface_descriptor, desc0),
1192                       pp_context->pp_modules[pp_index].kernel.bo);
1193
1194     dri_bo_emit_reloc(bo,
1195                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1196                       desc->desc2.sampler_count << 2,
1197                       offsetof(struct i965_interface_descriptor, desc2),
1198                       pp_context->sampler_state_table.bo);
1199
1200     dri_bo_unmap(bo);
1201     pp_context->idrt.num_interface_descriptors++;
1202 }
1203
1204 static void
1205 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
1206 {
1207     struct i965_vfe_state *vfe_state;
1208     dri_bo *bo;
1209
1210     bo = pp_context->vfe_state.bo;
1211     dri_bo_map(bo, 1);
1212     assert(bo->virtual);
1213     vfe_state = bo->virtual;
1214     memset(vfe_state, 0, sizeof(*vfe_state));
1215     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
1216     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
1217     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
1218     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
1219     vfe_state->vfe1.children_present = 0;
1220     vfe_state->vfe2.interface_descriptor_base = 
1221         pp_context->idrt.bo->offset >> 4; /* reloc */
1222     dri_bo_emit_reloc(bo,
1223                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1224                       0,
1225                       offsetof(struct i965_vfe_state, vfe2),
1226                       pp_context->idrt.bo);
1227     dri_bo_unmap(bo);
1228 }
1229
1230 static void
1231 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
1232 {
1233     unsigned char *constant_buffer;
1234     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1235
1236     assert(sizeof(*pp_static_parameter) == 128);
1237     dri_bo_map(pp_context->curbe.bo, 1);
1238     assert(pp_context->curbe.bo->virtual);
1239     constant_buffer = pp_context->curbe.bo->virtual;
1240     memcpy(constant_buffer, pp_static_parameter, sizeof(*pp_static_parameter));
1241     dri_bo_unmap(pp_context->curbe.bo);
1242 }
1243
1244 static void
1245 ironlake_pp_states_setup(VADriverContextP ctx,
1246                          struct i965_post_processing_context *pp_context)
1247 {
1248     ironlake_pp_interface_descriptor_table(pp_context);
1249     ironlake_pp_vfe_state(pp_context);
1250     ironlake_pp_upload_constants(pp_context);
1251 }
1252
1253 static void
1254 ironlake_pp_pipeline_select(VADriverContextP ctx,
1255                             struct i965_post_processing_context *pp_context)
1256 {
1257     struct intel_batchbuffer *batch = pp_context->batch;
1258
1259     BEGIN_BATCH(batch, 1);
1260     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
1261     ADVANCE_BATCH(batch);
1262 }
1263
1264 static void
1265 ironlake_pp_urb_layout(VADriverContextP ctx,
1266                        struct i965_post_processing_context *pp_context)
1267 {
1268     struct intel_batchbuffer *batch = pp_context->batch;
1269     unsigned int vfe_fence, cs_fence;
1270
1271     vfe_fence = pp_context->urb.cs_start;
1272     cs_fence = pp_context->urb.size;
1273
1274     BEGIN_BATCH(batch, 3);
1275     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
1276     OUT_BATCH(batch, 0);
1277     OUT_BATCH(batch, 
1278               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
1279               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
1280     ADVANCE_BATCH(batch);
1281 }
1282
1283 static void
1284 ironlake_pp_state_base_address(VADriverContextP ctx,
1285                                struct i965_post_processing_context *pp_context)
1286 {
1287     struct intel_batchbuffer *batch = pp_context->batch;
1288
1289     BEGIN_BATCH(batch, 8);
1290     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
1291     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1292     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
1293     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1294     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1295     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1296     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1297     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1298     ADVANCE_BATCH(batch);
1299 }
1300
1301 static void
1302 ironlake_pp_state_pointers(VADriverContextP ctx,
1303                            struct i965_post_processing_context *pp_context)
1304 {
1305     struct intel_batchbuffer *batch = pp_context->batch;
1306
1307     BEGIN_BATCH(batch, 3);
1308     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
1309     OUT_BATCH(batch, 0);
1310     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1311     ADVANCE_BATCH(batch);
1312 }
1313
1314 static void 
1315 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
1316                           struct i965_post_processing_context *pp_context)
1317 {
1318     struct intel_batchbuffer *batch = pp_context->batch;
1319
1320     BEGIN_BATCH(batch, 2);
1321     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
1322     OUT_BATCH(batch,
1323               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
1324               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
1325     ADVANCE_BATCH(batch);
1326 }
1327
1328 static void
1329 ironlake_pp_constant_buffer(VADriverContextP ctx,
1330                             struct i965_post_processing_context *pp_context)
1331 {
1332     struct intel_batchbuffer *batch = pp_context->batch;
1333
1334     BEGIN_BATCH(batch, 2);
1335     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1336     OUT_RELOC(batch, pp_context->curbe.bo,
1337               I915_GEM_DOMAIN_INSTRUCTION, 0,
1338               pp_context->urb.size_cs_entry - 1);
1339     ADVANCE_BATCH(batch);    
1340 }
1341
1342 static void
1343 ironlake_pp_object_walker(VADriverContextP ctx,
1344                           struct i965_post_processing_context *pp_context)
1345 {
1346     struct intel_batchbuffer *batch = pp_context->batch;
1347     int x, x_steps, y, y_steps;
1348     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1349
1350     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
1351     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
1352
1353     for (y = 0; y < y_steps; y++) {
1354         for (x = 0; x < x_steps; x++) {
1355             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
1356                 BEGIN_BATCH(batch, 20);
1357                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
1358                 OUT_BATCH(batch, 0);
1359                 OUT_BATCH(batch, 0); /* no indirect data */
1360                 OUT_BATCH(batch, 0);
1361
1362                 /* inline data grf 5-6 */
1363                 assert(sizeof(*pp_inline_parameter) == 64);
1364                 intel_batchbuffer_data(batch, pp_inline_parameter, sizeof(*pp_inline_parameter));
1365
1366                 ADVANCE_BATCH(batch);
1367             }
1368         }
1369     }
1370 }
1371
1372 static void
1373 ironlake_pp_pipeline_setup(VADriverContextP ctx,
1374                            struct i965_post_processing_context *pp_context)
1375 {
1376     struct intel_batchbuffer *batch = pp_context->batch;
1377
1378     intel_batchbuffer_start_atomic(batch, 0x1000);
1379     intel_batchbuffer_emit_mi_flush(batch);
1380     ironlake_pp_pipeline_select(ctx, pp_context);
1381     ironlake_pp_state_base_address(ctx, pp_context);
1382     ironlake_pp_state_pointers(ctx, pp_context);
1383     ironlake_pp_urb_layout(ctx, pp_context);
1384     ironlake_pp_cs_urb_layout(ctx, pp_context);
1385     ironlake_pp_constant_buffer(ctx, pp_context);
1386     ironlake_pp_object_walker(ctx, pp_context);
1387     intel_batchbuffer_end_atomic(batch);
1388 }
1389
1390 // update u/v offset when the surface format are packed yuv
1391 static void i965_update_src_surface_static_parameter(
1392     VADriverContextP    ctx, 
1393     struct i965_post_processing_context *pp_context,
1394     const struct i965_surface *surface)
1395 {
1396     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1397     int fourcc = pp_get_surface_fourcc(ctx, surface);
1398
1399     switch (fourcc) {
1400     case VA_FOURCC('Y', 'U', 'Y', '2'):
1401         pp_static_parameter->grf1.source_packed_u_offset = 1;
1402         pp_static_parameter->grf1.source_packed_v_offset = 3;
1403         break;
1404     case VA_FOURCC('U', 'Y', 'V', 'Y'):
1405         pp_static_parameter->grf1.source_packed_y_offset = 1;
1406         pp_static_parameter->grf1.source_packed_v_offset = 2;
1407         break;
1408     case VA_FOURCC('B', 'G', 'R', 'X'):
1409     case VA_FOURCC('B', 'G', 'R', 'A'):
1410         pp_static_parameter->grf1.source_rgb_layout = 0;
1411         break;
1412     case VA_FOURCC('R', 'G', 'B', 'X'):
1413     case VA_FOURCC('R', 'G', 'B', 'A'):
1414         pp_static_parameter->grf1.source_rgb_layout = 1;
1415         break;
1416     default:
1417         break;
1418     }
1419     
1420 }
1421
1422 static void i965_update_dst_surface_static_parameter(
1423     VADriverContextP    ctx, 
1424     struct i965_post_processing_context *pp_context,
1425     const struct i965_surface *surface)
1426 {
1427     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1428     int fourcc = pp_get_surface_fourcc(ctx, surface);
1429
1430     switch (fourcc) {
1431     case VA_FOURCC('Y', 'U', 'Y', '2'):
1432         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_u_offset = 1;
1433         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 3;
1434         break;
1435     case VA_FOURCC('U', 'Y', 'V', 'Y'):
1436         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_y_offset = 1;
1437         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 2;
1438         break;
1439     case VA_FOURCC('B', 'G', 'R', 'X'):
1440     case VA_FOURCC('B', 'G', 'R', 'A'):
1441         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 0;
1442         break;
1443     case VA_FOURCC('R', 'G', 'B', 'X'):
1444     case VA_FOURCC('R', 'G', 'B', 'A'):
1445         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 1;
1446         break;
1447     default:
1448         break;
1449     }
1450     
1451 }
1452
1453 static void
1454 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1455                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1456                           int width, int height, int pitch, int format, 
1457                           int index, int is_target)
1458 {
1459     struct i965_surface_state *ss;
1460     dri_bo *ss_bo;
1461     unsigned int tiling;
1462     unsigned int swizzle;
1463
1464     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1465     ss_bo = pp_context->surface_state_binding_table.bo;
1466     assert(ss_bo);
1467
1468     dri_bo_map(ss_bo, True);
1469     assert(ss_bo->virtual);
1470     ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1471     memset(ss, 0, sizeof(*ss));
1472     ss->ss0.surface_type = I965_SURFACE_2D;
1473     ss->ss0.surface_format = format;
1474     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1475     ss->ss2.width = width - 1;
1476     ss->ss2.height = height - 1;
1477     ss->ss3.pitch = pitch - 1;
1478     pp_set_surface_tiling(ss, tiling);
1479     dri_bo_emit_reloc(ss_bo,
1480                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1481                       surf_bo_offset,
1482                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
1483                       surf_bo);
1484     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1485     dri_bo_unmap(ss_bo);
1486 }
1487
1488 static void
1489 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1490                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1491                            int width, int height, int wpitch,
1492                            int xoffset, int yoffset,
1493                            int format, int interleave_chroma,
1494                            int index)
1495 {
1496     struct i965_surface_state2 *ss2;
1497     dri_bo *ss2_bo;
1498     unsigned int tiling;
1499     unsigned int swizzle;
1500
1501     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1502     ss2_bo = pp_context->surface_state_binding_table.bo;
1503     assert(ss2_bo);
1504
1505     dri_bo_map(ss2_bo, True);
1506     assert(ss2_bo->virtual);
1507     ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1508     memset(ss2, 0, sizeof(*ss2));
1509     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1510     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1511     ss2->ss1.width = width - 1;
1512     ss2->ss1.height = height - 1;
1513     ss2->ss2.pitch = wpitch - 1;
1514     ss2->ss2.interleave_chroma = interleave_chroma;
1515     ss2->ss2.surface_format = format;
1516     ss2->ss3.x_offset_for_cb = xoffset;
1517     ss2->ss3.y_offset_for_cb = yoffset;
1518     pp_set_surface2_tiling(ss2, tiling);
1519     dri_bo_emit_reloc(ss2_bo,
1520                       I915_GEM_DOMAIN_RENDER, 0,
1521                       surf_bo_offset,
1522                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
1523                       surf_bo);
1524     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1525     dri_bo_unmap(ss2_bo);
1526 }
1527
1528 static void
1529 gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1530                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1531                           int width, int height, int pitch, int format, 
1532                           int index, int is_target)
1533 {
1534     struct i965_driver_data * const i965 = i965_driver_data(ctx);  
1535     struct gen7_surface_state *ss;
1536     dri_bo *ss_bo;
1537     unsigned int tiling;
1538     unsigned int swizzle;
1539
1540     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1541     ss_bo = pp_context->surface_state_binding_table.bo;
1542     assert(ss_bo);
1543
1544     dri_bo_map(ss_bo, True);
1545     assert(ss_bo->virtual);
1546     ss = (struct gen7_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1547     memset(ss, 0, sizeof(*ss));
1548     ss->ss0.surface_type = I965_SURFACE_2D;
1549     ss->ss0.surface_format = format;
1550     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1551     ss->ss2.width = width - 1;
1552     ss->ss2.height = height - 1;
1553     ss->ss3.pitch = pitch - 1;
1554     gen7_pp_set_surface_tiling(ss, tiling);
1555     if (IS_HASWELL(i965->intel.device_id))
1556         gen7_render_set_surface_scs(ss);
1557     dri_bo_emit_reloc(ss_bo,
1558                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1559                       surf_bo_offset,
1560                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
1561                       surf_bo);
1562     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1563     dri_bo_unmap(ss_bo);
1564 }
1565
1566 static void
1567 gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1568                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1569                            int width, int height, int wpitch,
1570                            int xoffset, int yoffset,
1571                            int format, int interleave_chroma,
1572                            int index)
1573 {
1574     struct gen7_surface_state2 *ss2;
1575     dri_bo *ss2_bo;
1576     unsigned int tiling;
1577     unsigned int swizzle;
1578
1579     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1580     ss2_bo = pp_context->surface_state_binding_table.bo;
1581     assert(ss2_bo);
1582
1583     dri_bo_map(ss2_bo, True);
1584     assert(ss2_bo->virtual);
1585     ss2 = (struct gen7_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1586     memset(ss2, 0, sizeof(*ss2));
1587     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1588     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1589     ss2->ss1.width = width - 1;
1590     ss2->ss1.height = height - 1;
1591     ss2->ss2.pitch = wpitch - 1;
1592     ss2->ss2.interleave_chroma = interleave_chroma;
1593     ss2->ss2.surface_format = format;
1594     ss2->ss3.x_offset_for_cb = xoffset;
1595     ss2->ss3.y_offset_for_cb = yoffset;
1596     gen7_pp_set_surface2_tiling(ss2, tiling);
1597     dri_bo_emit_reloc(ss2_bo,
1598                       I915_GEM_DOMAIN_RENDER, 0,
1599                       surf_bo_offset,
1600                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
1601                       surf_bo);
1602     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1603     dri_bo_unmap(ss2_bo);
1604 }
1605
1606 static void 
1607 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1608                                 const struct i965_surface *surface, 
1609                                 int base_index, int is_target,
1610                                 int *width, int *height, int *pitch, int *offset)
1611 {
1612     struct i965_driver_data *i965 = i965_driver_data(ctx);
1613     struct object_surface *obj_surface;
1614     struct object_image *obj_image;
1615     dri_bo *bo;
1616     int fourcc = pp_get_surface_fourcc(ctx, surface);
1617     const int Y = 0;
1618     const int U = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 2 : 1;
1619     const int V = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 1 : 2;
1620     const int UV = 1;
1621     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1622     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); 
1623     int full_packed_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || 
1624                               fourcc == VA_FOURCC('R', 'G', 'B', 'X') || 
1625                               fourcc == VA_FOURCC('B', 'G', 'R', 'A') || 
1626                               fourcc == VA_FOURCC('B', 'G', 'R', 'X'));
1627     int scale_factor_of_1st_plane_width_in_byte = 1;
1628                               
1629     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1630         obj_surface = SURFACE(surface->id);
1631         bo = obj_surface->bo;
1632         width[0] = obj_surface->orig_width;
1633         height[0] = obj_surface->orig_height;
1634         pitch[0] = obj_surface->width;
1635         offset[0] = 0;
1636
1637         if (full_packed_format) {
1638             scale_factor_of_1st_plane_width_in_byte = 4; 
1639             pitch[0] = obj_surface->width * 4;
1640         }
1641         else if (packed_yuv ) {
1642             scale_factor_of_1st_plane_width_in_byte =  2; 
1643             pitch[0] = obj_surface->width * 2;
1644         }
1645         else if (interleaved_uv) {
1646             width[1] = obj_surface->orig_width;
1647             height[1] = obj_surface->orig_height / 2;
1648             pitch[1] = obj_surface->width;
1649             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1650         } else {
1651             width[1] = obj_surface->orig_width / 2;
1652             height[1] = obj_surface->orig_height / 2;
1653             pitch[1] = obj_surface->width / 2;
1654             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1655             width[2] = obj_surface->orig_width / 2;
1656             height[2] = obj_surface->orig_height / 2;
1657             pitch[2] = obj_surface->width / 2;
1658             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
1659         }
1660     } else {
1661         obj_image = IMAGE(surface->id);
1662         bo = obj_image->bo;
1663         width[0] = obj_image->image.width;
1664         height[0] = obj_image->image.height;
1665         pitch[0] = obj_image->image.pitches[0];
1666         offset[0] = obj_image->image.offsets[0];
1667
1668         if (full_packed_format) {
1669             scale_factor_of_1st_plane_width_in_byte = 4;
1670         }
1671         else if (packed_yuv ) {
1672             scale_factor_of_1st_plane_width_in_byte = 2;
1673         }
1674         else if (interleaved_uv) {
1675             width[1] = obj_image->image.width;
1676             height[1] = obj_image->image.height / 2;
1677             pitch[1] = obj_image->image.pitches[1];
1678             offset[1] = obj_image->image.offsets[1];
1679         } else {
1680             width[1] = obj_image->image.width / 2;
1681             height[1] = obj_image->image.height / 2;
1682             pitch[1] = obj_image->image.pitches[1];
1683             offset[1] = obj_image->image.offsets[1];
1684             width[2] = obj_image->image.width / 2;
1685             height[2] = obj_image->image.height / 2;
1686             pitch[2] = obj_image->image.pitches[2];
1687             offset[2] = obj_image->image.offsets[2];
1688         }
1689     }
1690
1691     /* Y surface */
1692     i965_pp_set_surface_state(ctx, pp_context,
1693                               bo, offset[Y],
1694                               width[Y] *scale_factor_of_1st_plane_width_in_byte / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
1695                               base_index, is_target);
1696
1697     if (!packed_yuv && !full_packed_format) {
1698         if (interleaved_uv) {
1699             i965_pp_set_surface_state(ctx, pp_context,
1700                                       bo, offset[UV],
1701                                       width[UV] / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
1702                                       base_index + 1, is_target);
1703         } else {
1704             /* U surface */
1705             i965_pp_set_surface_state(ctx, pp_context,
1706                                       bo, offset[U],
1707                                       width[U] / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
1708                                       base_index + 1, is_target);
1709
1710             /* V surface */
1711             i965_pp_set_surface_state(ctx, pp_context,
1712                                       bo, offset[V],
1713                                       width[V] / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
1714                                       base_index + 2, is_target);
1715         }
1716     }
1717
1718 }
1719
1720 static void 
1721 gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1722                                      const struct i965_surface *surface, 
1723                                      int base_index, int is_target,
1724                                      int *width, int *height, int *pitch, int *offset)
1725 {
1726     struct i965_driver_data *i965 = i965_driver_data(ctx);
1727     struct object_surface *obj_surface;
1728     struct object_image *obj_image;
1729     dri_bo *bo;
1730     int fourcc = pp_get_surface_fourcc(ctx, surface);
1731     const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1732                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1;
1733     const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1734                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 1 : 2;
1735     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1736     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y'));
1737
1738     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1739         obj_surface = SURFACE(surface->id);
1740         bo = obj_surface->bo;
1741         width[0] = obj_surface->orig_width;
1742         height[0] = obj_surface->orig_height;
1743         pitch[0] = obj_surface->width;
1744         offset[0] = 0;
1745
1746         if (packed_yuv) {
1747             if (is_target)
1748                 width[0] = obj_surface->orig_width * 2; /* surface format is R8, so double the width */
1749             else
1750                 width[0] = obj_surface->orig_width;     /* surface foramt is YCBCR, width is specified in units of pixels */
1751
1752             pitch[0] = obj_surface->width * 2;
1753         }
1754
1755         width[1] = obj_surface->cb_cr_width;
1756         height[1] = obj_surface->cb_cr_height;
1757         pitch[1] = obj_surface->cb_cr_pitch;
1758         offset[1] = obj_surface->y_cb_offset * obj_surface->width;
1759
1760         width[2] = obj_surface->cb_cr_width;
1761         height[2] = obj_surface->cb_cr_height;
1762         pitch[2] = obj_surface->cb_cr_pitch;
1763         offset[2] = obj_surface->y_cr_offset * obj_surface->width;
1764     } else {
1765         obj_image = IMAGE(surface->id);
1766         bo = obj_image->bo;
1767         width[0] = obj_image->image.width;
1768         height[0] = obj_image->image.height;
1769         pitch[0] = obj_image->image.pitches[0];
1770         offset[0] = obj_image->image.offsets[0];
1771
1772         if (packed_yuv) {
1773             if (is_target)
1774                 width[0] = obj_image->image.width * 2;  /* surface format is R8, so double the width */
1775             else
1776                 width[0] = obj_image->image.width;      /* surface foramt is YCBCR, width is specified in units of pixels */
1777         } else if (interleaved_uv) {
1778             width[1] = obj_image->image.width / 2;
1779             height[1] = obj_image->image.height / 2;
1780             pitch[1] = obj_image->image.pitches[1];
1781             offset[1] = obj_image->image.offsets[1];
1782         } else {
1783             width[1] = obj_image->image.width / 2;
1784             height[1] = obj_image->image.height / 2;
1785             pitch[1] = obj_image->image.pitches[U];
1786             offset[1] = obj_image->image.offsets[U];
1787             width[2] = obj_image->image.width / 2;
1788             height[2] = obj_image->image.height / 2;
1789             pitch[2] = obj_image->image.pitches[V];
1790             offset[2] = obj_image->image.offsets[V];
1791         }
1792     }
1793
1794     if (is_target) {
1795         gen7_pp_set_surface_state(ctx, pp_context,
1796                                   bo, 0,
1797                                   width[0] / 4, height[0], pitch[0],
1798                                   I965_SURFACEFORMAT_R8_SINT,
1799                                   base_index, 1);
1800
1801         if (!packed_yuv) {
1802             if (interleaved_uv) {
1803                 gen7_pp_set_surface_state(ctx, pp_context,
1804                                           bo, offset[1],
1805                                           width[1] / 2, height[1], pitch[1],
1806                                           I965_SURFACEFORMAT_R8G8_SINT,
1807                                           base_index + 1, 1);
1808             } else {
1809                 gen7_pp_set_surface_state(ctx, pp_context,
1810                                           bo, offset[1],
1811                                           width[1] / 4, height[1], pitch[1],
1812                                           I965_SURFACEFORMAT_R8_SINT,
1813                                           base_index + 1, 1);
1814                 gen7_pp_set_surface_state(ctx, pp_context,
1815                                           bo, offset[2],
1816                                           width[2] / 4, height[2], pitch[2],
1817                                           I965_SURFACEFORMAT_R8_SINT,
1818                                           base_index + 2, 1);
1819             }
1820         }
1821     } else {
1822         int format0 = SURFACE_FORMAT_Y8_UNORM;
1823
1824         switch (fourcc) {
1825         case VA_FOURCC('Y', 'U', 'Y', '2'):
1826             format0 = SURFACE_FORMAT_YCRCB_NORMAL;
1827             break;
1828
1829         case VA_FOURCC('U', 'Y', 'V', 'Y'):
1830             format0 = SURFACE_FORMAT_YCRCB_SWAPY;
1831             break;
1832
1833         default:
1834             break;
1835         }
1836
1837         gen7_pp_set_surface2_state(ctx, pp_context,
1838                                    bo, offset[0],
1839                                    width[0], height[0], pitch[0],
1840                                    0, 0,
1841                                    format0, 0,
1842                                    base_index);
1843
1844         if (!packed_yuv) {
1845             if (interleaved_uv) {
1846                 gen7_pp_set_surface2_state(ctx, pp_context,
1847                                            bo, offset[1],
1848                                            width[1], height[1], pitch[1],
1849                                            0, 0,
1850                                            SURFACE_FORMAT_R8B8_UNORM, 0,
1851                                            base_index + 1);
1852             } else {
1853                 gen7_pp_set_surface2_state(ctx, pp_context,
1854                                            bo, offset[1],
1855                                            width[1], height[1], pitch[1],
1856                                            0, 0,
1857                                            SURFACE_FORMAT_R8_UNORM, 0,
1858                                            base_index + 1);
1859                 gen7_pp_set_surface2_state(ctx, pp_context,
1860                                            bo, offset[2],
1861                                            width[2], height[2], pitch[2],
1862                                            0, 0,
1863                                            SURFACE_FORMAT_R8_UNORM, 0,
1864                                            base_index + 2);
1865             }
1866         }
1867     }
1868 }
1869
/*
 * Horizontal step count for the NULL post-processing module.
 *
 * private_context is accepted to match the step-callback signature but is
 * never read; the result is always 1.
 */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1875
/*
 * Vertical step count for the NULL post-processing module.
 *
 * private_context is accepted to match the step-callback signature but is
 * never read; the result is always 1.
 */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1881
/*
 * Per-block parameter hook for the NULL module.
 *
 * Nothing is programmed for block (x, y); pp_context is left untouched and
 * 0 is returned unconditionally.
 */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;
    (void)x;
    (void)y;

    return 0;
}
1887
1888 static VAStatus
1889 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1890                    const struct i965_surface *src_surface,
1891                    const VARectangle *src_rect,
1892                    struct i965_surface *dst_surface,
1893                    const VARectangle *dst_rect,
1894                    void *filter_param)
1895 {
1896     /* private function & data */
1897     pp_context->pp_x_steps = pp_null_x_steps;
1898     pp_context->pp_y_steps = pp_null_y_steps;
1899     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
1900
1901     dst_surface->flags = src_surface->flags;
1902
1903     return VA_STATUS_SUCCESS;
1904 }
1905
/*
 * Horizontal step count for the load/save module.
 *
 * private_context is not read; the result is always 1.
 */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1911
1912 static int
1913 pp_load_save_y_steps(void *private_context)
1914 {
1915     struct pp_load_save_context *pp_load_save_context = private_context;
1916
1917     return pp_load_save_context->dest_h / 8;
1918 }
1919
1920 static int
1921 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1922 {
1923     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1924     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
1925
1926     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_load_save_context->dest_x;
1927     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_load_save_context->dest_y;
1928
1929     return 0;
1930 }
1931
1932 static void calculate_boundary_block_mask(struct i965_post_processing_context *pp_context, const VARectangle *dst_rect)
1933 {
1934     int i;
1935     /* x offset of dest surface must be dword aligned.
1936      * so we have to extend dst surface on left edge, and mask out pixels not interested
1937      */
1938     if (dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT) {
1939         pp_context->block_horizontal_mask_left = 0;
1940         for (i=dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; i<GPU_ASM_BLOCK_WIDTH; i++)
1941         {
1942             pp_context->block_horizontal_mask_left |= 1<<i;
1943         }
1944     }
1945     else {
1946         pp_context->block_horizontal_mask_left = 0xffff;
1947     }
1948     
1949     int dst_width_adjust = dst_rect->width + dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; 
1950     if (dst_width_adjust%GPU_ASM_BLOCK_WIDTH){
1951         pp_context->block_horizontal_mask_right = (1 << (dst_width_adjust%GPU_ASM_BLOCK_WIDTH)) - 1;
1952     }
1953     else {
1954         pp_context->block_horizontal_mask_right = 0xffff;
1955     }
1956     
1957     if (dst_rect->height%GPU_ASM_BLOCK_HEIGHT){
1958         pp_context->block_vertical_mask_bottom = (1 << (dst_rect->height%GPU_ASM_BLOCK_HEIGHT)) - 1;
1959     }
1960     else {
1961         pp_context->block_vertical_mask_bottom = 0xff;
1962     }
1963
1964 }
1965 static VAStatus
1966 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1967                                 const struct i965_surface *src_surface,
1968                                 const VARectangle *src_rect,
1969                                 struct i965_surface *dst_surface,
1970                                 const VARectangle *dst_rect,
1971                                 void *filter_param)
1972 {
1973     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
1974     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1975     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1976     int width[3], height[3], pitch[3], offset[3];
1977     const int Y = 0;
1978
1979     /* source surface */
1980     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
1981                                     width, height, pitch, offset);
1982
1983     /* destination surface */
1984     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
1985                                     width, height, pitch, offset);
1986
1987     /* private function & data */
1988     pp_context->pp_x_steps = pp_load_save_x_steps;
1989     pp_context->pp_y_steps = pp_load_save_y_steps;
1990     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
1991
1992     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;;
1993     pp_load_save_context->dest_x = dst_rect->x - dst_left_edge_extend;
1994     pp_load_save_context->dest_y = dst_rect->y;
1995     pp_load_save_context->dest_h = ALIGN(dst_rect->height, 8);
1996     pp_load_save_context->dest_w = ALIGN(dst_rect->width+dst_left_edge_extend, 16);
1997
1998     pp_inline_parameter->grf5.block_count_x = pp_load_save_context->dest_w / 16;   /* 1 x N */
1999     pp_inline_parameter->grf5.number_blocks = pp_load_save_context->dest_w / 16;
2000
2001     pp_static_parameter->grf3.horizontal_origin_offset = src_rect->x;
2002     pp_static_parameter->grf3.vertical_origin_offset = src_rect->y;
2003
2004     // update u/v offset for packed yuv
2005     i965_update_src_surface_static_parameter (ctx, pp_context, src_surface);
2006     i965_update_dst_surface_static_parameter (ctx, pp_context, dst_surface);
2007
2008     dst_surface->flags = src_surface->flags;
2009
2010     return VA_STATUS_SUCCESS;
2011 }
2012
/*
 * Horizontal step count for the scaling module.
 *
 * private_context is not read; the result is always 1.
 */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2018
2019 static int
2020 pp_scaling_y_steps(void *private_context)
2021 {
2022     struct pp_scaling_context *pp_scaling_context = private_context;
2023
2024     return pp_scaling_context->dest_h / 8;
2025 }
2026
/*
 * Program the inline parameters of the scaling module for block (x, y).
 *
 * The normalized source origin of the block is derived from the per-pixel
 * scaling steps (x step from the inline parameters, y step from the static
 * parameters) and the normalized source offset kept in the scaling private
 * context.  The destination origin is the block index scaled by the block
 * size (16 x 8) plus the destination offset.  Always returns 0.
 */
static int
pp_scaling_set_block_parameter(struct pp_post_processing_context_placeholder_unused *unused_decl_guard, int x, int y);
2043
2044 static VAStatus
2045 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2046                            const struct i965_surface *src_surface,
2047                            const VARectangle *src_rect,
2048                            struct i965_surface *dst_surface,
2049                            const VARectangle *dst_rect,
2050                            void *filter_param)
2051 {
2052     struct i965_driver_data *i965 = i965_driver_data(ctx);
2053     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
2054     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2055     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2056     struct object_surface *obj_surface;
2057     struct i965_sampler_state *sampler_state;
2058     int in_w, in_h, in_wpitch, in_hpitch;
2059     int out_w, out_h, out_wpitch, out_hpitch;
2060
2061     /* source surface */
2062     obj_surface = SURFACE(src_surface->id);
2063     in_w = obj_surface->orig_width;
2064     in_h = obj_surface->orig_height;
2065     in_wpitch = obj_surface->width;
2066     in_hpitch = obj_surface->height;
2067
2068     /* source Y surface index 1 */
2069     i965_pp_set_surface_state(ctx, pp_context,
2070                               obj_surface->bo, 0,
2071                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2072                               1, 0);
2073
2074     /* source UV surface index 2 */
2075     i965_pp_set_surface_state(ctx, pp_context,
2076                               obj_surface->bo, in_wpitch * in_hpitch,
2077                               in_w / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2078                               2, 0);
2079
2080     /* destination surface */
2081     obj_surface = SURFACE(dst_surface->id);
2082     out_w = obj_surface->orig_width;
2083     out_h = obj_surface->orig_height;
2084     out_wpitch = obj_surface->width;
2085     out_hpitch = obj_surface->height;
2086
2087     /* destination Y surface index 7 */
2088     i965_pp_set_surface_state(ctx, pp_context,
2089                               obj_surface->bo, 0,
2090                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2091                               7, 1);
2092
2093     /* destination UV surface index 8 */
2094     i965_pp_set_surface_state(ctx, pp_context,
2095                               obj_surface->bo, out_wpitch * out_hpitch,
2096                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2097                               8, 1);
2098
2099     /* sampler state */
2100     dri_bo_map(pp_context->sampler_state_table.bo, True);
2101     assert(pp_context->sampler_state_table.bo->virtual);
2102     sampler_state = pp_context->sampler_state_table.bo->virtual;
2103
2104     /* SIMD16 Y index 1 */
2105     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
2106     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2107     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2108     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2109     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2110
2111     /* SIMD16 UV index 2 */
2112     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
2113     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2114     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2115     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2116     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2117
2118     dri_bo_unmap(pp_context->sampler_state_table.bo);
2119
2120     /* private function & data */
2121     pp_context->pp_x_steps = pp_scaling_x_steps;
2122     pp_context->pp_y_steps = pp_scaling_y_steps;
2123     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
2124
2125     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
2126     float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
2127     pp_scaling_context->dest_x = dst_rect->x - dst_left_edge_extend;
2128     pp_scaling_context->dest_y = dst_rect->y;
2129     pp_scaling_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2130     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 8);
2131     pp_scaling_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
2132     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
2133
2134     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2135
2136     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
2137     pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
2138     pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
2139
2140     dst_surface->flags = src_surface->flags;
2141
2142     return VA_STATUS_SUCCESS;
2143 }
2144
2145 static int
2146 pp_avs_x_steps(void *private_context)
2147 {
2148     struct pp_avs_context *pp_avs_context = private_context;
2149
2150     return pp_avs_context->dest_w / 16;
2151 }
2152
/*
 * Vertical step count for the AVS module.
 *
 * private_context is not read; the result is always 1.
 */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2158
2159 static int
2160 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2161 {
2162     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2163     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2164     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2165     float src_x_steping, src_y_steping, video_step_delta;
2166     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
2167
2168     if (pp_static_parameter->grf4.r4_2.avs.nlas == 0) {
2169         src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2170         pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_avs_context->src_normalized_x;
2171     } else if (tmp_w >= pp_avs_context->dest_w) {
2172         pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
2173         pp_inline_parameter->grf6.video_step_delta = 0;
2174         
2175         if (x == 0) {
2176             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
2177                 pp_avs_context->src_normalized_x;
2178         } else {
2179             src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2180             video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2181             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2182                 16 * 15 * video_step_delta / 2;
2183         }
2184     } else {
2185         int n0, n1, n2, nls_left, nls_right;
2186         int factor_a = 5, factor_b = 4;
2187         float f;
2188
2189         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
2190         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
2191         n2 = tmp_w / (16 * factor_a);
2192         nls_left = n0 + n2;
2193         nls_right = n1 + n2;
2194         f = (float) n2 * 16 / tmp_w;
2195         
2196         if (n0 < 5) {
2197             pp_inline_parameter->grf6.video_step_delta = 0.0;
2198
2199             if (x == 0) {
2200                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
2201                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
2202             } else {
2203                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2204                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2205                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2206                     16 * 15 * video_step_delta / 2;
2207             }
2208         } else {
2209             if (x < nls_left) {
2210                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
2211                 float a = f / (nls_left * 16 * factor_b);
2212                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
2213                 
2214                 pp_inline_parameter->grf6.video_step_delta = b;
2215
2216                 if (x == 0) {
2217                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
2218                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a;
2219                 } else {
2220                     src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2221                     video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2222                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2223                         16 * 15 * video_step_delta / 2;
2224                     pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
2225                 }
2226             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
2227                 /* scale the center linearly */
2228                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2229                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2230                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2231                     16 * 15 * video_step_delta / 2;
2232                 pp_inline_parameter->grf6.video_step_delta = 0.0;
2233                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
2234             } else {
2235                 float a = f / (nls_right * 16 * factor_b);
2236                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
2237
2238                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2239                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2240                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2241                     16 * 15 * video_step_delta / 2;
2242                 pp_inline_parameter->grf6.video_step_delta = -b;
2243
2244                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
2245                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
2246                 else
2247                     pp_inline_parameter->grf5.normalized_video_x_scaling_step -= b * 16;
2248             }
2249         }
2250     }
2251
2252     src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2253     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
2254     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2255     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
2256
2257     return 0;
2258 }
2259
2260 static VAStatus
2261 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2262                        const struct i965_surface *src_surface,
2263                        const VARectangle *src_rect,
2264                        struct i965_surface *dst_surface,
2265                        const VARectangle *dst_rect,
2266                        void *filter_param,
2267                        int nlas)
2268 {
2269     struct i965_driver_data *i965 = i965_driver_data(ctx);
2270     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2271     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2272     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2273     struct object_surface *obj_surface;
2274     struct i965_sampler_8x8 *sampler_8x8;
2275     struct i965_sampler_8x8_state *sampler_8x8_state;
2276     int index;
2277     int in_w, in_h, in_wpitch, in_hpitch;
2278     int out_w, out_h, out_wpitch, out_hpitch;
2279     int i;
2280
2281     /* surface */
2282     obj_surface = SURFACE(src_surface->id);
2283     in_w = obj_surface->orig_width;
2284     in_h = obj_surface->orig_height;
2285     in_wpitch = obj_surface->width;
2286     in_hpitch = obj_surface->height;
2287
2288     /* source Y surface index 1 */
2289     i965_pp_set_surface2_state(ctx, pp_context,
2290                                obj_surface->bo, 0,
2291                                in_w, in_h, in_wpitch,
2292                                0, 0,
2293                                SURFACE_FORMAT_Y8_UNORM, 0,
2294                                1);
2295
2296     /* source UV surface index 2 */
2297     i965_pp_set_surface2_state(ctx, pp_context,
2298                                obj_surface->bo, in_wpitch * in_hpitch,
2299                                in_w / 2, in_h / 2, in_wpitch,
2300                                0, 0,
2301                                SURFACE_FORMAT_R8B8_UNORM, 0,
2302                                2);
2303
2304     /* destination surface */
2305     obj_surface = SURFACE(dst_surface->id);
2306     out_w = obj_surface->orig_width;
2307     out_h = obj_surface->orig_height;
2308     out_wpitch = obj_surface->width;
2309     out_hpitch = obj_surface->height;
2310     assert(out_w <= out_wpitch && out_h <= out_hpitch);
2311
2312     /* destination Y surface index 7 */
2313     i965_pp_set_surface_state(ctx, pp_context,
2314                               obj_surface->bo, 0,
2315                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2316                               7, 1);
2317
2318     /* destination UV surface index 8 */
2319     i965_pp_set_surface_state(ctx, pp_context,
2320                               obj_surface->bo, out_wpitch * out_hpitch,
2321                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2322                               8, 1);
2323
2324     /* sampler 8x8 state */
2325     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2326     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2327     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2328     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2329     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2330
2331     for (i = 0; i < 17; i++) {
2332         /* for Y channel, currently ignore */
2333         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x00;
2334         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x00;
2335         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x08;
2336         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x18;
2337         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x18;
2338         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x08;
2339         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x00;
2340         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x00;
2341         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x00;
2342         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x00;
2343         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x10;
2344         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x10;
2345         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x10;
2346         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x10;
2347         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x00;
2348         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x00;
2349         /* for U/V channel, 0.25 */
2350         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2351         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2352         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
2353         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
2354         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
2355         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
2356         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2357         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2358         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2359         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2360         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
2361         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
2362         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
2363         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
2364         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2365         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2366     }
2367
2368     sampler_8x8_state->dw136.default_sharpness_level = 0;
2369     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
2370     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2371     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2372     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2373
2374     /* sampler 8x8 */
2375     dri_bo_map(pp_context->sampler_state_table.bo, True);
2376     assert(pp_context->sampler_state_table.bo->virtual);
2377     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
2378     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2379
2380     /* sample_8x8 Y index 1 */
2381     index = 1;
2382     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2383     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2384     sampler_8x8[index].dw0.ief_bypass = 1;
2385     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2386     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2387     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2388     sampler_8x8[index].dw2.global_noise_estimation = 22;
2389     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2390     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2391     sampler_8x8[index].dw3.strong_edge_weight = 7;
2392     sampler_8x8[index].dw3.regular_weight = 2;
2393     sampler_8x8[index].dw3.non_edge_weight = 0;
2394     sampler_8x8[index].dw3.gain_factor = 40;
2395     sampler_8x8[index].dw4.steepness_boost = 0;
2396     sampler_8x8[index].dw4.steepness_threshold = 0;
2397     sampler_8x8[index].dw4.mr_boost = 0;
2398     sampler_8x8[index].dw4.mr_threshold = 5;
2399     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2400     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2401     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2402     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2403     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2404     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2405     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2406     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2407     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2408     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2409     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2410     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2411     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2412     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2413     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2414     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2415     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2416     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2417     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2418     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2419     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2420     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2421     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2422     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2423     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2424     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2425     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2426     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2427     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2428     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2429     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2430     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2431     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2432     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2433     sampler_8x8[index].dw13.limiter_boost = 0;
2434     sampler_8x8[index].dw13.minimum_limiter = 10;
2435     sampler_8x8[index].dw13.maximum_limiter = 11;
2436     sampler_8x8[index].dw14.clip_limiter = 130;
2437     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2438                       I915_GEM_DOMAIN_RENDER, 
2439                       0,
2440                       0,
2441                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2442                       pp_context->sampler_state_table.bo_8x8);
2443
2444     /* sample_8x8 UV index 2 */
2445     index = 2;
2446     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2447     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2448     sampler_8x8[index].dw0.ief_bypass = 1;
2449     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2450     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2451     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2452     sampler_8x8[index].dw2.global_noise_estimation = 22;
2453     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2454     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2455     sampler_8x8[index].dw3.strong_edge_weight = 7;
2456     sampler_8x8[index].dw3.regular_weight = 2;
2457     sampler_8x8[index].dw3.non_edge_weight = 0;
2458     sampler_8x8[index].dw3.gain_factor = 40;
2459     sampler_8x8[index].dw4.steepness_boost = 0;
2460     sampler_8x8[index].dw4.steepness_threshold = 0;
2461     sampler_8x8[index].dw4.mr_boost = 0;
2462     sampler_8x8[index].dw4.mr_threshold = 5;
2463     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2464     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2465     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2466     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2467     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2468     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2469     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2470     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2471     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2472     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2473     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2474     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2475     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2476     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2477     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2478     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2479     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2480     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2481     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2482     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2483     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2484     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2485     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2486     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2487     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2488     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2489     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2490     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2491     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2492     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2493     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2494     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2495     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2496     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2497     sampler_8x8[index].dw13.limiter_boost = 0;
2498     sampler_8x8[index].dw13.minimum_limiter = 10;
2499     sampler_8x8[index].dw13.maximum_limiter = 11;
2500     sampler_8x8[index].dw14.clip_limiter = 130;
2501     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2502                       I915_GEM_DOMAIN_RENDER, 
2503                       0,
2504                       0,
2505                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2506                       pp_context->sampler_state_table.bo_8x8);
2507
2508     dri_bo_unmap(pp_context->sampler_state_table.bo);
2509
2510     /* private function & data */
2511     pp_context->pp_x_steps = pp_avs_x_steps;
2512     pp_context->pp_y_steps = pp_avs_y_steps;
2513     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
2514
2515     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
2516     float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
2517     pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
2518     pp_avs_context->dest_y = dst_rect->y;
2519     pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2520     pp_avs_context->dest_h = ALIGN(dst_rect->height, 8);
2521     pp_avs_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
2522     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
2523     pp_avs_context->src_w = src_rect->width + src_left_edge_extend;
2524     pp_avs_context->src_h = src_rect->height;
2525
2526     pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
2527     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2528
2529     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
2530     pp_inline_parameter->grf5.block_count_x = 1;        /* M x 1 */
2531     pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
2532     pp_inline_parameter->grf6.video_step_delta = 0.0;
2533
2534     dst_surface->flags = src_surface->flags;
2535
2536     return VA_STATUS_SUCCESS;
2537 }
2538
2539 static VAStatus
2540 pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2541                             const struct i965_surface *src_surface,
2542                             const VARectangle *src_rect,
2543                             struct i965_surface *dst_surface,
2544                             const VARectangle *dst_rect,
2545                             void *filter_param)
2546 {
2547     return pp_nv12_avs_initialize(ctx, pp_context,
2548                                   src_surface,
2549                                   src_rect,
2550                                   dst_surface,
2551                                   dst_rect,
2552                                   filter_param,
2553                                   1);
2554 }
2555
2556 static VAStatus
2557 gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2558                              const struct i965_surface *src_surface,
2559                              const VARectangle *src_rect,
2560                              struct i965_surface *dst_surface,
2561                              const VARectangle *dst_rect,
2562                              void *filter_param)
2563 {
2564     return pp_nv12_avs_initialize(ctx, pp_context,
2565                                   src_surface,
2566                                   src_rect,
2567                                   dst_surface,
2568                                   dst_rect,
2569                                   filter_param,
2570                                   0);    
2571 }
2572
2573 static int
2574 gen7_pp_avs_x_steps(void *private_context)
2575 {
2576     struct pp_avs_context *pp_avs_context = private_context;
2577
2578     return pp_avs_context->dest_w / 16;
2579 }
2580
2581 static int
2582 gen7_pp_avs_y_steps(void *private_context)
2583 {
2584     struct pp_avs_context *pp_avs_context = private_context;
2585
2586     return pp_avs_context->dest_h / 16;
2587 }
2588
2589 static int
2590 gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2591 {
2592     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2593     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2594
2595     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2596     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
2597     pp_inline_parameter->grf7.constant_0 = 0xffffffff;
2598     pp_inline_parameter->grf7.sampler_load_main_video_x_scaling_step = 1.0 / pp_avs_context->src_w;
2599
2600     return 0;
2601 }
2602
2603 static void gen7_update_src_surface_uv_offset(VADriverContextP    ctx, 
2604                                               struct i965_post_processing_context *pp_context,
2605                                               const struct i965_surface *surface)
2606 {
2607     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2608     int fourcc = pp_get_surface_fourcc(ctx, surface);
2609     
2610     if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) {
2611         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
2612         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
2613         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
2614     } else if (fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
2615         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 1;
2616         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 0;
2617         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 2;
2618     }
2619 }
2620
2621 static VAStatus
2622 gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2623                            const struct i965_surface *src_surface,
2624                            const VARectangle *src_rect,
2625                            struct i965_surface *dst_surface,
2626                            const VARectangle *dst_rect,
2627                            void *filter_param)
2628 {
2629     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2630     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2631     struct gen7_sampler_8x8 *sampler_8x8;
2632     struct i965_sampler_8x8_state *sampler_8x8_state;
2633     int index, i;
2634     int width[3], height[3], pitch[3], offset[3];
2635     int src_width, src_height;
2636
2637     /* source surface */
2638     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
2639                                          width, height, pitch, offset);
2640     src_width = width[0];
2641     src_height = height[0];
2642
2643     /* destination surface */
2644     gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
2645                                          width, height, pitch, offset);
2646
2647     /* sampler 8x8 state */
2648     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2649     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2650     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2651     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2652     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2653
2654     for (i = 0; i < 17; i++) {
2655         /* for Y channel, currently ignore */
2656         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0;
2657         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0;
2658         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0;
2659         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x0;
2660         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x0;
2661         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0;
2662         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0;
2663         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0;
2664         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0;
2665         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0;
2666         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0;
2667         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x0;
2668         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x0;
2669         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0;
2670         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0;
2671         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0;
2672         /* for U/V channel, 0.25 */
2673         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2674         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2675         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
2676         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
2677         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
2678         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
2679         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2680         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2681         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2682         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2683         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
2684         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
2685         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
2686         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
2687         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2688         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2689     }
2690
2691     sampler_8x8_state->dw136.default_sharpness_level = 0;
2692     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
2693     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2694     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2695     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2696
2697     /* sampler 8x8 */
2698     dri_bo_map(pp_context->sampler_state_table.bo, True);
2699     assert(pp_context->sampler_state_table.bo->virtual);
2700     assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
2701     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2702
2703     /* sample_8x8 Y index 4 */
2704     index = 4;
2705     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2706     sampler_8x8[index].dw0.global_noise_estimation = 255;
2707     sampler_8x8[index].dw0.ief_bypass = 1;
2708
2709     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2710
2711     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2712     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2713     sampler_8x8[index].dw2.r5x_coefficient = 9;
2714     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2715     sampler_8x8[index].dw2.r5c_coefficient = 3;
2716
2717     sampler_8x8[index].dw3.r3x_coefficient = 27;
2718     sampler_8x8[index].dw3.r3c_coefficient = 5;
2719     sampler_8x8[index].dw3.gain_factor = 40;
2720     sampler_8x8[index].dw3.non_edge_weight = 1;
2721     sampler_8x8[index].dw3.regular_weight = 2;
2722     sampler_8x8[index].dw3.strong_edge_weight = 7;
2723     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2724
2725     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2726                       I915_GEM_DOMAIN_RENDER, 
2727                       0,
2728                       0,
2729                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2730                       pp_context->sampler_state_table.bo_8x8);
2731
2732     /* sample_8x8 UV index 8 */
2733     index = 8;
2734     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2735     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2736     sampler_8x8[index].dw0.global_noise_estimation = 255;
2737     sampler_8x8[index].dw0.ief_bypass = 1;
2738     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2739     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2740     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2741     sampler_8x8[index].dw2.r5x_coefficient = 9;
2742     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2743     sampler_8x8[index].dw2.r5c_coefficient = 3;
2744     sampler_8x8[index].dw3.r3x_coefficient = 27;
2745     sampler_8x8[index].dw3.r3c_coefficient = 5;
2746     sampler_8x8[index].dw3.gain_factor = 40;
2747     sampler_8x8[index].dw3.non_edge_weight = 1;
2748     sampler_8x8[index].dw3.regular_weight = 2;
2749     sampler_8x8[index].dw3.strong_edge_weight = 7;
2750     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2751
2752     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2753                       I915_GEM_DOMAIN_RENDER, 
2754                       0,
2755                       0,
2756                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2757                       pp_context->sampler_state_table.bo_8x8);
2758
2759     /* sampler_8x8 V, index 12 */
2760     index = 12;
2761     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2762     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2763     sampler_8x8[index].dw0.global_noise_estimation = 255;
2764     sampler_8x8[index].dw0.ief_bypass = 1;
2765     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2766     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2767     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2768     sampler_8x8[index].dw2.r5x_coefficient = 9;
2769     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2770     sampler_8x8[index].dw2.r5c_coefficient = 3;
2771     sampler_8x8[index].dw3.r3x_coefficient = 27;
2772     sampler_8x8[index].dw3.r3c_coefficient = 5;
2773     sampler_8x8[index].dw3.gain_factor = 40;
2774     sampler_8x8[index].dw3.non_edge_weight = 1;
2775     sampler_8x8[index].dw3.regular_weight = 2;
2776     sampler_8x8[index].dw3.strong_edge_weight = 7;
2777     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2778
2779     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2780                       I915_GEM_DOMAIN_RENDER, 
2781                       0,
2782                       0,
2783                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2784                       pp_context->sampler_state_table.bo_8x8);
2785
2786     dri_bo_unmap(pp_context->sampler_state_table.bo);
2787
2788     /* private function & data */
2789     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
2790     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
2791     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
2792
2793     pp_avs_context->dest_x = dst_rect->x;
2794     pp_avs_context->dest_y = dst_rect->y;
2795     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
2796     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
2797     pp_avs_context->src_w = src_rect->width;
2798     pp_avs_context->src_h = src_rect->height;
2799
2800     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
2801     dw = MAX(dw, pp_avs_context->dest_w);
2802
2803     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
2804     pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */
2805     pp_static_parameter->grf2.avs_wa_width = dw;
2806     pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * dw);
2807     pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * dw);
2808
2809     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
2810     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / pp_avs_context->dest_h;
2811     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = -(float)pp_avs_context->dest_y / pp_avs_context->dest_h;
2812     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = -(float)pp_avs_context->dest_x / dw;
2813
2814     gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
2815
2816     dst_surface->flags = src_surface->flags;
2817
2818     return VA_STATUS_SUCCESS;
2819 }
2820
/*
 * Horizontal step count for the DNDI path: always a single step.
 * The private context is not consulted.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2826
2827 static int
2828 pp_dndi_y_steps(void *private_context)
2829 {
2830     struct pp_dndi_context *pp_dndi_context = private_context;
2831
2832     return pp_dndi_context->dest_h / 4;
2833 }
2834
2835 static int
2836 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2837 {
2838     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2839
2840     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2841     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
2842
2843     return 0;
2844 }
2845
2846 static VAStatus
2847 pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2848                         const struct i965_surface *src_surface,
2849                         const VARectangle *src_rect,
2850                         struct i965_surface *dst_surface,
2851                         const VARectangle *dst_rect,
2852                         void *filter_param)
2853 {
2854     struct i965_driver_data *i965 = i965_driver_data(ctx);
2855     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
2856     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2857     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2858     struct object_surface *obj_surface;
2859     struct i965_sampler_dndi *sampler_dndi;
2860     int index;
2861     int w, h;
2862     int orig_w, orig_h;
2863     int dndi_top_first = 1;
2864
2865     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
2866         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
2867
2868     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
2869         dndi_top_first = 1;
2870     else
2871         dndi_top_first = 0;
2872
2873     /* surface */
2874     obj_surface = SURFACE(src_surface->id);
2875     orig_w = obj_surface->orig_width;
2876     orig_h = obj_surface->orig_height;
2877     w = obj_surface->width;
2878     h = obj_surface->height;
2879
2880     if (pp_context->stmm.bo == NULL) {
2881         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2882                                            "STMM surface",
2883                                            w * h,
2884                                            4096);
2885         assert(pp_context->stmm.bo);
2886     }
2887
2888     /* source UV surface index 2 */
2889     i965_pp_set_surface_state(ctx, pp_context,
2890                               obj_surface->bo, w * h,
2891                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2892                               2, 0);
2893
2894     /* source YUV surface index 4 */
2895     i965_pp_set_surface2_state(ctx, pp_context,
2896                                obj_surface->bo, 0,
2897                                orig_w, orig_h, w,
2898                                0, h,
2899                                SURFACE_FORMAT_PLANAR_420_8, 1,
2900                                4);
2901
2902     /* source STMM surface index 20 */
2903     i965_pp_set_surface_state(ctx, pp_context,
2904                               pp_context->stmm.bo, 0,
2905                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2906                               20, 1);
2907
2908     /* destination surface */
2909     obj_surface = SURFACE(dst_surface->id);
2910     orig_w = obj_surface->orig_width;
2911     orig_h = obj_surface->orig_height;
2912     w = obj_surface->width;
2913     h = obj_surface->height;
2914
2915     /* destination Y surface index 7 */
2916     i965_pp_set_surface_state(ctx, pp_context,
2917                               obj_surface->bo, 0,
2918                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2919                               7, 1);
2920
2921     /* destination UV surface index 8 */
2922     i965_pp_set_surface_state(ctx, pp_context,
2923                               obj_surface->bo, w * h,
2924                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2925                               8, 1);
2926     /* sampler dndi */
2927     dri_bo_map(pp_context->sampler_state_table.bo, True);
2928     assert(pp_context->sampler_state_table.bo->virtual);
2929     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2930     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2931
2932     /* sample dndi index 1 */
2933     index = 0;
2934     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2935     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
2936     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2937     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2938
2939     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2940     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 4;
2941     sampler_dndi[index].dw1.stmm_c2 = 1;
2942     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2943     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2944
2945     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
2946     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
2947     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
2948     sampler_dndi[index].dw2.good_neighbor_threshold = 4;                // 0-63
2949
2950     sampler_dndi[index].dw3.maximum_stmm = 128;
2951     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2952     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2953     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2954     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2955
2956     sampler_dndi[index].dw4.sdi_delta = 8;
2957     sampler_dndi[index].dw4.sdi_threshold = 128;
2958     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2959     sampler_dndi[index].dw4.stmm_shift_up = 0;
2960     sampler_dndi[index].dw4.stmm_shift_down = 0;
2961     sampler_dndi[index].dw4.minimum_stmm = 0;
2962
2963     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 8;
2964     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 32;
2965     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 64;
2966     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 32;
2967
2968     sampler_dndi[index].dw6.dn_enable = 1;
2969     sampler_dndi[index].dw6.di_enable = 1;
2970     sampler_dndi[index].dw6.di_partial = 0;
2971     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2972     sampler_dndi[index].dw6.dndi_stream_id = 0;
2973     sampler_dndi[index].dw6.dndi_first_frame = 1;
2974     sampler_dndi[index].dw6.progressive_dn = 0;
2975     sampler_dndi[index].dw6.fmd_tear_threshold = 63;
2976     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2977     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2978
2979     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
2980     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
2981     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2982     sampler_dndi[index].dw7.column_width_minus1 = 0;
2983
2984     dri_bo_unmap(pp_context->sampler_state_table.bo);
2985
2986     /* private function & data */
2987     pp_context->pp_x_steps = pp_dndi_x_steps;
2988     pp_context->pp_y_steps = pp_dndi_y_steps;
2989     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
2990
2991     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
2992     pp_static_parameter->grf1.r1_6.di.top_field_first = dndi_top_first;
2993     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 0;
2994     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 0;
2995
2996     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
2997     pp_inline_parameter->grf5.number_blocks = w / 16;
2998     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
2999     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3000
3001     pp_dndi_context->dest_w = w;
3002     pp_dndi_context->dest_h = h;
3003
3004     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3005
3006     return VA_STATUS_SUCCESS;
3007 }
3008
/*
 * Horizontal step count for the denoise (DN) path: always a single step.
 * The private context is not consulted.
 */
static int
pp_dn_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
3014
3015 static int
3016 pp_dn_y_steps(void *private_context)
3017 {
3018     struct pp_dn_context *pp_dn_context = private_context;
3019
3020     return pp_dn_context->dest_h / 8;
3021 }
3022
3023 static int
3024 pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3025 {
3026     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3027
3028     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3029     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
3030
3031     return 0;
3032 }
3033
3034 static VAStatus
3035 pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3036                       const struct i965_surface *src_surface,
3037                       const VARectangle *src_rect,
3038                       struct i965_surface *dst_surface,
3039                       const VARectangle *dst_rect,
3040                       void *filter_param)
3041 {
3042     struct i965_driver_data *i965 = i965_driver_data(ctx);
3043     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
3044     struct object_surface *obj_surface;
3045     struct i965_sampler_dndi *sampler_dndi;
3046     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3047     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3048     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3049     int index;
3050     int w, h;
3051     int orig_w, orig_h;
3052     int dn_strength = 15;
3053     int dndi_top_first = 1;
3054     int dn_progressive = 0;
3055
3056     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3057         dndi_top_first = 1;
3058         dn_progressive = 1;
3059     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3060         dndi_top_first = 1;
3061         dn_progressive = 0;
3062     } else {
3063         dndi_top_first = 0;
3064         dn_progressive = 0;
3065     }
3066
3067     if (dn_filter_param) {
3068         float value = dn_filter_param->value;
3069         
3070         if (value > 1.0)
3071             value = 1.0;
3072         
3073         if (value < 0.0)
3074             value = 0.0;
3075
3076         dn_strength = (int)(value * 31.0F);
3077     }
3078
3079     /* surface */
3080     obj_surface = SURFACE(src_surface->id);
3081     orig_w = obj_surface->orig_width;
3082     orig_h = obj_surface->orig_height;
3083     w = obj_surface->width;
3084     h = obj_surface->height;
3085
3086     if (pp_context->stmm.bo == NULL) {
3087         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
3088                                            "STMM surface",
3089                                            w * h,
3090                                            4096);
3091         assert(pp_context->stmm.bo);
3092     }
3093
3094     /* source UV surface index 2 */
3095     i965_pp_set_surface_state(ctx, pp_context,
3096                               obj_surface->bo, w * h,
3097                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3098                               2, 0);
3099
3100     /* source YUV surface index 4 */
3101     i965_pp_set_surface2_state(ctx, pp_context,
3102                                obj_surface->bo, 0,
3103                                orig_w, orig_h, w,
3104                                0, h,
3105                                SURFACE_FORMAT_PLANAR_420_8, 1,
3106                                4);
3107
3108     /* source STMM surface index 20 */
3109     i965_pp_set_surface_state(ctx, pp_context,
3110                               pp_context->stmm.bo, 0,
3111                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3112                               20, 1);
3113
3114     /* destination surface */
3115     obj_surface = SURFACE(dst_surface->id);
3116     orig_w = obj_surface->orig_width;
3117     orig_h = obj_surface->orig_height;
3118     w = obj_surface->width;
3119     h = obj_surface->height;
3120
3121     /* destination Y surface index 7 */
3122     i965_pp_set_surface_state(ctx, pp_context,
3123                               obj_surface->bo, 0,
3124                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3125                               7, 1);
3126
3127     /* destination UV surface index 8 */
3128     i965_pp_set_surface_state(ctx, pp_context,
3129                               obj_surface->bo, w * h,
3130                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3131                               8, 1);
3132     /* sampler dn */
3133     dri_bo_map(pp_context->sampler_state_table.bo, True);
3134     assert(pp_context->sampler_state_table.bo->virtual);
3135     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3136     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3137
3138     /* sample dndi index 1 */
3139     index = 0;
3140     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
3141     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
3142     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
3143     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
3144
3145     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3146     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
3147     sampler_dndi[index].dw1.stmm_c2 = 0;
3148     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
3149     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
3150
3151     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
3152     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
3153     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
3154     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
3155
3156     sampler_dndi[index].dw3.maximum_stmm = 128;
3157     sampler_dndi[index].dw3.multipler_for_vecm = 2;
3158     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3159     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3160     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3161
3162     sampler_dndi[index].dw4.sdi_delta = 8;
3163     sampler_dndi[index].dw4.sdi_threshold = 128;
3164     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3165     sampler_dndi[index].dw4.stmm_shift_up = 0;
3166     sampler_dndi[index].dw4.stmm_shift_down = 0;
3167     sampler_dndi[index].dw4.minimum_stmm = 0;
3168
3169     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
3170     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
3171     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3172     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3173
3174     sampler_dndi[index].dw6.dn_enable = 1;
3175     sampler_dndi[index].dw6.di_enable = 0;
3176     sampler_dndi[index].dw6.di_partial = 0;
3177     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3178     sampler_dndi[index].dw6.dndi_stream_id = 1;
3179     sampler_dndi[index].dw6.dndi_first_frame = 1;
3180     sampler_dndi[index].dw6.progressive_dn = dn_progressive;
3181     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
3182     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
3183     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
3184
3185     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
3186     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
3187     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3188     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3189
3190     dri_bo_unmap(pp_context->sampler_state_table.bo);
3191
3192     /* private function & data */
3193     pp_context->pp_x_steps = pp_dn_x_steps;
3194     pp_context->pp_y_steps = pp_dn_y_steps;
3195     pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
3196
3197     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
3198     pp_static_parameter->grf1.r1_6.di.top_field_first = 0;
3199     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 64;
3200     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 192;
3201
3202     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
3203     pp_inline_parameter->grf5.number_blocks = w / 16;
3204     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3205     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3206
3207     pp_dn_context->dest_w = w;
3208     pp_dn_context->dest_h = h;
3209
3210     dst_surface->flags = src_surface->flags;
3211     
3212     return VA_STATUS_SUCCESS;
3213 }
3214
3215 static int
3216 gen7_pp_dndi_x_steps(void *private_context)
3217 {
3218     struct pp_dndi_context *pp_dndi_context = private_context;
3219
3220     return pp_dndi_context->dest_w / 16;
3221 }
3222
3223 static int
3224 gen7_pp_dndi_y_steps(void *private_context)
3225 {
3226     struct pp_dndi_context *pp_dndi_context = private_context;
3227
3228     return pp_dndi_context->dest_h / 4;
3229 }
3230
3231 static int
3232 gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3233 {
3234     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3235
3236     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16;
3237     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 4;
3238
3239     return 0;
3240 }
3241
3242 static VAStatus
3243 gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3244                              const struct i965_surface *src_surface,
3245                              const VARectangle *src_rect,
3246                              struct i965_surface *dst_surface,
3247                              const VARectangle *dst_rect,
3248                              void *filter_param)
3249 {
3250     struct i965_driver_data *i965 = i965_driver_data(ctx);
3251     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
3252     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3253     struct object_surface *obj_surface;
3254     struct gen7_sampler_dndi *sampler_dndi;
3255     int index;
3256     int w, h;
3257     int orig_w, orig_h;
3258     int dndi_top_first = 1;
3259
3260     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
3261         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
3262
3263     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
3264         dndi_top_first = 1;
3265     else
3266         dndi_top_first = 0;
3267
3268     /* surface */
3269     obj_surface = SURFACE(src_surface->id);
3270     orig_w = obj_surface->orig_width;
3271     orig_h = obj_surface->orig_height;
3272     w = obj_surface->width;
3273     h = obj_surface->height;
3274
3275     if (pp_context->stmm.bo == NULL) {
3276         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
3277                                            "STMM surface",
3278                                            w * h,
3279                                            4096);
3280         assert(pp_context->stmm.bo);
3281     }
3282
3283     /* source UV surface index 1 */
3284     gen7_pp_set_surface_state(ctx, pp_context,
3285                               obj_surface->bo, w * h,
3286                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3287                               1, 0);
3288
3289     /* source YUV surface index 3 */
3290     gen7_pp_set_surface2_state(ctx, pp_context,
3291                                obj_surface->bo, 0,
3292                                orig_w, orig_h, w,
3293                                0, h,
3294                                SURFACE_FORMAT_PLANAR_420_8, 1,
3295                                3);
3296
3297     /* source (temporal reference) YUV surface index 4 */
3298     gen7_pp_set_surface2_state(ctx, pp_context,
3299                                obj_surface->bo, 0,
3300                                orig_w, orig_h, w,
3301                                0, h,
3302                                SURFACE_FORMAT_PLANAR_420_8, 1,
3303                                4);
3304
3305     /* STMM / History Statistics input surface, index 5 */
3306     gen7_pp_set_surface_state(ctx, pp_context,
3307                               pp_context->stmm.bo, 0,
3308                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3309                               5, 1);
3310
3311     /* destination surface */
3312     obj_surface = SURFACE(dst_surface->id);
3313     orig_w = obj_surface->orig_width;
3314     orig_h = obj_surface->orig_height;
3315     w = obj_surface->width;
3316     h = obj_surface->height;
3317
3318     /* destination(Previous frame) Y surface index 27 */
3319     gen7_pp_set_surface_state(ctx, pp_context,
3320                               obj_surface->bo, 0,
3321                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3322                               27, 1);
3323
3324     /* destination(Previous frame) UV surface index 28 */
3325     gen7_pp_set_surface_state(ctx, pp_context,
3326                               obj_surface->bo, w * h,
3327                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3328                               28, 1);
3329
3330     /* destination(Current frame) Y surface index 30 */
3331     gen7_pp_set_surface_state(ctx, pp_context,
3332                               obj_surface->bo, 0,
3333                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3334                               30, 1);
3335
3336     /* destination(Current frame) UV surface index 31 */
3337     gen7_pp_set_surface_state(ctx, pp_context,
3338                               obj_surface->bo, w * h,
3339                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3340                               31, 1);
3341
3342     /* STMM output surface, index 33 */
3343     gen7_pp_set_surface_state(ctx, pp_context,
3344                               pp_context->stmm.bo, 0,
3345                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3346                               33, 1);
3347
3348
3349     /* sampler dndi */
3350     dri_bo_map(pp_context->sampler_state_table.bo, True);
3351     assert(pp_context->sampler_state_table.bo->virtual);
3352     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3353     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3354
3355     /* sample dndi index 0 */
3356     index = 0;
3357     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
3358     sampler_dndi[index].dw0.dnmh_delt = 8;
3359     sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
3360     sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
3361     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
3362     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
3363
3364     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3365     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
3366     sampler_dndi[index].dw1.stmm_c2 = 0;
3367     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
3368     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
3369
3370     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
3371     sampler_dndi[index].dw2.bne_edge_th = 1;
3372     sampler_dndi[index].dw2.smooth_mv_th = 0;
3373     sampler_dndi[index].dw2.sad_tight_th = 5;
3374     sampler_dndi[index].dw2.cat_slope_minus1 = 9;
3375     sampler_dndi[index].dw2.good_neighbor_th = 4;
3376
3377     sampler_dndi[index].dw3.maximum_stmm = 128;
3378     sampler_dndi[index].dw3.multipler_for_vecm = 2;
3379     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3380     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3381     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3382
3383     sampler_dndi[index].dw4.sdi_delta = 8;
3384     sampler_dndi[index].dw4.sdi_threshold = 128;
3385     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3386     sampler_dndi[index].dw4.stmm_shift_up = 0;
3387     sampler_dndi[index].dw4.stmm_shift_down = 0;
3388     sampler_dndi[index].dw4.minimum_stmm = 0;
3389
3390     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
3391     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
3392     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3393     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3394
3395     sampler_dndi[index].dw6.dn_enable = 0;
3396     sampler_dndi[index].dw6.di_enable = 1;
3397     sampler_dndi[index].dw6.di_partial = 0;
3398     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3399     sampler_dndi[index].dw6.dndi_stream_id = 1;
3400     sampler_dndi[index].dw6.dndi_first_frame = 1;
3401     sampler_dndi[index].dw6.progressive_dn = 0;
3402     sampler_dndi[index].dw6.mcdi_enable = 0;
3403     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
3404     sampler_dndi[index].dw6.cat_th1 = 0;
3405     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
3406     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
3407
3408     sampler_dndi[index].dw7.sad_tha = 5;
3409     sampler_dndi[index].dw7.sad_thb = 10;
3410     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3411     sampler_dndi[index].dw7.mc_pixel_consistency_th = 25;
3412     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3413     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3414     sampler_dndi[index].dw7.neighborpixel_th = 10;
3415     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3416
3417     dri_bo_unmap(pp_context->sampler_state_table.bo);
3418
3419     /* private function & data */
3420     pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
3421     pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
3422     pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
3423
3424     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3425     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3426     pp_static_parameter->grf1.di_top_field_first = 0;
3427     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3428
3429     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3430     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3431     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3432
3433     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3434     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3435
3436     pp_dndi_context->dest_w = w;
3437     pp_dndi_context->dest_h = h;
3438
3439     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3440
3441     return VA_STATUS_SUCCESS;
3442 }
3443
3444 static int
3445 gen7_pp_dn_x_steps(void *private_context)
3446 {
3447     struct pp_dn_context *pp_dn_context = private_context;
3448
3449     return pp_dn_context->dest_w / 16;
3450 }
3451
3452 static int
3453 gen7_pp_dn_y_steps(void *private_context)
3454 {
3455     struct pp_dn_context *pp_dn_context = private_context;
3456
3457     return pp_dn_context->dest_h / 4;
3458 }
3459
3460 static int
3461 gen7_pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3462 {
3463     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3464
3465     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3466     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
3467
3468     return 0;
3469 }
3470
3471 static VAStatus
3472 gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3473                            const struct i965_surface *src_surface,
3474                            const VARectangle *src_rect,
3475                            struct i965_surface *dst_surface,
3476                            const VARectangle *dst_rect,
3477                            void *filter_param)
3478 {
3479     struct i965_driver_data *i965 = i965_driver_data(ctx);
3480     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
3481     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3482     struct object_surface *obj_surface;
3483     struct gen7_sampler_dndi *sampler_dn;
3484     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3485     int index;
3486     int w, h;
3487     int orig_w, orig_h;
3488     int dn_strength = 15;
3489     int dndi_top_first = 1;
3490     int dn_progressive = 0;
3491
3492     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3493         dndi_top_first = 1;
3494         dn_progressive = 1;
3495     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3496         dndi_top_first = 1;
3497         dn_progressive = 0;
3498     } else {
3499         dndi_top_first = 0;
3500         dn_progressive = 0;
3501     }
3502
3503     if (dn_filter_param) {
3504         float value = dn_filter_param->value;
3505         
3506         if (value > 1.0)
3507             value = 1.0;
3508         
3509         if (value < 0.0)
3510             value = 0.0;
3511
3512         dn_strength = (int)(value * 31.0F);
3513     }
3514
3515     /* surface */
3516     obj_surface = SURFACE(src_surface->id);
3517     orig_w = obj_surface->orig_width;
3518     orig_h = obj_surface->orig_height;
3519     w = obj_surface->width;
3520     h = obj_surface->height;
3521
3522     if (pp_context->stmm.bo == NULL) {
3523         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
3524                                            "STMM surface",
3525                                            w * h,
3526                                            4096);
3527         assert(pp_context->stmm.bo);
3528     }
3529
3530     /* source UV surface index 1 */
3531     gen7_pp_set_surface_state(ctx, pp_context,
3532                               obj_surface->bo, w * h,
3533                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3534                               1, 0);
3535
3536     /* source YUV surface index 3 */
3537     gen7_pp_set_surface2_state(ctx, pp_context,
3538                                obj_surface->bo, 0,
3539                                orig_w, orig_h, w,
3540                                0, h,
3541                                SURFACE_FORMAT_PLANAR_420_8, 1,
3542                                3);
3543
3544     /* source (temporal reference) YUV surface index 4 */
3545     gen7_pp_set_surface2_state(ctx, pp_context,
3546                                obj_surface->bo, 0,
3547                                orig_w, orig_h, w,
3548                                0, h,
3549                                SURFACE_FORMAT_PLANAR_420_8, 1,
3550                                4);
3551
3552     /* STMM / History Statistics input surface, index 5 */
3553     gen7_pp_set_surface_state(ctx, pp_context,
3554                               pp_context->stmm.bo, 0,
3555                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3556                               5, 1);
3557
3558     /* destination surface */
3559     obj_surface = SURFACE(dst_surface->id);
3560     orig_w = obj_surface->orig_width;
3561     orig_h = obj_surface->orig_height;
3562     w = obj_surface->width;
3563     h = obj_surface->height;
3564
3565     /* destination Y surface index 24 */
3566     gen7_pp_set_surface_state(ctx, pp_context,
3567                               obj_surface->bo, 0,
3568                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3569                               24, 1);
3570
3571     /* destination UV surface index 25 */
3572     gen7_pp_set_surface_state(ctx, pp_context,
3573                               obj_surface->bo, w * h,
3574                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3575                               25, 1);
3576
3577     /* sampler dn */
3578     dri_bo_map(pp_context->sampler_state_table.bo, True);
3579     assert(pp_context->sampler_state_table.bo->virtual);
3580     assert(sizeof(*sampler_dn) == sizeof(int) * 8);
3581     sampler_dn = pp_context->sampler_state_table.bo->virtual;
3582
3583     /* sample dn index 1 */
3584     index = 0;
3585     sampler_dn[index].dw0.denoise_asd_threshold = 0;
3586     sampler_dn[index].dw0.dnmh_delt = 8;
3587     sampler_dn[index].dw0.vdi_walker_y_stride = 0;
3588     sampler_dn[index].dw0.vdi_walker_frame_sharing_enable = 0;
3589     sampler_dn[index].dw0.denoise_maximum_history = 128;      // 128-240
3590     sampler_dn[index].dw0.denoise_stad_threshold = 0;
3591
3592     sampler_dn[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3593     sampler_dn[index].dw1.denoise_moving_pixel_threshold = 0;
3594     sampler_dn[index].dw1.stmm_c2 = 0;
3595     sampler_dn[index].dw1.low_temporal_difference_threshold = 8;
3596     sampler_dn[index].dw1.temporal_difference_threshold = 16;
3597
3598     sampler_dn[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
3599     sampler_dn[index].dw2.bne_edge_th = 1;
3600     sampler_dn[index].dw2.smooth_mv_th = 0;
3601     sampler_dn[index].dw2.sad_tight_th = 5;
3602     sampler_dn[index].dw2.cat_slope_minus1 = 9;
3603     sampler_dn[index].dw2.good_neighbor_th = 4;
3604
3605     sampler_dn[index].dw3.maximum_stmm = 128;
3606     sampler_dn[index].dw3.multipler_for_vecm = 2;
3607     sampler_dn[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3608     sampler_dn[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3609     sampler_dn[index].dw3.stmm_blending_constant_select = 0;
3610
3611     sampler_dn[index].dw4.sdi_delta = 8;
3612     sampler_dn[index].dw4.sdi_threshold = 128;
3613     sampler_dn[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3614     sampler_dn[index].dw4.stmm_shift_up = 0;
3615     sampler_dn[index].dw4.stmm_shift_down = 0;
3616     sampler_dn[index].dw4.minimum_stmm = 0;
3617
3618     sampler_dn[index].dw5.fmd_temporal_difference_threshold = 0;
3619     sampler_dn[index].dw5.sdi_fallback_mode_2_constant = 0;
3620     sampler_dn[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3621     sampler_dn[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3622
3623     sampler_dn[index].dw6.dn_enable = 1;
3624     sampler_dn[index].dw6.di_enable = 0;
3625     sampler_dn[index].dw6.di_partial = 0;
3626     sampler_dn[index].dw6.dndi_top_first = dndi_top_first;
3627     sampler_dn[index].dw6.dndi_stream_id = 1;
3628     sampler_dn[index].dw6.dndi_first_frame = 1;
3629     sampler_dn[index].dw6.progressive_dn = dn_progressive;
3630     sampler_dn[index].dw6.mcdi_enable = 0;
3631     sampler_dn[index].dw6.fmd_tear_threshold = 32;
3632     sampler_dn[index].dw6.cat_th1 = 0;
3633     sampler_dn[index].dw6.fmd2_vertical_difference_threshold = 32;
3634     sampler_dn[index].dw6.fmd1_vertical_difference_threshold = 32;
3635
3636     sampler_dn[index].dw7.sad_tha = 5;
3637     sampler_dn[index].dw7.sad_thb = 10;
3638     sampler_dn[index].dw7.fmd_for_1st_field_of_current_frame = 2;
3639     sampler_dn[index].dw7.mc_pixel_consistency_th = 25;
3640     sampler_dn[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
3641     sampler_dn[index].dw7.vdi_walker_enable = 0;
3642     sampler_dn[index].dw7.neighborpixel_th = 10;
3643     sampler_dn[index].dw7.column_width_minus1 = w / 16;
3644
3645     dri_bo_unmap(pp_context->sampler_state_table.bo);
3646
3647     /* private function & data */
3648     pp_context->pp_x_steps = gen7_pp_dn_x_steps;
3649     pp_context->pp_y_steps = gen7_pp_dn_y_steps;
3650     pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;
3651
3652     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3653     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3654     pp_static_parameter->grf1.di_top_field_first = 0;
3655     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3656
3657     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3658     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3659     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3660
3661     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3662     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3663
3664     pp_dn_context->dest_w = w;
3665     pp_dn_context->dest_h = h;
3666
3667     dst_surface->flags = src_surface->flags;
3668
3669     return VA_STATUS_SUCCESS;
3670 }
3671
3672 static VAStatus
3673 ironlake_pp_initialize(
3674     VADriverContextP   ctx,
3675     struct i965_post_processing_context *pp_context,
3676     const struct i965_surface *src_surface,
3677     const VARectangle *src_rect,
3678     struct i965_surface *dst_surface,
3679     const VARectangle *dst_rect,
3680     int                pp_index,
3681     void *filter_param
3682 )
3683 {
3684     VAStatus va_status;
3685     struct i965_driver_data *i965 = i965_driver_data(ctx);
3686     struct pp_module *pp_module;
3687     dri_bo *bo;
3688     int static_param_size, inline_param_size;
3689
3690     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3691     bo = dri_bo_alloc(i965->intel.bufmgr,
3692                       "surface state & binding table",
3693                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
3694                       4096);
3695     assert(bo);
3696     pp_context->surface_state_binding_table.bo = bo;
3697
3698     dri_bo_unreference(pp_context->curbe.bo);
3699     bo = dri_bo_alloc(i965->intel.bufmgr,
3700                       "constant buffer",
3701                       4096, 
3702                       4096);
3703     assert(bo);
3704     pp_context->curbe.bo = bo;
3705
3706     dri_bo_unreference(pp_context->idrt.bo);
3707     bo = dri_bo_alloc(i965->intel.bufmgr, 
3708                       "interface discriptor", 
3709                       sizeof(struct i965_interface_descriptor), 
3710                       4096);
3711     assert(bo);
3712     pp_context->idrt.bo = bo;
3713     pp_context->idrt.num_interface_descriptors = 0;
3714
3715     dri_bo_unreference(pp_context->sampler_state_table.bo);
3716     bo = dri_bo_alloc(i965->intel.bufmgr, 
3717                       "sampler state table", 
3718                       4096,
3719                       4096);
3720     assert(bo);
3721     dri_bo_map(bo, True);
3722     memset(bo->virtual, 0, bo->size);
3723     dri_bo_unmap(bo);
3724     pp_context->sampler_state_table.bo = bo;
3725
3726     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3727     bo = dri_bo_alloc(i965->intel.bufmgr, 
3728                       "sampler 8x8 state ",
3729                       4096,
3730                       4096);
3731     assert(bo);
3732     pp_context->sampler_state_table.bo_8x8 = bo;
3733
3734     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3735     bo = dri_bo_alloc(i965->intel.bufmgr, 
3736                       "sampler 8x8 state ",
3737                       4096,
3738                       4096);
3739     assert(bo);
3740     pp_context->sampler_state_table.bo_8x8_uv = bo;
3741
3742     dri_bo_unreference(pp_context->vfe_state.bo);
3743     bo = dri_bo_alloc(i965->intel.bufmgr, 
3744                       "vfe state", 
3745                       sizeof(struct i965_vfe_state), 
3746                       4096);
3747     assert(bo);
3748     pp_context->vfe_state.bo = bo;
3749
3750     static_param_size = sizeof(struct pp_static_parameter);
3751     inline_param_size = sizeof(struct pp_inline_parameter);
3752
3753     memset(pp_context->pp_static_parameter, 0, static_param_size);
3754     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
3755     
3756     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
3757     pp_context->current_pp = pp_index;
3758     pp_module = &pp_context->pp_modules[pp_index];
3759     
3760     if (pp_module->initialize)
3761         va_status = pp_module->initialize(ctx, pp_context,
3762                                           src_surface,
3763                                           src_rect,
3764                                           dst_surface,
3765                                           dst_rect,
3766                                           filter_param);
3767     else
3768         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
3769
3770     return va_status;
3771 }
3772
3773 static VAStatus
3774 ironlake_post_processing(
3775     VADriverContextP   ctx,
3776     struct i965_post_processing_context *pp_context,
3777     const struct i965_surface *src_surface,
3778     const VARectangle *src_rect,
3779     struct i965_surface *dst_surface,
3780     const VARectangle *dst_rect,
3781     int                pp_index,
3782     void *filter_param
3783 )
3784 {
3785     VAStatus va_status;
3786
3787     va_status = ironlake_pp_initialize(ctx, pp_context,
3788                                        src_surface,
3789                                        src_rect,
3790                                        dst_surface,
3791                                        dst_rect,
3792                                        pp_index,
3793                                        filter_param);
3794
3795     if (va_status == VA_STATUS_SUCCESS) {
3796         ironlake_pp_states_setup(ctx, pp_context);
3797         ironlake_pp_pipeline_setup(ctx, pp_context);
3798     }
3799
3800     return va_status;
3801 }
3802
3803 static VAStatus
3804 gen6_pp_initialize(
3805     VADriverContextP   ctx,
3806     struct i965_post_processing_context *pp_context,
3807     const struct i965_surface *src_surface,
3808     const VARectangle *src_rect,
3809     struct i965_surface *dst_surface,
3810     const VARectangle *dst_rect,
3811     int                pp_index,
3812     void *filter_param
3813 )
3814 {
3815     VAStatus va_status;
3816     struct i965_driver_data *i965 = i965_driver_data(ctx);
3817     struct pp_module *pp_module;
3818     dri_bo *bo;
3819     int static_param_size, inline_param_size;
3820
3821     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3822     bo = dri_bo_alloc(i965->intel.bufmgr,
3823                       "surface state & binding table",
3824                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
3825                       4096);
3826     assert(bo);
3827     pp_context->surface_state_binding_table.bo = bo;
3828
3829     dri_bo_unreference(pp_context->curbe.bo);
3830     bo = dri_bo_alloc(i965->intel.bufmgr,
3831                       "constant buffer",
3832                       4096, 
3833                       4096);
3834     assert(bo);
3835     pp_context->curbe.bo = bo;
3836
3837     dri_bo_unreference(pp_context->idrt.bo);
3838     bo = dri_bo_alloc(i965->intel.bufmgr, 
3839                       "interface discriptor", 
3840                       sizeof(struct gen6_interface_descriptor_data), 
3841                       4096);
3842     assert(bo);
3843     pp_context->idrt.bo = bo;
3844     pp_context->idrt.num_interface_descriptors = 0;
3845
3846     dri_bo_unreference(pp_context->sampler_state_table.bo);
3847     bo = dri_bo_alloc(i965->intel.bufmgr, 
3848                       "sampler state table", 
3849                       4096,
3850                       4096);
3851     assert(bo);
3852     dri_bo_map(bo, True);
3853     memset(bo->virtual, 0, bo->size);
3854     dri_bo_unmap(bo);
3855     pp_context->sampler_state_table.bo = bo;
3856
3857     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3858     bo = dri_bo_alloc(i965->intel.bufmgr, 
3859                       "sampler 8x8 state ",
3860                       4096,
3861                       4096);
3862     assert(bo);
3863     pp_context->sampler_state_table.bo_8x8 = bo;
3864
3865     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3866     bo = dri_bo_alloc(i965->intel.bufmgr, 
3867                       "sampler 8x8 state ",
3868                       4096,
3869                       4096);
3870     assert(bo);
3871     pp_context->sampler_state_table.bo_8x8_uv = bo;
3872
3873     dri_bo_unreference(pp_context->vfe_state.bo);
3874     bo = dri_bo_alloc(i965->intel.bufmgr, 
3875                       "vfe state", 
3876                       sizeof(struct i965_vfe_state), 
3877                       4096);
3878     assert(bo);
3879     pp_context->vfe_state.bo = bo;
3880     
3881     if (IS_GEN7(i965->intel.device_id)) {
3882         static_param_size = sizeof(struct gen7_pp_static_parameter);
3883         inline_param_size = sizeof(struct gen7_pp_inline_parameter);
3884     } else {
3885         static_param_size = sizeof(struct pp_static_parameter);
3886         inline_param_size = sizeof(struct pp_inline_parameter);
3887     }
3888
3889     memset(pp_context->pp_static_parameter, 0, static_param_size);
3890     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
3891
3892     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
3893     pp_context->current_pp = pp_index;
3894     pp_module = &pp_context->pp_modules[pp_index];
3895     
3896     if (pp_module->initialize)
3897         va_status = pp_module->initialize(ctx, pp_context,
3898                                           src_surface,
3899                                           src_rect,
3900                                           dst_surface,
3901                                           dst_rect,
3902                                           filter_param);
3903     else
3904         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
3905
3906     calculate_boundary_block_mask(pp_context, dst_rect);
3907     
3908     return va_status;
3909 }
3910
3911 static void
3912 gen6_pp_interface_descriptor_table(VADriverContextP   ctx,
3913                                    struct i965_post_processing_context *pp_context)
3914 {
3915     struct i965_driver_data *i965 = i965_driver_data(ctx);
3916     struct gen6_interface_descriptor_data *desc;
3917     dri_bo *bo;
3918     int pp_index = pp_context->current_pp;
3919
3920     bo = pp_context->idrt.bo;
3921     dri_bo_map(bo, True);
3922     assert(bo->virtual);
3923     desc = bo->virtual;
3924     memset(desc, 0, sizeof(*desc));
3925     desc->desc0.kernel_start_pointer = 
3926         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
3927     desc->desc1.single_program_flow = 1;
3928     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
3929     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
3930     desc->desc2.sampler_state_pointer = 
3931         pp_context->sampler_state_table.bo->offset >> 5;
3932     desc->desc3.binding_table_entry_count = 0;
3933     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
3934     desc->desc4.constant_urb_entry_read_offset = 0;
3935
3936     if (IS_GEN7(i965->intel.device_id))
3937         desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */
3938     else
3939         desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
3940
3941     dri_bo_emit_reloc(bo,
3942                       I915_GEM_DOMAIN_INSTRUCTION, 0,
3943                       0,
3944                       offsetof(struct gen6_interface_descriptor_data, desc0),
3945                       pp_context->pp_modules[pp_index].kernel.bo);
3946
3947     dri_bo_emit_reloc(bo,
3948                       I915_GEM_DOMAIN_INSTRUCTION, 0,
3949                       desc->desc2.sampler_count << 2,
3950                       offsetof(struct gen6_interface_descriptor_data, desc2),
3951                       pp_context->sampler_state_table.bo);
3952
3953     dri_bo_unmap(bo);
3954     pp_context->idrt.num_interface_descriptors++;
3955 }
3956
3957 static void
3958 gen6_pp_upload_constants(VADriverContextP ctx,
3959                          struct i965_post_processing_context *pp_context)
3960 {
3961     struct i965_driver_data *i965 = i965_driver_data(ctx);
3962     unsigned char *constant_buffer;
3963     int param_size;
3964
3965     assert(sizeof(struct pp_static_parameter) == 128);
3966     assert(sizeof(struct gen7_pp_static_parameter) == 192);
3967
3968     if (IS_GEN7(i965->intel.device_id))
3969         param_size = sizeof(struct gen7_pp_static_parameter);
3970     else
3971         param_size = sizeof(struct pp_static_parameter);
3972
3973     dri_bo_map(pp_context->curbe.bo, 1);
3974     assert(pp_context->curbe.bo->virtual);
3975     constant_buffer = pp_context->curbe.bo->virtual;
3976     memcpy(constant_buffer, pp_context->pp_static_parameter, param_size);
3977     dri_bo_unmap(pp_context->curbe.bo);
3978 }
3979
3980 static void
3981 gen6_pp_states_setup(VADriverContextP ctx,
3982                      struct i965_post_processing_context *pp_context)
3983 {
3984     gen6_pp_interface_descriptor_table(ctx, pp_context);
3985     gen6_pp_upload_constants(ctx, pp_context);
3986 }
3987
3988 static void
3989 gen6_pp_pipeline_select(VADriverContextP ctx,
3990                         struct i965_post_processing_context *pp_context)
3991 {
3992     struct intel_batchbuffer *batch = pp_context->batch;
3993
3994     BEGIN_BATCH(batch, 1);
3995     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
3996     ADVANCE_BATCH(batch);
3997 }
3998
3999 static void
4000 gen6_pp_state_base_address(VADriverContextP ctx,
4001                            struct i965_post_processing_context *pp_context)
4002 {
4003     struct intel_batchbuffer *batch = pp_context->batch;
4004
4005     BEGIN_BATCH(batch, 10);
4006     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
4007     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4008     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
4009     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4010     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4011     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4012     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4013     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4014     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4015     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4016     ADVANCE_BATCH(batch);
4017 }
4018
4019 static void
4020 gen6_pp_vfe_state(VADriverContextP ctx,
4021                   struct i965_post_processing_context *pp_context)
4022 {
4023     struct intel_batchbuffer *batch = pp_context->batch;
4024
4025     BEGIN_BATCH(batch, 8);
4026     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
4027     OUT_BATCH(batch, 0);
4028     OUT_BATCH(batch,
4029               (pp_context->urb.num_vfe_entries - 1) << 16 |
4030               pp_context->urb.num_vfe_entries << 8);
4031     OUT_BATCH(batch, 0);
4032     OUT_BATCH(batch,
4033               (pp_context->urb.size_vfe_entry * 2) << 16 |  /* URB Entry Allocation Size, in 256 bits unit */
4034               (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2)); /* CURBE Allocation Size, in 256 bits unit */
4035     OUT_BATCH(batch, 0);
4036     OUT_BATCH(batch, 0);
4037     OUT_BATCH(batch, 0);
4038     ADVANCE_BATCH(batch);
4039 }
4040
4041 static void
4042 gen6_pp_curbe_load(VADriverContextP ctx,
4043                    struct i965_post_processing_context *pp_context)
4044 {
4045     struct intel_batchbuffer *batch = pp_context->batch;
4046
4047     assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32 <= pp_context->curbe.bo->size);
4048
4049     BEGIN_BATCH(batch, 4);
4050     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
4051     OUT_BATCH(batch, 0);
4052     OUT_BATCH(batch,
4053               pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32);
4054     OUT_RELOC(batch, 
4055               pp_context->curbe.bo,
4056               I915_GEM_DOMAIN_INSTRUCTION, 0,
4057               0);
4058     ADVANCE_BATCH(batch);
4059 }
4060
4061 static void
4062 gen6_interface_descriptor_load(VADriverContextP ctx,
4063                                struct i965_post_processing_context *pp_context)
4064 {
4065     struct intel_batchbuffer *batch = pp_context->batch;
4066
4067     BEGIN_BATCH(batch, 4);
4068     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
4069     OUT_BATCH(batch, 0);
4070     OUT_BATCH(batch,
4071               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
4072     OUT_RELOC(batch, 
4073               pp_context->idrt.bo,
4074               I915_GEM_DOMAIN_INSTRUCTION, 0,
4075               0);
4076     ADVANCE_BATCH(batch);
4077 }
4078
4079 static void update_block_mask_parameter(struct i965_post_processing_context *pp_context, int x, int y, int x_steps, int y_steps) 
4080 {
4081     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
4082
4083     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
4084     pp_inline_parameter->grf6.block_vertical_mask_bottom = pp_context->block_vertical_mask_bottom;
4085     // for the first block, it always on the left edge. the second block will reload horizontal_mask from grf6.block_horizontal_mask_middle
4086     pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_left;
4087     pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
4088     pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_right;
4089
4090     /* 1 x N */
4091     if (x_steps == 1) {
4092         if (y == y_steps-1) {
4093             pp_inline_parameter->grf5.block_vertical_mask = pp_context->block_vertical_mask_bottom;
4094         }
4095         else {
4096             pp_inline_parameter->grf6.block_vertical_mask_bottom = 0xff;
4097         }
4098     }
4099
4100     /* M x 1 */
4101     if (y_steps == 1) {
4102         if (x == 0) { // all blocks in this group are on the left edge
4103             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_left;
4104             pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_left; 
4105         }
4106         else if (x == x_steps-1) {
4107             pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_right;
4108             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_right;
4109         }
4110         else {
4111             pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
4112             pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
4113             pp_inline_parameter->grf6.block_horizontal_mask_right = 0xffff;
4114         }
4115     }
4116
4117 }
4118
4119 static void
4120 gen6_pp_object_walker(VADriverContextP ctx,
4121                       struct i965_post_processing_context *pp_context)
4122 {
4123     struct i965_driver_data *i965 = i965_driver_data(ctx);
4124     struct intel_batchbuffer *batch = pp_context->batch;
4125     int x, x_steps, y, y_steps;
4126     int param_size, command_length_in_dws;
4127     dri_bo *command_buffer;
4128     unsigned int *command_ptr;
4129
4130     if (IS_GEN7(i965->intel.device_id))
4131         param_size = sizeof(struct gen7_pp_inline_parameter);
4132     else
4133         param_size = sizeof(struct pp_inline_parameter);
4134
4135     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
4136     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
4137     command_length_in_dws = 6 + (param_size >> 2);
4138     command_buffer = dri_bo_alloc(i965->intel.bufmgr,
4139                                   "command objects buffer",
4140                                   command_length_in_dws * 4 * x_steps * y_steps + 8,
4141                                   4096);
4142
4143     dri_bo_map(command_buffer, 1);
4144     command_ptr = command_buffer->virtual;
4145
4146     for (y = 0; y < y_steps; y++) {
4147         for (x = 0; x < x_steps; x++) {
4148             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
4149                 // some common block parameter update goes here, apply to all pp functions
4150                 if (IS_GEN6(i965->intel.device_id))
4151                     update_block_mask_parameter (pp_context, x, y, x_steps, y_steps);
4152                 
4153                 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
4154                 *command_ptr++ = 0;
4155                 *command_ptr++ = 0;
4156                 *command_ptr++ = 0;
4157                 *command_ptr++ = 0;
4158                 *command_ptr++ = 0;
4159                 memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
4160                 command_ptr += (param_size >> 2);
4161             }
4162         }
4163     }
4164
4165     if (command_length_in_dws * x_steps * y_steps % 2 == 0)
4166         *command_ptr++ = 0;
4167
4168     *command_ptr = MI_BATCH_BUFFER_END;
4169
4170     dri_bo_unmap(command_buffer);
4171
4172     BEGIN_BATCH(batch, 2);
4173     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
4174     OUT_RELOC(batch, command_buffer, 
4175               I915_GEM_DOMAIN_COMMAND, 0, 
4176               0);
4177     ADVANCE_BATCH(batch);
4178     
4179     dri_bo_unreference(command_buffer);
4180
4181     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
4182      * will cause control to pass back to ring buffer 
4183      */
4184     intel_batchbuffer_end_atomic(batch);
4185     intel_batchbuffer_flush(batch);
4186     intel_batchbuffer_start_atomic(batch, 0x1000);
4187 }
4188
4189 static void
4190 gen6_pp_pipeline_setup(VADriverContextP ctx,
4191                        struct i965_post_processing_context *pp_context)
4192 {
4193     struct intel_batchbuffer *batch = pp_context->batch;
4194
4195     intel_batchbuffer_start_atomic(batch, 0x1000);
4196     intel_batchbuffer_emit_mi_flush(batch);
4197     gen6_pp_pipeline_select(ctx, pp_context);
4198     gen6_pp_state_base_address(ctx, pp_context);
4199     gen6_pp_vfe_state(ctx, pp_context);
4200     gen6_pp_curbe_load(ctx, pp_context);
4201     gen6_interface_descriptor_load(ctx, pp_context);
4202     gen6_pp_object_walker(ctx, pp_context);
4203     intel_batchbuffer_end_atomic(batch);
4204 }
4205
4206 static VAStatus
4207 gen6_post_processing(
4208     VADriverContextP   ctx,
4209     struct i965_post_processing_context *pp_context,
4210     const struct i965_surface *src_surface,
4211     const VARectangle *src_rect,
4212     struct i965_surface *dst_surface,
4213     const VARectangle *dst_rect,
4214     int                pp_index,
4215     void * filter_param
4216 )
4217 {
4218     VAStatus va_status;
4219     
4220     va_status = gen6_pp_initialize(ctx, pp_context,
4221                                    src_surface,
4222                                    src_rect,
4223                                    dst_surface,
4224                                    dst_rect,
4225                                    pp_index,
4226                                    filter_param);
4227
4228     if (va_status == VA_STATUS_SUCCESS) {
4229         gen6_pp_states_setup(ctx, pp_context);
4230         gen6_pp_pipeline_setup(ctx, pp_context);
4231     }
4232
4233     return va_status;
4234 }
4235
4236 static VAStatus
4237 i965_post_processing_internal(
4238     VADriverContextP   ctx,
4239     struct i965_post_processing_context *pp_context,
4240     const struct i965_surface *src_surface,
4241     const VARectangle *src_rect,
4242     struct i965_surface *dst_surface,
4243     const VARectangle *dst_rect,
4244     int                pp_index,
4245     void *filter_param
4246 )
4247 {
4248     struct i965_driver_data *i965 = i965_driver_data(ctx);
4249     VAStatus va_status;
4250
4251     if (IS_GEN6(i965->intel.device_id) ||
4252         IS_GEN7(i965->intel.device_id))
4253         va_status = gen6_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
4254     else
4255         va_status = ironlake_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
4256     
4257     return va_status;
4258 }
4259
4260 VAStatus 
4261 i965_DestroySurfaces(VADriverContextP ctx,
4262                      VASurfaceID *surface_list,
4263                      int num_surfaces);
4264 VAStatus 
4265 i965_CreateSurfaces(VADriverContextP ctx,
4266                     int width,
4267                     int height,
4268                     int format,
4269                     int num_surfaces,
4270                     VASurfaceID *surfaces);
4271
/*
 * Split a packed 32-bit colour (alpha in bits 31-24, red in 23-16, green in
 * 15-8, blue in 7-0) into Y, U, V and alpha bytes.
 *
 * The conversion uses integer arithmetic with coefficients scaled by 1000;
 * Y is offset by 16 and U/V by 128.  Alpha is passed through unchanged.
 */
static void
rgb_to_yuv(unsigned int argb,
           unsigned char *y,
           unsigned char *u,
           unsigned char *v,
           unsigned char *a)
{
    const int red   = (int)((argb >> 16) & 0xff);
    const int green = (int)((argb >> 8) & 0xff);
    const int blue  = (int)(argb & 0xff);

    const int luma     = (257 * red + 504 * green + 98 * blue) / 1000 + 16;
    const int chroma_v = (439 * red - 368 * green - 71 * blue) / 1000 + 128;
    const int chroma_u = (-148 * red - 291 * green + 439 * blue) / 1000 + 128;

    *y = luma;
    *v = chroma_v;
    *u = chroma_u;
    *a = (argb >> 24) & 0xff;
}
4288
4289 static void 
4290 i965_vpp_clear_surface(VADriverContextP ctx,
4291                        struct i965_post_processing_context *pp_context,
4292                        VASurfaceID surface,
4293                        unsigned int color)
4294 {
4295     struct i965_driver_data *i965 = i965_driver_data(ctx);
4296     struct intel_batchbuffer *batch = pp_context->batch;
4297     struct object_surface *obj_surface = SURFACE(surface);
4298     unsigned int blt_cmd, br13;
4299     unsigned int tiling = 0, swizzle = 0;
4300     int pitch;
4301     unsigned char y, u, v, a = 0;
4302
4303     /* Currently only support NV12 surface */
4304     if (!obj_surface || obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
4305         return;
4306
4307     rgb_to_yuv(color, &y, &u, &v, &a);
4308
4309     if (a == 0)
4310         return;
4311
4312     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
4313     blt_cmd = XY_COLOR_BLT_CMD;
4314     pitch = obj_surface->width;
4315
4316     if (tiling != I915_TILING_NONE) {
4317         blt_cmd |= XY_COLOR_BLT_DST_TILED;
4318         pitch >>= 2;
4319     }
4320
4321     br13 = 0xf0 << 16;
4322     br13 |= BR13_8;
4323     br13 |= pitch;
4324
4325     if (IS_GEN6(i965->intel.device_id) ||
4326         IS_GEN7(i965->intel.device_id)) {
4327         intel_batchbuffer_start_atomic_blt(batch, 48);
4328         BEGIN_BLT_BATCH(batch, 12);
4329     } else {
4330         intel_batchbuffer_start_atomic(batch, 48);
4331         BEGIN_BATCH(batch, 12);
4332     }
4333
4334     OUT_BATCH(batch, blt_cmd);
4335     OUT_BATCH(batch, br13);
4336     OUT_BATCH(batch,
4337               0 << 16 |
4338               0);
4339     OUT_BATCH(batch,
4340               obj_surface->height << 16 |
4341               obj_surface->width);
4342     OUT_RELOC(batch, obj_surface->bo, 
4343               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4344               0);
4345     OUT_BATCH(batch, y);
4346
4347     br13 = 0xf0 << 16;
4348     br13 |= BR13_565;
4349     br13 |= pitch;
4350
4351     OUT_BATCH(batch, blt_cmd);
4352     OUT_BATCH(batch, br13);
4353     OUT_BATCH(batch,
4354               0 << 16 |
4355               0);
4356     OUT_BATCH(batch,
4357               obj_surface->height / 2 << 16 |
4358               obj_surface->width / 2);
4359     OUT_RELOC(batch, obj_surface->bo, 
4360               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4361               obj_surface->width * obj_surface->y_cb_offset);
4362     OUT_BATCH(batch, v << 8 | u);
4363
4364     ADVANCE_BATCH(batch);
4365     intel_batchbuffer_end_atomic(batch);
4366 }
4367
4368 VAStatus
4369 i965_scaling_processing(
4370     VADriverContextP   ctx,
4371     VASurfaceID        src_surface_id,
4372     const VARectangle *src_rect,
4373     VASurfaceID        dst_surface_id,
4374     const VARectangle *dst_rect,
4375     unsigned int       flags)
4376 {
4377     VAStatus va_status = VA_STATUS_SUCCESS;
4378     struct i965_driver_data *i965 = i965_driver_data(ctx);
4379     struct object_surface *src_surface_obj = SURFACE(src_surface_id);
4380     struct object_surface *dst_surface_obj = SURFACE(dst_surface_id);
4381  
4382     assert(src_surface_obj->fourcc == VA_FOURCC('N', 'V', '1', '2'));
4383     assert(dst_surface_obj->fourcc == VA_FOURCC('N', 'V', '1', '2'));
4384
4385     if (HAS_PP(i965) && (flags & I965_PP_FLAG_AVS)) {
4386         struct i965_surface src_surface;
4387         struct i965_surface dst_surface;
4388
4389          _i965LockMutex(&i965->pp_mutex);
4390
4391          src_surface.id = src_surface_id;
4392          src_surface.type = I965_SURFACE_TYPE_SURFACE;
4393          src_surface.flags = I965_SURFACE_FLAG_FRAME;
4394          dst_surface.id = dst_surface_id;
4395          dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4396          dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4397
4398          va_status = i965_post_processing_internal(ctx, i965->pp_context,
4399                                                    &src_surface,
4400                                                    src_rect,
4401                                                    &dst_surface,
4402                                                    dst_rect,
4403                                                    PP_NV12_AVS,
4404                                                    NULL);
4405
4406          _i965UnlockMutex(&i965->pp_mutex);
4407     }
4408
4409     return va_status;
4410 }
4411
4412 VASurfaceID
4413 i965_post_processing(
4414     VADriverContextP   ctx,
4415     VASurfaceID        surface,
4416     const VARectangle *src_rect,
4417     const VARectangle *dst_rect,
4418     unsigned int       flags,
4419     int               *has_done_scaling  
4420 )
4421 {
4422     struct i965_driver_data *i965 = i965_driver_data(ctx);
4423     VASurfaceID in_surface_id = surface;
4424     VASurfaceID out_surface_id = VA_INVALID_ID;
4425     
4426     *has_done_scaling = 0;
4427
4428     if (HAS_PP(i965)) {
4429         struct object_surface *obj_surface;
4430         VAStatus status;
4431         struct i965_surface src_surface;
4432         struct i965_surface dst_surface;
4433
4434         obj_surface = SURFACE(in_surface_id);
4435
4436         /* Currently only support post processing for NV12 surface */
4437         if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
4438             return out_surface_id;
4439
4440         _i965LockMutex(&i965->pp_mutex);
4441
4442         if (flags & I965_PP_FLAG_MCDI) {
4443             status = i965_CreateSurfaces(ctx,
4444                                          obj_surface->orig_width,
4445                                          obj_surface->orig_height,
4446                                          VA_RT_FORMAT_YUV420,
4447                                          1,
4448                                          &out_surface_id);
4449             assert(status == VA_STATUS_SUCCESS);
4450             obj_surface = SURFACE(out_surface_id);
4451             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4452             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
4453             src_surface.id = in_surface_id;
4454             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4455             src_surface.flags = (flags & I965_PP_FLAG_TOP_FIELD) ? 
4456                 I965_SURFACE_FLAG_TOP_FIELD_FIRST : I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST;
4457             dst_surface.id = out_surface_id;
4458             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4459             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4460
4461             i965_post_processing_internal(ctx, i965->pp_context,
4462                                           &src_surface,
4463                                           src_rect,
4464                                           &dst_surface,
4465                                           dst_rect,
4466                                           PP_NV12_DNDI,
4467                                           NULL);
4468         }
4469
4470         if (flags & I965_PP_FLAG_AVS) {
4471             struct i965_render_state *render_state = &i965->render_state;
4472             struct intel_region *dest_region = render_state->draw_region;
4473
4474             if (out_surface_id != VA_INVALID_ID)
4475                 in_surface_id = out_surface_id;
4476
4477             status = i965_CreateSurfaces(ctx,
4478                                          dest_region->width,
4479                                          dest_region->height,
4480                                          VA_RT_FORMAT_YUV420,
4481                                          1,
4482                                          &out_surface_id);
4483             assert(status == VA_STATUS_SUCCESS);
4484             obj_surface = SURFACE(out_surface_id);
4485             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4486             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
4487             src_surface.id = in_surface_id;
4488             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4489             src_surface.flags = I965_SURFACE_FLAG_FRAME;
4490             dst_surface.id = out_surface_id;
4491             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4492             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4493
4494             i965_post_processing_internal(ctx, i965->pp_context,
4495                                           &src_surface,
4496                                           src_rect,
4497                                           &dst_surface,
4498                                           dst_rect,
4499                                           PP_NV12_AVS,
4500                                           NULL);
4501
4502             if (in_surface_id != surface)
4503                 i965_DestroySurfaces(ctx, &in_surface_id, 1);
4504                 
4505             *has_done_scaling = 1;
4506         }
4507
4508         _i965UnlockMutex(&i965->pp_mutex);
4509     }
4510
4511     return out_surface_id;
4512 }       
4513
4514 static VAStatus
4515 i965_image_pl1_rgbx_processing(VADriverContextP ctx,
4516                           const struct i965_surface *src_surface,
4517                           const VARectangle *src_rect,
4518                           struct i965_surface *dst_surface,
4519                           const VARectangle *dst_rect)
4520 {
4521     struct i965_driver_data *i965 = i965_driver_data(ctx);
4522     struct i965_post_processing_context *pp_context = i965->pp_context;
4523     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4524
4525     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4526         i965_post_processing_internal(ctx, i965->pp_context,
4527                                       src_surface,
4528                                       src_rect,
4529                                       dst_surface,
4530                                       dst_rect,
4531                                       PP_RGBX_LOAD_SAVE_NV12,
4532                                       NULL);
4533     } else {
4534         assert(0);
4535         return VA_STATUS_ERROR_UNKNOWN;
4536     }
4537
4538     intel_batchbuffer_flush(pp_context->batch);
4539
4540     return VA_STATUS_SUCCESS;
4541 }
4542
4543 static VAStatus
4544 i965_image_pl3_processing(VADriverContextP ctx,
4545                           const struct i965_surface *src_surface,
4546                           const VARectangle *src_rect,
4547                           struct i965_surface *dst_surface,
4548                           const VARectangle *dst_rect)
4549 {
4550     struct i965_driver_data *i965 = i965_driver_data(ctx);
4551     struct i965_post_processing_context *pp_context = i965->pp_context;
4552     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4553     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
4554
4555     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4556         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4557                                                  src_surface,
4558                                                  src_rect,
4559                                                  dst_surface,
4560                                                  dst_rect,
4561                                                  PP_PL3_LOAD_SAVE_N12,
4562                                                  NULL);
4563     } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || 
4564                fourcc == VA_FOURCC('I', 'M', 'C', '3') || 
4565                fourcc == VA_FOURCC('Y', 'V', '1', '2') || 
4566                fourcc == VA_FOURCC('I', '4', '2', '0')) {
4567         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4568                                                  src_surface,
4569                                                  src_rect,
4570                                                  dst_surface,
4571                                                  dst_rect,
4572                                                  PP_PL3_LOAD_SAVE_PL3,
4573                                                  NULL);
4574     } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') ||
4575                fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
4576         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4577                                                  src_surface,
4578                                                  src_rect,
4579                                                  dst_surface,
4580                                                  dst_rect,
4581                                                  PP_PL3_LOAD_SAVE_PA,
4582                                                  NULL);
4583     }
4584     else {
4585         assert(0);
4586     }
4587
4588     intel_batchbuffer_flush(pp_context->batch);
4589
4590     return vaStatus;
4591 }
4592
4593 static VAStatus
4594 i965_image_pl2_processing(VADriverContextP ctx,
4595                           const struct i965_surface *src_surface,
4596                           const VARectangle *src_rect,
4597                           struct i965_surface *dst_surface,
4598                           const VARectangle *dst_rect)
4599 {
4600     struct i965_driver_data *i965 = i965_driver_data(ctx);
4601     struct i965_post_processing_context *pp_context = i965->pp_context;
4602     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4603     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
4604
4605     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4606         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4607                                                  src_surface,
4608                                                  src_rect,
4609                                                  dst_surface,
4610                                                  dst_rect,
4611                                                  PP_NV12_LOAD_SAVE_N12,
4612                                                  NULL);
4613     } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || 
4614                fourcc == VA_FOURCC('I', 'M', 'C', '3') || 
4615                fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
4616                fourcc == VA_FOURCC('I', '4', '2', '0') ) {
4617         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4618                                                  src_surface,
4619                                                  src_rect,
4620                                                  dst_surface,
4621                                                  dst_rect,
4622                                                  PP_NV12_LOAD_SAVE_PL3,
4623                                                  NULL);
4624     } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') ||
4625                fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
4626         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4627                                                  src_surface,
4628                                                  src_rect,
4629                                                  dst_surface,
4630                                                  dst_rect,
4631                                                  PP_NV12_LOAD_SAVE_PA,
4632                                                      NULL);
4633     } else if (fourcc == VA_FOURCC('B', 'G', 'R', 'X') || 
4634                fourcc == VA_FOURCC('B', 'G', 'R', 'A') ||
4635                fourcc == VA_FOURCC('R', 'G', 'B', 'X') ||
4636                fourcc == VA_FOURCC('R', 'G', 'B', 'A') ) {
4637         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4638                                       src_surface,
4639                                       src_rect,
4640                                       dst_surface,
4641                                       dst_rect,
4642                                       PP_NV12_LOAD_SAVE_RGBX,
4643                                       NULL);
4644     } else {
4645         assert(0);
4646         return VA_STATUS_ERROR_UNKNOWN;
4647     }
4648
4649     intel_batchbuffer_flush(pp_context->batch);
4650
4651     return vaStatus;
4652 }
4653
4654 static VAStatus
4655 i965_image_pl1_processing(VADriverContextP ctx,
4656                           const struct i965_surface *src_surface,
4657                           const VARectangle *src_rect,
4658                           struct i965_surface *dst_surface,
4659                           const VARectangle *dst_rect)
4660 {
4661     struct i965_driver_data *i965 = i965_driver_data(ctx);
4662     struct i965_post_processing_context *pp_context = i965->pp_context;
4663     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4664
4665     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4666         i965_post_processing_internal(ctx, i965->pp_context,
4667                                       src_surface,
4668                                       src_rect,
4669                                       dst_surface,
4670                                       dst_rect,
4671                                       PP_PA_LOAD_SAVE_NV12,
4672                                       NULL);
4673     }
4674     else if (fourcc == VA_FOURCC_YV12) {
4675         i965_post_processing_internal(ctx, i965->pp_context,
4676                                       src_surface,
4677                                       src_rect,
4678                                       dst_surface,
4679                                       dst_rect,
4680                                       PP_PA_LOAD_SAVE_PL3,
4681                                       NULL);
4682
4683     }
4684     else {
4685         return VA_STATUS_ERROR_UNKNOWN;
4686     }
4687
4688     intel_batchbuffer_flush(pp_context->batch);
4689
4690     return VA_STATUS_SUCCESS;
4691 }
4692
4693 VAStatus
4694 i965_image_processing(VADriverContextP ctx,
4695                       const struct i965_surface *src_surface,
4696                       const VARectangle *src_rect,
4697                       struct i965_surface *dst_surface,
4698                       const VARectangle *dst_rect)
4699 {
4700     struct i965_driver_data *i965 = i965_driver_data(ctx);
4701     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
4702
4703     if (HAS_PP(i965)) {
4704         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
4705
4706         _i965LockMutex(&i965->pp_mutex);
4707
4708         switch (fourcc) {
4709         case VA_FOURCC('Y', 'V', '1', '2'):
4710         case VA_FOURCC('I', '4', '2', '0'):
4711         case VA_FOURCC('I', 'M', 'C', '1'):
4712         case VA_FOURCC('I', 'M', 'C', '3'):
4713             status = i965_image_pl3_processing(ctx,
4714                                                src_surface,
4715                                                src_rect,
4716                                                dst_surface,
4717                                                dst_rect);
4718             break;
4719
4720         case  VA_FOURCC('N', 'V', '1', '2'):
4721             status = i965_image_pl2_processing(ctx,
4722                                                src_surface,
4723                                                src_rect,
4724                                                dst_surface,
4725                                                dst_rect);
4726             break;
4727         case  VA_FOURCC('Y', 'U', 'Y', '2'):
4728         case VA_FOURCC('U', 'Y', 'V', 'Y'):
4729             status = i965_image_pl1_processing(ctx,
4730                                                src_surface,
4731                                                src_rect,
4732                                                dst_surface,
4733                                                dst_rect);
4734             break;
4735         case VA_FOURCC('B', 'G', 'R', 'A'):
4736         case VA_FOURCC('B', 'G', 'R', 'X'):
4737         case VA_FOURCC('R', 'G', 'B', 'A'):
4738         case VA_FOURCC('R', 'G', 'B', 'X'):
4739             status = i965_image_pl1_rgbx_processing(ctx,
4740                                                src_surface,
4741                                                src_rect,
4742                                                dst_surface,
4743                                                dst_rect);
4744             break;
4745         default:
4746             status = VA_STATUS_ERROR_UNIMPLEMENTED;
4747             break;
4748         }
4749         
4750         _i965UnlockMutex(&i965->pp_mutex);
4751     }
4752
4753     return status;
4754 }       
4755
4756 static void
4757 i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
4758 {
4759     int i;
4760
4761     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4762     pp_context->surface_state_binding_table.bo = NULL;
4763
4764     dri_bo_unreference(pp_context->curbe.bo);
4765     pp_context->curbe.bo = NULL;
4766
4767     dri_bo_unreference(pp_context->sampler_state_table.bo);
4768     pp_context->sampler_state_table.bo = NULL;
4769
4770     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4771     pp_context->sampler_state_table.bo_8x8 = NULL;
4772
4773     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4774     pp_context->sampler_state_table.bo_8x8_uv = NULL;
4775
4776     dri_bo_unreference(pp_context->idrt.bo);
4777     pp_context->idrt.bo = NULL;
4778     pp_context->idrt.num_interface_descriptors = 0;
4779
4780     dri_bo_unreference(pp_context->vfe_state.bo);
4781     pp_context->vfe_state.bo = NULL;
4782
4783     dri_bo_unreference(pp_context->stmm.bo);
4784     pp_context->stmm.bo = NULL;
4785
4786     for (i = 0; i < NUM_PP_MODULES; i++) {
4787         struct pp_module *pp_module = &pp_context->pp_modules[i];
4788
4789         dri_bo_unreference(pp_module->kernel.bo);
4790         pp_module->kernel.bo = NULL;
4791     }
4792
4793     free(pp_context->pp_static_parameter);
4794     free(pp_context->pp_inline_parameter);
4795     pp_context->pp_static_parameter = NULL;
4796     pp_context->pp_inline_parameter = NULL;
4797 }
4798
4799 Bool
4800 i965_post_processing_terminate(VADriverContextP ctx)
4801 {
4802     struct i965_driver_data *i965 = i965_driver_data(ctx);
4803     struct i965_post_processing_context *pp_context = i965->pp_context;
4804
4805     if (pp_context) {
4806         i965_post_processing_context_finalize(pp_context);
4807         free(pp_context);
4808     }
4809
4810     i965->pp_context = NULL;
4811
4812     return True;
4813 }
4814
4815 static void
4816 i965_post_processing_context_init(VADriverContextP ctx,
4817                                   struct i965_post_processing_context *pp_context,
4818                                   struct intel_batchbuffer *batch)
4819 {
4820     struct i965_driver_data *i965 = i965_driver_data(ctx);
4821     int i;
4822
4823     pp_context->urb.size = URB_SIZE((&i965->intel));
4824     pp_context->urb.num_vfe_entries = 32;
4825     pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
4826     pp_context->urb.num_cs_entries = 1;
4827     
4828     if (IS_GEN7(i965->intel.device_id))
4829         pp_context->urb.size_cs_entry = 4;      /* in 512 bits unit */
4830     else
4831         pp_context->urb.size_cs_entry = 2;
4832
4833     pp_context->urb.vfe_start = 0;
4834     pp_context->urb.cs_start = pp_context->urb.vfe_start + 
4835         pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
4836     assert(pp_context->urb.cs_start + 
4837            pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
4838
4839     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
4840     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
4841     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
4842     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen75));
4843
4844     if (IS_HASWELL(i965->intel.device_id))
4845         memcpy(pp_context->pp_modules, pp_modules_gen75, sizeof(pp_context->pp_modules));
4846     else if (IS_GEN7(i965->intel.device_id))
4847         memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
4848     else if (IS_GEN6(i965->intel.device_id))
4849         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
4850     else if (IS_IRONLAKE(i965->intel.device_id))
4851         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
4852
4853     for (i = 0; i < NUM_PP_MODULES; i++) {
4854         struct pp_module *pp_module = &pp_context->pp_modules[i];
4855         dri_bo_unreference(pp_module->kernel.bo);
4856         if (pp_module->kernel.bin && pp_module->kernel.size) {
4857             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
4858                                                 pp_module->kernel.name,
4859                                                 pp_module->kernel.size,
4860                                                 4096);
4861             assert(pp_module->kernel.bo);
4862             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
4863         } else {
4864             pp_module->kernel.bo = NULL;
4865         }
4866     }
4867
4868     /* static & inline parameters */
4869     if (IS_GEN7(i965->intel.device_id)) {
4870         pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
4871         pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
4872     } else {
4873         pp_context->pp_static_parameter = calloc(sizeof(struct pp_static_parameter), 1);
4874         pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
4875     }
4876
4877     pp_context->batch = batch;
4878 }
4879
4880 Bool
4881 i965_post_processing_init(VADriverContextP ctx)
4882 {
4883     struct i965_driver_data *i965 = i965_driver_data(ctx);
4884     struct i965_post_processing_context *pp_context = i965->pp_context;
4885
4886     if (HAS_PP(i965)) {
4887         if (pp_context == NULL) {
4888             pp_context = calloc(1, sizeof(*pp_context));
4889             i965_post_processing_context_init(ctx, pp_context, i965->batch);
4890             i965->pp_context = pp_context;
4891         }
4892     }
4893
4894     return True;
4895 }
4896
4897 static const int procfilter_to_pp_flag[VAProcFilterCount] = {
4898     PP_NULL,    /* VAProcFilterNone */
4899     PP_NV12_DN, /* VAProcFilterNoiseReduction */
4900     PP_NULL,    /* VAProcFilterDeblocking */
4901     PP_NV12_DNDI, /* VAProcFilterDeinterlacing */
4902     PP_NULL,    /* VAProcFilterSharpening */
4903     PP_NULL,    /* VAProcFilterColorBalance */
4904     PP_NULL,    /* VAProcFilterColorStandard */
4905     PP_NULL,    /* VAProcFilterFrameRateConversion */
4906 };
4907
4908 static const int proc_frame_to_pp_frame[3] = {
4909     I965_SURFACE_FLAG_FRAME,
4910     I965_SURFACE_FLAG_TOP_FIELD_FIRST,
4911     I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST
4912 };
4913
4914 void 
4915 i965_proc_picture(VADriverContextP ctx, 
4916                   VAProfile profile, 
4917                   union codec_state *codec_state,
4918                   struct hw_context *hw_context)
4919 {
4920     struct i965_driver_data *i965 = i965_driver_data(ctx);
4921     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
4922     struct proc_state *proc_state = &codec_state->proc;
4923     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
4924     struct object_surface *obj_surface;
4925     struct i965_surface src_surface, dst_surface;
4926     VARectangle src_rect, dst_rect;
4927     VAStatus status;
4928     int i;
4929     VASurfaceID tmp_surfaces[VAProcFilterCount + 4];
4930     int num_tmp_surfaces = 0;
4931     unsigned int tiling = 0, swizzle = 0;
4932     int in_width, in_height;
4933
4934     assert(pipeline_param->surface != VA_INVALID_ID);
4935     assert(proc_state->current_render_target != VA_INVALID_ID);
4936
4937     obj_surface = SURFACE(pipeline_param->surface);
4938     in_width = obj_surface->orig_width;
4939     in_height = obj_surface->orig_height;
4940     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
4941
4942     src_surface.id = pipeline_param->surface;
4943     src_surface.type = I965_SURFACE_TYPE_SURFACE;
4944     src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
4945
4946     VASurfaceID out_surface_id = VA_INVALID_ID;
4947     if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2')) {
4948         src_surface.id = pipeline_param->surface;
4949         src_surface.type = I965_SURFACE_TYPE_SURFACE;
4950         src_surface.flags = I965_SURFACE_FLAG_FRAME;
4951         src_rect.x = 0;
4952         src_rect.y = 0;
4953         src_rect.width = in_width;
4954         src_rect.height = in_height;
4955
4956         status = i965_CreateSurfaces(ctx,
4957                                      in_width,
4958                                      in_height,
4959                                      VA_RT_FORMAT_YUV420,
4960                                      1,
4961                                      &out_surface_id);
4962         assert(status == VA_STATUS_SUCCESS);
4963         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
4964         obj_surface = SURFACE(out_surface_id);
4965         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
4966
4967         dst_surface.id = out_surface_id;
4968         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4969         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4970         dst_rect.x = 0;
4971         dst_rect.y = 0;
4972         dst_rect.width = in_width;
4973         dst_rect.height = in_height;
4974
4975         status = i965_image_processing(ctx,
4976                                        &src_surface,
4977                                        &src_rect,
4978                                        &dst_surface,
4979                                        &dst_rect);
4980         assert(status == VA_STATUS_SUCCESS);
4981
4982         src_surface.id = out_surface_id;
4983         src_surface.type = I965_SURFACE_TYPE_SURFACE;
4984         src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
4985     }
4986
4987     if (pipeline_param->surface_region) {
4988         src_rect.x = pipeline_param->surface_region->x;
4989         src_rect.y = pipeline_param->surface_region->y;
4990         src_rect.width = pipeline_param->surface_region->width;
4991         src_rect.height = pipeline_param->surface_region->height;
4992     } else {
4993         src_rect.x = 0;
4994         src_rect.y = 0;
4995         src_rect.width = in_width;
4996         src_rect.height = in_height;
4997     }
4998
4999     if (pipeline_param->output_region) {
5000         dst_rect.x = pipeline_param->output_region->x;
5001         dst_rect.y = pipeline_param->output_region->y;
5002         dst_rect.width = pipeline_param->output_region->width;
5003         dst_rect.height = pipeline_param->output_region->height;
5004     } else {
5005         dst_rect.x = 0;
5006         dst_rect.y = 0;
5007         dst_rect.width = in_width;
5008         dst_rect.height = in_height;
5009     }
5010
5011     for (i = 0; i < pipeline_param->num_filters; i++) {
5012         struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]);
5013         VAProcFilterParameterBufferBase *filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer;
5014         VAProcFilterType filter_type = filter_param->type;
5015         out_surface_id = VA_INVALID_ID;
5016         int kernel_index = procfilter_to_pp_flag[filter_type];
5017
5018         if (kernel_index != PP_NULL &&
5019             proc_context->pp_context.pp_modules[kernel_index].kernel.bo != NULL) {
5020             status = i965_CreateSurfaces(ctx,
5021                                          in_width,
5022                                          in_height,
5023                                          VA_RT_FORMAT_YUV420,
5024                                          1,
5025                                          &out_surface_id);
5026             assert(status == VA_STATUS_SUCCESS);
5027             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
5028             obj_surface = SURFACE(out_surface_id);
5029             i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
5030             dst_surface.id = out_surface_id;
5031             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5032             status = i965_post_processing_internal(ctx, &proc_context->pp_context,
5033                                                    &src_surface,
5034                                                    &src_rect,
5035                                                    &dst_surface,
5036                                                    &src_rect,
5037                                                    kernel_index,
5038                                                    filter_param);
5039
5040             if (status == VA_STATUS_SUCCESS) {
5041                 src_surface.id = dst_surface.id;
5042                 src_surface.type = dst_surface.type;
5043                 src_surface.flags = dst_surface.flags;
5044             }
5045         }
5046     }
5047
5048     obj_surface = SURFACE(proc_state->current_render_target);
5049     int csc_needed = 0;
5050     if (obj_surface->fourcc && obj_surface->fourcc !=  VA_FOURCC('N','V','1','2')){
5051         csc_needed = 1;
5052         out_surface_id = VA_INVALID_ID;
5053         status = i965_CreateSurfaces(ctx,
5054                                      obj_surface->orig_width,
5055                                      obj_surface->orig_height,
5056                                      VA_RT_FORMAT_YUV420, 
5057                                      1,
5058                                      &out_surface_id);
5059         assert(status == VA_STATUS_SUCCESS);
5060         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
5061         struct object_surface *csc_surface = SURFACE(out_surface_id);
5062         i965_check_alloc_surface_bo(ctx, csc_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
5063         dst_surface.id = out_surface_id;
5064     } else {
5065         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
5066         dst_surface.id = proc_state->current_render_target;
5067     }
5068
5069     dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5070     i965_vpp_clear_surface(ctx, &proc_context->pp_context, proc_state->current_render_target, pipeline_param->output_background_color); 
5071
5072     // load/save doesn't support different origin offset for src and dst surface
5073     if (src_rect.width == dst_rect.width &&
5074         src_rect.height == dst_rect.height &&
5075         src_rect.x == dst_rect.x &&
5076         src_rect.y == dst_rect.y) {
5077         i965_post_processing_internal(ctx, &proc_context->pp_context,
5078                                       &src_surface,
5079                                       &src_rect,
5080                                       &dst_surface,
5081                                       &dst_rect,
5082                                       PP_NV12_LOAD_SAVE_N12,
5083                                       NULL);
5084     } else {
5085
5086         i965_post_processing_internal(ctx, &proc_context->pp_context,
5087                                       &src_surface,
5088                                       &src_rect,
5089                                       &dst_surface,
5090                                       &dst_rect,
5091                                       (pipeline_param->filter_flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC ?
5092                                       PP_NV12_AVS : PP_NV12_SCALING,
5093                                       NULL);
5094     }
5095
5096     if (csc_needed) {
5097         src_surface.id = dst_surface.id;
5098         src_surface.type = dst_surface.type;
5099         src_surface.flags = dst_surface.flags;
5100         dst_surface.id = proc_state->current_render_target;
5101         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5102         i965_image_processing(ctx, &src_surface, &dst_rect, &dst_surface, &dst_rect);
5103     }
5104     
5105     if (num_tmp_surfaces)
5106         i965_DestroySurfaces(ctx,
5107                              tmp_surfaces,
5108                              num_tmp_surfaces);
5109
5110     intel_batchbuffer_flush(hw_context->batch);
5111 }
5112
5113 static void
5114 i965_proc_context_destroy(void *hw_context)
5115 {
5116     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
5117
5118     i965_post_processing_context_finalize(&proc_context->pp_context);
5119     intel_batchbuffer_free(proc_context->base.batch);
5120     free(proc_context);
5121 }
5122
5123 struct hw_context *
5124 i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config)
5125 {
5126     struct intel_driver_data *intel = intel_driver_data(ctx);
5127     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
5128
5129     proc_context->base.destroy = i965_proc_context_destroy;
5130     proc_context->base.run = i965_proc_picture;
5131     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
5132     i965_post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
5133
5134     return (struct hw_context *)proc_context;
5135 }