Work around HW limitation (dword alignment) of horizontal offset
[platform/upstream/libva-intel-driver.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_post_processing.h"
40 #include "i965_render.h"
41
/* Non-zero when the device id of ctx matches IRONLAKE, GEN6 or GEN7. */
#define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) ||     \
                     IS_GEN6((ctx)->intel.device_id) ||         \
                     IS_GEN7((ctx)->intel.device_id))

/*
 * Size of one surface-state slot.  Both surface-state layouts of each
 * hardware family are passed through ALIGN(..., 32) and the largest result
 * is used, so a single slot size fits every layout.
 */
#define SURFACE_STATE_PADDED_SIZE_0_I965        ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_I965        ALIGN(sizeof(struct i965_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_I965          MAX(SURFACE_STATE_PADDED_SIZE_0_I965, SURFACE_STATE_PADDED_SIZE_1_I965)

#define SURFACE_STATE_PADDED_SIZE_0_GEN7        ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_GEN7        ALIGN(sizeof(struct gen7_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN7          MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)

#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
/* Byte offset of slot `index`; the argument is parenthesized so that
 * expressions such as `i + 1` scale as a whole. */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
/* The binding table starts right after the MAX_PP_SURFACES surface-state slots. */
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)

/* Geometry constants for the GPU kernels.
 * NOTE(review): presumably the pixel block processed per kernel invocation and
 * the required alignment of the horizontal offset -- confirm against callers. */
#define GPU_ASM_BLOCK_WIDTH         16
#define GPU_ASM_BLOCK_HEIGHT        8
#define GPU_ASM_X_OFFSET_ALIGNMENT  4
61
62 static const uint32_t pp_null_gen5[][4] = {
63 #include "shaders/post_processing/gen5_6/null.g4b.gen5"
64 };
65
66 static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
67 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
68 };
69
70 static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
71 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
72 };
73
74 static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
75 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
76 };
77
78 static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
79 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
80 };
81
82 static const uint32_t pp_nv12_scaling_gen5[][4] = {
83 #include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
84 };
85
86 static const uint32_t pp_nv12_avs_gen5[][4] = {
87 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
88 };
89
90 static const uint32_t pp_nv12_dndi_gen5[][4] = {
91 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
92 };
93
94 static const uint32_t pp_nv12_dn_gen5[][4] = {
95 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
96 };
97
98 static const uint32_t pp_nv12_load_save_pa_gen5[][4] = {
99 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5"
100 };
101
102 static const uint32_t pp_pl3_load_save_pa_gen5[][4] = {
103 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5"
104 };
105
106 static const uint32_t pp_pa_load_save_nv12_gen5[][4] = {
107 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5"
108 };
109
110 static const uint32_t pp_pa_load_save_pl3_gen5[][4] = {
111 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5"
112 };
113
114 static const uint32_t pp_rgbx_load_save_nv12_gen5[][4] = {
115 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g4b.gen5"
116 };
117
118 static const uint32_t pp_nv12_load_save_rgbx_gen5[][4] = {
119 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g4b.gen5"
120 };
121
122 static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
123                                    const struct i965_surface *src_surface,
124                                    const VARectangle *src_rect,
125                                    struct i965_surface *dst_surface,
126                                    const VARectangle *dst_rect,
127                                    void *filter_param);
128 static VAStatus pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
129                                             const struct i965_surface *src_surface,
130                                             const VARectangle *src_rect,
131                                             struct i965_surface *dst_surface,
132                                             const VARectangle *dst_rect,
133                                             void *filter_param);
134 static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
135                                            const struct i965_surface *src_surface,
136                                            const VARectangle *src_rect,
137                                            struct i965_surface *dst_surface,
138                                            const VARectangle *dst_rect,
139                                            void *filter_param);
140 static VAStatus gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
141                                              const struct i965_surface *src_surface,
142                                              const VARectangle *src_rect,
143                                              struct i965_surface *dst_surface,
144                                              const VARectangle *dst_rect,
145                                              void *filter_param);
146 static VAStatus pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
147                                                 const struct i965_surface *src_surface,
148                                                 const VARectangle *src_rect,
149                                                 struct i965_surface *dst_surface,
150                                                 const VARectangle *dst_rect,
151                                                 void *filter_param);
152 static VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
153                                         const struct i965_surface *src_surface,
154                                         const VARectangle *src_rect,
155                                         struct i965_surface *dst_surface,
156                                         const VARectangle *dst_rect,
157                                         void *filter_param);
158 static VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
159                                       const struct i965_surface *src_surface,
160                                       const VARectangle *src_rect,
161                                       struct i965_surface *dst_surface,
162                                       const VARectangle *dst_rect,
163                                       void *filter_param);
164
165 static struct pp_module pp_modules_gen5[] = {
166     {
167         {
168             "NULL module (for testing)",
169             PP_NULL,
170             pp_null_gen5,
171             sizeof(pp_null_gen5),
172             NULL,
173         },
174
175         pp_null_initialize,
176     },
177
178     {
179         {
180             "NV12_NV12",
181             PP_NV12_LOAD_SAVE_N12,
182             pp_nv12_load_save_nv12_gen5,
183             sizeof(pp_nv12_load_save_nv12_gen5),
184             NULL,
185         },
186
187         pp_plx_load_save_plx_initialize,
188     },
189
190     {
191         {
192             "NV12_PL3",
193             PP_NV12_LOAD_SAVE_PL3,
194             pp_nv12_load_save_pl3_gen5,
195             sizeof(pp_nv12_load_save_pl3_gen5),
196             NULL,
197         },
198
199         pp_plx_load_save_plx_initialize,
200     },
201
202     {
203         {
204             "PL3_NV12",
205             PP_PL3_LOAD_SAVE_N12,
206             pp_pl3_load_save_nv12_gen5,
207             sizeof(pp_pl3_load_save_nv12_gen5),
208             NULL,
209         },
210
211         pp_plx_load_save_plx_initialize,
212     },
213
214     {
215         {
216             "PL3_PL3",
217             PP_PL3_LOAD_SAVE_N12,
218             pp_pl3_load_save_pl3_gen5,
219             sizeof(pp_pl3_load_save_pl3_gen5),
220             NULL,
221         },
222
223         pp_plx_load_save_plx_initialize
224     },
225
226     {
227         {
228             "NV12 Scaling module",
229             PP_NV12_SCALING,
230             pp_nv12_scaling_gen5,
231             sizeof(pp_nv12_scaling_gen5),
232             NULL,
233         },
234
235         pp_nv12_scaling_initialize,
236     },
237
238     {
239         {
240             "NV12 AVS module",
241             PP_NV12_AVS,
242             pp_nv12_avs_gen5,
243             sizeof(pp_nv12_avs_gen5),
244             NULL,
245         },
246
247         pp_nv12_avs_initialize_nlas,
248     },
249
250     {
251         {
252             "NV12 DNDI module",
253             PP_NV12_DNDI,
254             pp_nv12_dndi_gen5,
255             sizeof(pp_nv12_dndi_gen5),
256             NULL,
257         },
258
259         pp_nv12_dndi_initialize,
260     },
261
262     {
263         {
264             "NV12 DN module",
265             PP_NV12_DN,
266             pp_nv12_dn_gen5,
267             sizeof(pp_nv12_dn_gen5),
268             NULL,
269         },
270
271         pp_nv12_dn_initialize,
272     },
273
274     {
275         {
276             "NV12_PA module",
277             PP_NV12_LOAD_SAVE_PA,
278             pp_nv12_load_save_pa_gen5,
279             sizeof(pp_nv12_load_save_pa_gen5),
280             NULL,
281         },
282     
283         pp_plx_load_save_plx_initialize,
284     },
285
286     {
287         {
288             "PL3_PA module",
289             PP_PL3_LOAD_SAVE_PA,
290             pp_pl3_load_save_pa_gen5,
291             sizeof(pp_pl3_load_save_pa_gen5),
292             NULL,
293         },
294     
295         pp_plx_load_save_plx_initialize,
296     },
297
298     {
299         {
300             "PA_NV12 module",
301             PP_PA_LOAD_SAVE_NV12,
302             pp_pa_load_save_nv12_gen5,
303             sizeof(pp_pa_load_save_nv12_gen5),
304             NULL,
305         },
306     
307         pp_plx_load_save_plx_initialize,
308     },
309
310     {
311         {
312             "PA_PL3 module",
313             PP_PA_LOAD_SAVE_PL3,
314             pp_pa_load_save_pl3_gen5,
315             sizeof(pp_pa_load_save_pl3_gen5),
316             NULL,
317         },
318     
319         pp_plx_load_save_plx_initialize,
320     },
321
322     {
323         {
324             "RGBX_NV12 module",
325             PP_RGBX_LOAD_SAVE_NV12,
326             pp_rgbx_load_save_nv12_gen5,
327             sizeof(pp_rgbx_load_save_nv12_gen5),
328             NULL,
329         },
330     
331         pp_plx_load_save_plx_initialize,
332     },
333             
334     {
335         {
336             "NV12_RGBX module",
337             PP_NV12_LOAD_SAVE_RGBX,
338             pp_nv12_load_save_rgbx_gen5,
339             sizeof(pp_nv12_load_save_rgbx_gen5),
340             NULL,
341         },
342     
343         pp_plx_load_save_plx_initialize,
344     },
345                     
346 };
347
348 static const uint32_t pp_null_gen6[][4] = {
349 #include "shaders/post_processing/gen5_6/null.g6b"
350 };
351
352 static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
353 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
354 };
355
356 static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
357 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
358 };
359
360 static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
361 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
362 };
363
364 static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
365 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
366 };
367
368 static const uint32_t pp_nv12_scaling_gen6[][4] = {
369 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
370 };
371
372 static const uint32_t pp_nv12_avs_gen6[][4] = {
373 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
374 };
375
376 static const uint32_t pp_nv12_dndi_gen6[][4] = {
377 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
378 };
379
380 static const uint32_t pp_nv12_dn_gen6[][4] = {
381 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
382 };
383
384 static const uint32_t pp_nv12_load_save_pa_gen6[][4] = {
385 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g6b"
386 };
387
388 static const uint32_t pp_pl3_load_save_pa_gen6[][4] = {
389 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g6b"
390 };
391
392 static const uint32_t pp_pa_load_save_nv12_gen6[][4] = {
393 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g6b"
394 };
395
396 static const uint32_t pp_pa_load_save_pl3_gen6[][4] = {
397 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g6b"
398 };
399
400 static const uint32_t pp_rgbx_load_save_nv12_gen6[][4] = {
401 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g6b"
402 };
403
404 static const uint32_t pp_nv12_load_save_rgbx_gen6[][4] = {
405 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g6b"
406 };
407
408 static struct pp_module pp_modules_gen6[] = {
409     {
410         {
411             "NULL module (for testing)",
412             PP_NULL,
413             pp_null_gen6,
414             sizeof(pp_null_gen6),
415             NULL,
416         },
417
418         pp_null_initialize,
419     },
420
421     {
422         {
423             "NV12_NV12",
424             PP_NV12_LOAD_SAVE_N12,
425             pp_nv12_load_save_nv12_gen6,
426             sizeof(pp_nv12_load_save_nv12_gen6),
427             NULL,
428         },
429
430         pp_plx_load_save_plx_initialize,
431     },
432
433     {
434         {
435             "NV12_PL3",
436             PP_NV12_LOAD_SAVE_PL3,
437             pp_nv12_load_save_pl3_gen6,
438             sizeof(pp_nv12_load_save_pl3_gen6),
439             NULL,
440         },
441         
442         pp_plx_load_save_plx_initialize,
443     },
444
445     {
446         {
447             "PL3_NV12",
448             PP_PL3_LOAD_SAVE_N12,
449             pp_pl3_load_save_nv12_gen6,
450             sizeof(pp_pl3_load_save_nv12_gen6),
451             NULL,
452         },
453
454         pp_plx_load_save_plx_initialize,
455     },
456
457     {
458         {
459             "PL3_PL3",
460             PP_PL3_LOAD_SAVE_N12,
461             pp_pl3_load_save_pl3_gen6,
462             sizeof(pp_pl3_load_save_pl3_gen6),
463             NULL,
464         },
465
466         pp_plx_load_save_plx_initialize,
467     },
468
469     {
470         {
471             "NV12 Scaling module",
472             PP_NV12_SCALING,
473             pp_nv12_scaling_gen6,
474             sizeof(pp_nv12_scaling_gen6),
475             NULL,
476         },
477
478         gen6_nv12_scaling_initialize,
479     },
480
481     {
482         {
483             "NV12 AVS module",
484             PP_NV12_AVS,
485             pp_nv12_avs_gen6,
486             sizeof(pp_nv12_avs_gen6),
487             NULL,
488         },
489
490         pp_nv12_avs_initialize_nlas,
491     },
492
493     {
494         {
495             "NV12 DNDI module",
496             PP_NV12_DNDI,
497             pp_nv12_dndi_gen6,
498             sizeof(pp_nv12_dndi_gen6),
499             NULL,
500         },
501
502         pp_nv12_dndi_initialize,
503     },
504
505     {
506         {
507             "NV12 DN module",
508             PP_NV12_DN,
509             pp_nv12_dn_gen6,
510             sizeof(pp_nv12_dn_gen6),
511             NULL,
512         },
513
514         pp_nv12_dn_initialize,
515     },
516     {
517         {
518             "NV12_PA module",
519             PP_NV12_LOAD_SAVE_PA,
520             pp_nv12_load_save_pa_gen6,
521             sizeof(pp_nv12_load_save_pa_gen6),
522             NULL,
523         },
524     
525         pp_plx_load_save_plx_initialize,
526     },
527     
528     {
529         {
530             "PL3_PA module",
531             PP_PL3_LOAD_SAVE_PA,
532             pp_pl3_load_save_pa_gen6,
533             sizeof(pp_pl3_load_save_pa_gen6),
534             NULL,
535         },
536     
537         pp_plx_load_save_plx_initialize,
538     },
539     
540     {
541         {
542             "PA_NV12 module",
543             PP_PA_LOAD_SAVE_NV12,
544             pp_pa_load_save_nv12_gen6,
545             sizeof(pp_pa_load_save_nv12_gen6),
546             NULL,
547         },
548     
549         pp_plx_load_save_plx_initialize,
550     },
551
552     {
553         {
554             "PA_PL3 module",
555             PP_PA_LOAD_SAVE_PL3,
556             pp_pa_load_save_pl3_gen6,
557             sizeof(pp_pa_load_save_pl3_gen6),
558             NULL,
559         },
560     
561         pp_plx_load_save_plx_initialize,
562     },
563     
564     {
565         {
566             "RGBX_NV12 module",
567             PP_RGBX_LOAD_SAVE_NV12,
568             pp_rgbx_load_save_nv12_gen6,
569             sizeof(pp_rgbx_load_save_nv12_gen6),
570             NULL,
571         },
572     
573         pp_plx_load_save_plx_initialize,
574     },
575
576     {
577         {
578             "NV12_RGBX module",
579             PP_NV12_LOAD_SAVE_RGBX,
580             pp_nv12_load_save_rgbx_gen6,
581             sizeof(pp_nv12_load_save_rgbx_gen6),
582             NULL,
583         },
584     
585         pp_plx_load_save_plx_initialize,
586     },
587 };
588
589 static const uint32_t pp_null_gen7[][4] = {
590 };
591
592 static const uint32_t pp_nv12_load_save_nv12_gen7[][4] = {
593 #include "shaders/post_processing/gen7/pl2_to_pl2.g7b"
594 };
595
596 static const uint32_t pp_nv12_load_save_pl3_gen7[][4] = {
597 #include "shaders/post_processing/gen7/pl2_to_pl3.g7b"
598 };
599
600 static const uint32_t pp_pl3_load_save_nv12_gen7[][4] = {
601 #include "shaders/post_processing/gen7/pl3_to_pl2.g7b"
602 };
603
604 static const uint32_t pp_pl3_load_save_pl3_gen7[][4] = {
605 #include "shaders/post_processing/gen7/pl3_to_pl3.g7b"
606 };
607
608 static const uint32_t pp_nv12_scaling_gen7[][4] = {
609 #include "shaders/post_processing/gen7/avs.g7b"
610 };
611
612 static const uint32_t pp_nv12_avs_gen7[][4] = {
613 #include "shaders/post_processing/gen7/avs.g7b"
614 };
615
616 static const uint32_t pp_nv12_dndi_gen7[][4] = {
617 #include "shaders/post_processing/gen7/dndi.g7b"
618 };
619
620 static const uint32_t pp_nv12_dn_gen7[][4] = {
621 #include "shaders/post_processing/gen7/nv12_dn_nv12.g7b"
622 };
623 static const uint32_t pp_nv12_load_save_pa_gen7[][4] = {
624 #include "shaders/post_processing/gen7/pl2_to_pa.g7b"
625 };
626 static const uint32_t pp_pl3_load_save_pa_gen7[][4] = {
627 #include "shaders/post_processing/gen7/pl3_to_pa.g7b"
628 };
629 static const uint32_t pp_pa_load_save_nv12_gen7[][4] = {
630 #include "shaders/post_processing/gen7/pa_to_pl2.g7b"
631 };
632 static const uint32_t pp_pa_load_save_pl3_gen7[][4] = {
633 #include "shaders/post_processing/gen7/pa_to_pl3.g7b"
634 };
635 static const uint32_t pp_rgbx_load_save_nv12_gen7[][4] = {
636 };
637 static const uint32_t pp_nv12_load_save_rgbx_gen7[][4] = {
638 };
639
640 static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
641                                            const struct i965_surface *src_surface,
642                                            const VARectangle *src_rect,
643                                            struct i965_surface *dst_surface,
644                                            const VARectangle *dst_rect,
645                                            void *filter_param);
646 static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
647                                              const struct i965_surface *src_surface,
648                                              const VARectangle *src_rect,
649                                              struct i965_surface *dst_surface,
650                                              const VARectangle *dst_rect,
651                                              void *filter_param);
652 static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
653                                            const struct i965_surface *src_surface,
654                                            const VARectangle *src_rect,
655                                            struct i965_surface *dst_surface,
656                                            const VARectangle *dst_rect,
657                                            void *filter_param);
658
659 static struct pp_module pp_modules_gen7[] = {
660     {
661         {
662             "NULL module (for testing)",
663             PP_NULL,
664             pp_null_gen7,
665             sizeof(pp_null_gen7),
666             NULL,
667         },
668
669         pp_null_initialize,
670     },
671
672     {
673         {
674             "NV12_NV12",
675             PP_NV12_LOAD_SAVE_N12,
676             pp_nv12_load_save_nv12_gen7,
677             sizeof(pp_nv12_load_save_nv12_gen7),
678             NULL,
679         },
680
681         gen7_pp_plx_avs_initialize,
682     },
683
684     {
685         {
686             "NV12_PL3",
687             PP_NV12_LOAD_SAVE_PL3,
688             pp_nv12_load_save_pl3_gen7,
689             sizeof(pp_nv12_load_save_pl3_gen7),
690             NULL,
691         },
692         
693         gen7_pp_plx_avs_initialize,
694     },
695
696     {
697         {
698             "PL3_NV12",
699             PP_PL3_LOAD_SAVE_N12,
700             pp_pl3_load_save_nv12_gen7,
701             sizeof(pp_pl3_load_save_nv12_gen7),
702             NULL,
703         },
704
705         gen7_pp_plx_avs_initialize,
706     },
707
708     {
709         {
710             "PL3_PL3",
711             PP_PL3_LOAD_SAVE_N12,
712             pp_pl3_load_save_pl3_gen7,
713             sizeof(pp_pl3_load_save_pl3_gen7),
714             NULL,
715         },
716
717         gen7_pp_plx_avs_initialize,
718     },
719
720     {
721         {
722             "NV12 Scaling module",
723             PP_NV12_SCALING,
724             pp_nv12_scaling_gen7,
725             sizeof(pp_nv12_scaling_gen7),
726             NULL,
727         },
728
729         gen7_pp_plx_avs_initialize,
730     },
731
732     {
733         {
734             "NV12 AVS module",
735             PP_NV12_AVS,
736             pp_nv12_avs_gen7,
737             sizeof(pp_nv12_avs_gen7),
738             NULL,
739         },
740
741         gen7_pp_plx_avs_initialize,
742     },
743
744     {
745         {
746             "NV12 DNDI module",
747             PP_NV12_DNDI,
748             pp_nv12_dndi_gen7,
749             sizeof(pp_nv12_dndi_gen7),
750             NULL,
751         },
752
753         gen7_pp_nv12_dndi_initialize,
754     },
755
756     {
757         {
758             "NV12 DN module",
759             PP_NV12_DN,
760             pp_nv12_dn_gen7,
761             sizeof(pp_nv12_dn_gen7),
762             NULL,
763         },
764
765         gen7_pp_nv12_dn_initialize,
766     },
767     {
768         {
769             "NV12_PA module",
770             PP_NV12_LOAD_SAVE_PA,
771             pp_nv12_load_save_pa_gen7,
772             sizeof(pp_nv12_load_save_pa_gen7),
773             NULL,
774         },
775     
776         gen7_pp_plx_avs_initialize,
777     },
778
779     {
780         {
781             "PL3_PA module",
782             PP_PL3_LOAD_SAVE_PA,
783             pp_pl3_load_save_pa_gen7,
784             sizeof(pp_pl3_load_save_pa_gen7),
785             NULL,
786         },
787     
788         gen7_pp_plx_avs_initialize,
789     },
790
791     {
792         {
793             "PA_NV12 module",
794             PP_PA_LOAD_SAVE_NV12,
795             pp_pa_load_save_nv12_gen7,
796             sizeof(pp_pa_load_save_nv12_gen7),
797             NULL,
798         },
799     
800         gen7_pp_plx_avs_initialize,
801     },
802
803     {
804         {
805             "PA_PL3 module",
806             PP_PA_LOAD_SAVE_PL3,
807             pp_pa_load_save_pl3_gen7,
808             sizeof(pp_pa_load_save_pl3_gen7),
809             NULL,
810         },
811     
812         gen7_pp_plx_avs_initialize,
813     },
814     
815     {
816         {
817             "RGBX_NV12 module",
818             PP_RGBX_LOAD_SAVE_NV12,
819             pp_rgbx_load_save_nv12_gen7,
820             sizeof(pp_rgbx_load_save_nv12_gen7),
821             NULL,
822         },
823     
824         pp_plx_load_save_plx_initialize,
825     },
826
827     {
828         {
829             "NV12_RGBX module",
830             PP_NV12_LOAD_SAVE_RGBX,
831             pp_nv12_load_save_rgbx_gen7,
832             sizeof(pp_nv12_load_save_rgbx_gen7),
833             NULL,
834         },
835     
836         pp_plx_load_save_plx_initialize,
837     },
838             
839 };
840
841 static int
842 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
843 {
844     struct i965_driver_data *i965 = i965_driver_data(ctx);
845     int fourcc;
846
847     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
848         struct object_image *obj_image = IMAGE(surface->id);
849         fourcc = obj_image->image.format.fourcc;
850     } else {
851         struct object_surface *obj_surface = SURFACE(surface->id);
852         fourcc = obj_surface->fourcc;
853     }
854
855     return fourcc;
856 }
857
858 static void
859 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
860 {
861     switch (tiling) {
862     case I915_TILING_NONE:
863         ss->ss3.tiled_surface = 0;
864         ss->ss3.tile_walk = 0;
865         break;
866     case I915_TILING_X:
867         ss->ss3.tiled_surface = 1;
868         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
869         break;
870     case I915_TILING_Y:
871         ss->ss3.tiled_surface = 1;
872         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
873         break;
874     }
875 }
876
877 static void
878 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
879 {
880     switch (tiling) {
881     case I915_TILING_NONE:
882         ss->ss2.tiled_surface = 0;
883         ss->ss2.tile_walk = 0;
884         break;
885     case I915_TILING_X:
886         ss->ss2.tiled_surface = 1;
887         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
888         break;
889     case I915_TILING_Y:
890         ss->ss2.tiled_surface = 1;
891         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
892         break;
893     }
894 }
895
896 static void
897 gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
898 {
899     switch (tiling) {
900     case I915_TILING_NONE:
901         ss->ss0.tiled_surface = 0;
902         ss->ss0.tile_walk = 0;
903         break;
904     case I915_TILING_X:
905         ss->ss0.tiled_surface = 1;
906         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
907         break;
908     case I915_TILING_Y:
909         ss->ss0.tiled_surface = 1;
910         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
911         break;
912     }
913 }
914
915 static void
916 gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
917 {
918     switch (tiling) {
919     case I915_TILING_NONE:
920         ss->ss2.tiled_surface = 0;
921         ss->ss2.tile_walk = 0;
922         break;
923     case I915_TILING_X:
924         ss->ss2.tiled_surface = 1;
925         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
926         break;
927     case I915_TILING_Y:
928         ss->ss2.tiled_surface = 1;
929         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
930         break;
931     }
932 }
933
934 static void
935 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
936 {
937     struct i965_interface_descriptor *desc;
938     dri_bo *bo;
939     int pp_index = pp_context->current_pp;
940
941     bo = pp_context->idrt.bo;
942     dri_bo_map(bo, 1);
943     assert(bo->virtual);
944     desc = bo->virtual;
945     memset(desc, 0, sizeof(*desc));
946     desc->desc0.grf_reg_blocks = 10;
947     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
948     desc->desc1.const_urb_entry_read_offset = 0;
949     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
950     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
951     desc->desc2.sampler_count = 0;
952     desc->desc3.binding_table_entry_count = 0;
953     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
954
955     dri_bo_emit_reloc(bo,
956                       I915_GEM_DOMAIN_INSTRUCTION, 0,
957                       desc->desc0.grf_reg_blocks,
958                       offsetof(struct i965_interface_descriptor, desc0),
959                       pp_context->pp_modules[pp_index].kernel.bo);
960
961     dri_bo_emit_reloc(bo,
962                       I915_GEM_DOMAIN_INSTRUCTION, 0,
963                       desc->desc2.sampler_count << 2,
964                       offsetof(struct i965_interface_descriptor, desc2),
965                       pp_context->sampler_state_table.bo);
966
967     dri_bo_unmap(bo);
968     pp_context->idrt.num_interface_descriptors++;
969 }
970
971 static void
972 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
973 {
974     struct i965_vfe_state *vfe_state;
975     dri_bo *bo;
976
977     bo = pp_context->vfe_state.bo;
978     dri_bo_map(bo, 1);
979     assert(bo->virtual);
980     vfe_state = bo->virtual;
981     memset(vfe_state, 0, sizeof(*vfe_state));
982     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
983     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
984     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
985     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
986     vfe_state->vfe1.children_present = 0;
987     vfe_state->vfe2.interface_descriptor_base = 
988         pp_context->idrt.bo->offset >> 4; /* reloc */
989     dri_bo_emit_reloc(bo,
990                       I915_GEM_DOMAIN_INSTRUCTION, 0,
991                       0,
992                       offsetof(struct i965_vfe_state, vfe2),
993                       pp_context->idrt.bo);
994     dri_bo_unmap(bo);
995 }
996
997 static void
998 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
999 {
1000     unsigned char *constant_buffer;
1001     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1002
1003     assert(sizeof(*pp_static_parameter) == 128);
1004     dri_bo_map(pp_context->curbe.bo, 1);
1005     assert(pp_context->curbe.bo->virtual);
1006     constant_buffer = pp_context->curbe.bo->virtual;
1007     memcpy(constant_buffer, pp_static_parameter, sizeof(*pp_static_parameter));
1008     dri_bo_unmap(pp_context->curbe.bo);
1009 }
1010
1011 static void
1012 ironlake_pp_states_setup(VADriverContextP ctx,
1013                          struct i965_post_processing_context *pp_context)
1014 {
1015     ironlake_pp_interface_descriptor_table(pp_context);
1016     ironlake_pp_vfe_state(pp_context);
1017     ironlake_pp_upload_constants(pp_context);
1018 }
1019
1020 static void
1021 ironlake_pp_pipeline_select(VADriverContextP ctx,
1022                             struct i965_post_processing_context *pp_context)
1023 {
1024     struct intel_batchbuffer *batch = pp_context->batch;
1025
1026     BEGIN_BATCH(batch, 1);
1027     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
1028     ADVANCE_BATCH(batch);
1029 }
1030
1031 static void
1032 ironlake_pp_urb_layout(VADriverContextP ctx,
1033                        struct i965_post_processing_context *pp_context)
1034 {
1035     struct intel_batchbuffer *batch = pp_context->batch;
1036     unsigned int vfe_fence, cs_fence;
1037
1038     vfe_fence = pp_context->urb.cs_start;
1039     cs_fence = pp_context->urb.size;
1040
1041     BEGIN_BATCH(batch, 3);
1042     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
1043     OUT_BATCH(batch, 0);
1044     OUT_BATCH(batch, 
1045               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
1046               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
1047     ADVANCE_BATCH(batch);
1048 }
1049
1050 static void
1051 ironlake_pp_state_base_address(VADriverContextP ctx,
1052                                struct i965_post_processing_context *pp_context)
1053 {
1054     struct intel_batchbuffer *batch = pp_context->batch;
1055
1056     BEGIN_BATCH(batch, 8);
1057     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
1058     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1059     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
1060     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1061     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1062     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1063     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1064     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1065     ADVANCE_BATCH(batch);
1066 }
1067
1068 static void
1069 ironlake_pp_state_pointers(VADriverContextP ctx,
1070                            struct i965_post_processing_context *pp_context)
1071 {
1072     struct intel_batchbuffer *batch = pp_context->batch;
1073
1074     BEGIN_BATCH(batch, 3);
1075     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
1076     OUT_BATCH(batch, 0);
1077     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1078     ADVANCE_BATCH(batch);
1079 }
1080
1081 static void 
1082 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
1083                           struct i965_post_processing_context *pp_context)
1084 {
1085     struct intel_batchbuffer *batch = pp_context->batch;
1086
1087     BEGIN_BATCH(batch, 2);
1088     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
1089     OUT_BATCH(batch,
1090               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
1091               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
1092     ADVANCE_BATCH(batch);
1093 }
1094
1095 static void
1096 ironlake_pp_constant_buffer(VADriverContextP ctx,
1097                             struct i965_post_processing_context *pp_context)
1098 {
1099     struct intel_batchbuffer *batch = pp_context->batch;
1100
1101     BEGIN_BATCH(batch, 2);
1102     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1103     OUT_RELOC(batch, pp_context->curbe.bo,
1104               I915_GEM_DOMAIN_INSTRUCTION, 0,
1105               pp_context->urb.size_cs_entry - 1);
1106     ADVANCE_BATCH(batch);    
1107 }
1108
1109 static void
1110 ironlake_pp_object_walker(VADriverContextP ctx,
1111                           struct i965_post_processing_context *pp_context)
1112 {
1113     struct intel_batchbuffer *batch = pp_context->batch;
1114     int x, x_steps, y, y_steps;
1115     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1116
1117     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
1118     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
1119
1120     for (y = 0; y < y_steps; y++) {
1121         for (x = 0; x < x_steps; x++) {
1122             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
1123                 BEGIN_BATCH(batch, 20);
1124                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
1125                 OUT_BATCH(batch, 0);
1126                 OUT_BATCH(batch, 0); /* no indirect data */
1127                 OUT_BATCH(batch, 0);
1128
1129                 /* inline data grf 5-6 */
1130                 assert(sizeof(*pp_inline_parameter) == 64);
1131                 intel_batchbuffer_data(batch, pp_inline_parameter, sizeof(*pp_inline_parameter));
1132
1133                 ADVANCE_BATCH(batch);
1134             }
1135         }
1136     }
1137 }
1138
1139 static void
1140 ironlake_pp_pipeline_setup(VADriverContextP ctx,
1141                            struct i965_post_processing_context *pp_context)
1142 {
1143     struct intel_batchbuffer *batch = pp_context->batch;
1144
1145     intel_batchbuffer_start_atomic(batch, 0x1000);
1146     intel_batchbuffer_emit_mi_flush(batch);
1147     ironlake_pp_pipeline_select(ctx, pp_context);
1148     ironlake_pp_state_base_address(ctx, pp_context);
1149     ironlake_pp_state_pointers(ctx, pp_context);
1150     ironlake_pp_urb_layout(ctx, pp_context);
1151     ironlake_pp_cs_urb_layout(ctx, pp_context);
1152     ironlake_pp_constant_buffer(ctx, pp_context);
1153     ironlake_pp_object_walker(ctx, pp_context);
1154     intel_batchbuffer_end_atomic(batch);
1155 }
1156
1157 // update u/v offset when the surface format are packed yuv
1158 static void i965_update_src_surface_static_parameter(
1159     VADriverContextP    ctx, 
1160     struct i965_post_processing_context *pp_context,
1161     const struct i965_surface *surface)
1162 {
1163     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1164     int fourcc = pp_get_surface_fourcc(ctx, surface);
1165
1166     switch (fourcc) {
1167     case VA_FOURCC('Y', 'U', 'Y', '2'):
1168         pp_static_parameter->grf1.source_packed_u_offset = 1;
1169         pp_static_parameter->grf1.source_packed_v_offset = 3;
1170         break;
1171     case VA_FOURCC('U', 'Y', 'V', 'Y'):
1172         pp_static_parameter->grf1.source_packed_y_offset = 1;
1173         pp_static_parameter->grf1.source_packed_v_offset = 2;
1174         break;
1175     case VA_FOURCC('B', 'G', 'R', 'X'):
1176     case VA_FOURCC('B', 'G', 'R', 'A'):
1177         pp_static_parameter->grf1.source_rgb_layout = 0;
1178         break;
1179     case VA_FOURCC('R', 'G', 'B', 'X'):
1180     case VA_FOURCC('R', 'G', 'B', 'A'):
1181         pp_static_parameter->grf1.source_rgb_layout = 1;
1182         break;
1183     default:
1184         break;
1185     }
1186     
1187 }
1188
1189 static void i965_update_dst_surface_static_parameter(
1190     VADriverContextP    ctx, 
1191     struct i965_post_processing_context *pp_context,
1192     const struct i965_surface *surface)
1193 {
1194     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1195     int fourcc = pp_get_surface_fourcc(ctx, surface);
1196
1197     switch (fourcc) {
1198     case VA_FOURCC('Y', 'U', 'Y', '2'):
1199         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_u_offset = 1;
1200         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 3;
1201         break;
1202     case VA_FOURCC('U', 'Y', 'V', 'Y'):
1203         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_y_offset = 1;
1204         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 2;
1205         break;
1206     case VA_FOURCC('B', 'G', 'R', 'X'):
1207     case VA_FOURCC('B', 'G', 'R', 'A'):
1208         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 0;
1209         break;
1210     case VA_FOURCC('R', 'G', 'B', 'X'):
1211     case VA_FOURCC('R', 'G', 'B', 'A'):
1212         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 1;
1213         break;
1214     default:
1215         break;
1216     }
1217     
1218 }
1219
1220 static void
1221 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1222                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1223                           int width, int height, int pitch, int format, 
1224                           int index, int is_target)
1225 {
1226     struct i965_surface_state *ss;
1227     dri_bo *ss_bo;
1228     unsigned int tiling;
1229     unsigned int swizzle;
1230
1231     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1232     ss_bo = pp_context->surface_state_binding_table.bo;
1233     assert(ss_bo);
1234
1235     dri_bo_map(ss_bo, True);
1236     assert(ss_bo->virtual);
1237     ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1238     memset(ss, 0, sizeof(*ss));
1239     ss->ss0.surface_type = I965_SURFACE_2D;
1240     ss->ss0.surface_format = format;
1241     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1242     ss->ss2.width = width - 1;
1243     ss->ss2.height = height - 1;
1244     ss->ss3.pitch = pitch - 1;
1245     pp_set_surface_tiling(ss, tiling);
1246     dri_bo_emit_reloc(ss_bo,
1247                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1248                       surf_bo_offset,
1249                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
1250                       surf_bo);
1251     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1252     dri_bo_unmap(ss_bo);
1253 }
1254
1255 static void
1256 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1257                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1258                            int width, int height, int wpitch,
1259                            int xoffset, int yoffset,
1260                            int format, int interleave_chroma,
1261                            int index)
1262 {
1263     struct i965_surface_state2 *ss2;
1264     dri_bo *ss2_bo;
1265     unsigned int tiling;
1266     unsigned int swizzle;
1267
1268     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1269     ss2_bo = pp_context->surface_state_binding_table.bo;
1270     assert(ss2_bo);
1271
1272     dri_bo_map(ss2_bo, True);
1273     assert(ss2_bo->virtual);
1274     ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1275     memset(ss2, 0, sizeof(*ss2));
1276     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1277     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1278     ss2->ss1.width = width - 1;
1279     ss2->ss1.height = height - 1;
1280     ss2->ss2.pitch = wpitch - 1;
1281     ss2->ss2.interleave_chroma = interleave_chroma;
1282     ss2->ss2.surface_format = format;
1283     ss2->ss3.x_offset_for_cb = xoffset;
1284     ss2->ss3.y_offset_for_cb = yoffset;
1285     pp_set_surface2_tiling(ss2, tiling);
1286     dri_bo_emit_reloc(ss2_bo,
1287                       I915_GEM_DOMAIN_RENDER, 0,
1288                       surf_bo_offset,
1289                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
1290                       surf_bo);
1291     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1292     dri_bo_unmap(ss2_bo);
1293 }
1294
1295 static void
1296 gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1297                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1298                           int width, int height, int pitch, int format, 
1299                           int index, int is_target)
1300 {
1301     struct gen7_surface_state *ss;
1302     dri_bo *ss_bo;
1303     unsigned int tiling;
1304     unsigned int swizzle;
1305
1306     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1307     ss_bo = pp_context->surface_state_binding_table.bo;
1308     assert(ss_bo);
1309
1310     dri_bo_map(ss_bo, True);
1311     assert(ss_bo->virtual);
1312     ss = (struct gen7_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1313     memset(ss, 0, sizeof(*ss));
1314     ss->ss0.surface_type = I965_SURFACE_2D;
1315     ss->ss0.surface_format = format;
1316     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1317     ss->ss2.width = width - 1;
1318     ss->ss2.height = height - 1;
1319     ss->ss3.pitch = pitch - 1;
1320     gen7_pp_set_surface_tiling(ss, tiling);
1321     dri_bo_emit_reloc(ss_bo,
1322                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1323                       surf_bo_offset,
1324                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
1325                       surf_bo);
1326     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1327     dri_bo_unmap(ss_bo);
1328 }
1329
1330 static void
1331 gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1332                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1333                            int width, int height, int wpitch,
1334                            int xoffset, int yoffset,
1335                            int format, int interleave_chroma,
1336                            int index)
1337 {
1338     struct gen7_surface_state2 *ss2;
1339     dri_bo *ss2_bo;
1340     unsigned int tiling;
1341     unsigned int swizzle;
1342
1343     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1344     ss2_bo = pp_context->surface_state_binding_table.bo;
1345     assert(ss2_bo);
1346
1347     dri_bo_map(ss2_bo, True);
1348     assert(ss2_bo->virtual);
1349     ss2 = (struct gen7_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1350     memset(ss2, 0, sizeof(*ss2));
1351     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1352     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1353     ss2->ss1.width = width - 1;
1354     ss2->ss1.height = height - 1;
1355     ss2->ss2.pitch = wpitch - 1;
1356     ss2->ss2.interleave_chroma = interleave_chroma;
1357     ss2->ss2.surface_format = format;
1358     ss2->ss3.x_offset_for_cb = xoffset;
1359     ss2->ss3.y_offset_for_cb = yoffset;
1360     gen7_pp_set_surface2_tiling(ss2, tiling);
1361     dri_bo_emit_reloc(ss2_bo,
1362                       I915_GEM_DOMAIN_RENDER, 0,
1363                       surf_bo_offset,
1364                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
1365                       surf_bo);
1366     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1367     dri_bo_unmap(ss2_bo);
1368 }
1369
1370 static void 
1371 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1372                                 const struct i965_surface *surface, 
1373                                 int base_index, int is_target,
1374                                 int *width, int *height, int *pitch, int *offset)
1375 {
1376     struct i965_driver_data *i965 = i965_driver_data(ctx);
1377     struct object_surface *obj_surface;
1378     struct object_image *obj_image;
1379     dri_bo *bo;
1380     int fourcc = pp_get_surface_fourcc(ctx, surface);
1381     const int Y = 0;
1382     const int U = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 2 : 1;
1383     const int V = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 1 : 2;
1384     const int UV = 1;
1385     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1386     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); 
1387     int full_packed_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || 
1388                               fourcc == VA_FOURCC('R', 'G', 'B', 'X') || 
1389                               fourcc == VA_FOURCC('B', 'G', 'R', 'A') || 
1390                               fourcc == VA_FOURCC('B', 'G', 'R', 'X'));
1391     int scale_factor_of_1st_plane_width_in_byte = 1;
1392                               
1393     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1394         obj_surface = SURFACE(surface->id);
1395         bo = obj_surface->bo;
1396         width[0] = obj_surface->orig_width;
1397         height[0] = obj_surface->orig_height;
1398         pitch[0] = obj_surface->width;
1399         offset[0] = 0;
1400
1401         if (full_packed_format) {
1402             scale_factor_of_1st_plane_width_in_byte = 4; 
1403             pitch[0] = obj_surface->width * 4;
1404         }
1405         else if (packed_yuv ) {
1406             scale_factor_of_1st_plane_width_in_byte =  2; 
1407             pitch[0] = obj_surface->width * 2;
1408         }
1409         else if (interleaved_uv) {
1410             width[1] = obj_surface->orig_width;
1411             height[1] = obj_surface->orig_height / 2;
1412             pitch[1] = obj_surface->width;
1413             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1414         } else {
1415             width[1] = obj_surface->orig_width / 2;
1416             height[1] = obj_surface->orig_height / 2;
1417             pitch[1] = obj_surface->width / 2;
1418             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1419             width[2] = obj_surface->orig_width / 2;
1420             height[2] = obj_surface->orig_height / 2;
1421             pitch[2] = obj_surface->width / 2;
1422             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
1423         }
1424     } else {
1425         obj_image = IMAGE(surface->id);
1426         bo = obj_image->bo;
1427         width[0] = obj_image->image.width;
1428         height[0] = obj_image->image.height;
1429         pitch[0] = obj_image->image.pitches[0];
1430         offset[0] = obj_image->image.offsets[0];
1431
1432         if (full_packed_format) {
1433             scale_factor_of_1st_plane_width_in_byte = 4;
1434         }
1435         else if (packed_yuv ) {
1436             scale_factor_of_1st_plane_width_in_byte = 2;
1437         }
1438         else if (interleaved_uv) {
1439             width[1] = obj_image->image.width;
1440             height[1] = obj_image->image.height / 2;
1441             pitch[1] = obj_image->image.pitches[1];
1442             offset[1] = obj_image->image.offsets[1];
1443         } else {
1444             width[1] = obj_image->image.width / 2;
1445             height[1] = obj_image->image.height / 2;
1446             pitch[1] = obj_image->image.pitches[1];
1447             offset[1] = obj_image->image.offsets[1];
1448             width[2] = obj_image->image.width / 2;
1449             height[2] = obj_image->image.height / 2;
1450             pitch[2] = obj_image->image.pitches[2];
1451             offset[2] = obj_image->image.offsets[2];
1452         }
1453     }
1454
1455     /* Y surface */
1456     i965_pp_set_surface_state(ctx, pp_context,
1457                               bo, offset[Y],
1458                               width[Y] *scale_factor_of_1st_plane_width_in_byte / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
1459                               base_index, is_target);
1460
1461     if (!packed_yuv && !full_packed_format) {
1462         if (interleaved_uv) {
1463             i965_pp_set_surface_state(ctx, pp_context,
1464                                       bo, offset[UV],
1465                                       width[UV] / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
1466                                       base_index + 1, is_target);
1467         } else {
1468             /* U surface */
1469             i965_pp_set_surface_state(ctx, pp_context,
1470                                       bo, offset[U],
1471                                       width[U] / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
1472                                       base_index + 1, is_target);
1473
1474             /* V surface */
1475             i965_pp_set_surface_state(ctx, pp_context,
1476                                       bo, offset[V],
1477                                       width[V] / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
1478                                       base_index + 2, is_target);
1479         }
1480     }
1481
1482 }
1483
1484 static void 
1485 gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1486                                      const struct i965_surface *surface, 
1487                                      int base_index, int is_target,
1488                                      int *width, int *height, int *pitch, int *offset)
1489 {
1490     struct i965_driver_data *i965 = i965_driver_data(ctx);
1491     struct object_surface *obj_surface;
1492     struct object_image *obj_image;
1493     dri_bo *bo;
1494     int fourcc = pp_get_surface_fourcc(ctx, surface);
1495     const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1496                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1;
1497     const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1498                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 1 : 2;
1499     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1500     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y'));
1501
1502     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1503         obj_surface = SURFACE(surface->id);
1504         bo = obj_surface->bo;
1505         width[0] = obj_surface->orig_width;
1506         height[0] = obj_surface->orig_height;
1507         pitch[0] = obj_surface->width;
1508         offset[0] = 0;
1509
1510         if (packed_yuv) {
1511             if (is_target)
1512                 width[0] = obj_surface->orig_width * 2; /* surface format is R8, so double the width */
1513             else
1514                 width[0] = obj_surface->orig_width;     /* surface foramt is YCBCR, width is specified in units of pixels */
1515
1516             pitch[0] = obj_surface->width * 2;
1517         }
1518
1519         width[1] = obj_surface->cb_cr_width;
1520         height[1] = obj_surface->cb_cr_height;
1521         pitch[1] = obj_surface->cb_cr_pitch;
1522         offset[1] = obj_surface->y_cb_offset * obj_surface->width;
1523
1524         width[2] = obj_surface->cb_cr_width;
1525         height[2] = obj_surface->cb_cr_height;
1526         pitch[2] = obj_surface->cb_cr_pitch;
1527         offset[2] = obj_surface->y_cr_offset * obj_surface->width;
1528     } else {
1529         obj_image = IMAGE(surface->id);
1530         bo = obj_image->bo;
1531         width[0] = obj_image->image.width;
1532         height[0] = obj_image->image.height;
1533         pitch[0] = obj_image->image.pitches[0];
1534         offset[0] = obj_image->image.offsets[0];
1535
1536         if (packed_yuv) {
1537             if (is_target)
1538                 width[0] = obj_image->image.width * 2;  /* surface format is R8, so double the width */
1539             else
1540                 width[0] = obj_image->image.width;      /* surface foramt is YCBCR, width is specified in units of pixels */
1541         } else if (interleaved_uv) {
1542             width[1] = obj_image->image.width / 2;
1543             height[1] = obj_image->image.height / 2;
1544             pitch[1] = obj_image->image.pitches[1];
1545             offset[1] = obj_image->image.offsets[1];
1546         } else {
1547             width[1] = obj_image->image.width / 2;
1548             height[1] = obj_image->image.height / 2;
1549             pitch[1] = obj_image->image.pitches[U];
1550             offset[1] = obj_image->image.offsets[U];
1551             width[2] = obj_image->image.width / 2;
1552             height[2] = obj_image->image.height / 2;
1553             pitch[2] = obj_image->image.pitches[V];
1554             offset[2] = obj_image->image.offsets[V];
1555         }
1556     }
1557
1558     if (is_target) {
1559         gen7_pp_set_surface_state(ctx, pp_context,
1560                                   bo, 0,
1561                                   width[0] / 4, height[0], pitch[0],
1562                                   I965_SURFACEFORMAT_R8_SINT,
1563                                   base_index, 1);
1564
1565         if (!packed_yuv) {
1566             if (interleaved_uv) {
1567                 gen7_pp_set_surface_state(ctx, pp_context,
1568                                           bo, offset[1],
1569                                           width[1] / 2, height[1], pitch[1],
1570                                           I965_SURFACEFORMAT_R8G8_SINT,
1571                                           base_index + 1, 1);
1572             } else {
1573                 gen7_pp_set_surface_state(ctx, pp_context,
1574                                           bo, offset[1],
1575                                           width[1] / 4, height[1], pitch[1],
1576                                           I965_SURFACEFORMAT_R8_SINT,
1577                                           base_index + 1, 1);
1578                 gen7_pp_set_surface_state(ctx, pp_context,
1579                                           bo, offset[2],
1580                                           width[2] / 4, height[2], pitch[2],
1581                                           I965_SURFACEFORMAT_R8_SINT,
1582                                           base_index + 2, 1);
1583             }
1584         }
1585     } else {
1586         int format0 = SURFACE_FORMAT_Y8_UNORM;
1587
1588         switch (fourcc) {
1589         case VA_FOURCC('Y', 'U', 'Y', '2'):
1590             format0 = SURFACE_FORMAT_YCRCB_NORMAL;
1591             break;
1592
1593         case VA_FOURCC('U', 'Y', 'V', 'Y'):
1594             format0 = SURFACE_FORMAT_YCRCB_SWAPY;
1595             break;
1596
1597         default:
1598             break;
1599         }
1600
1601         gen7_pp_set_surface2_state(ctx, pp_context,
1602                                    bo, offset[0],
1603                                    width[0], height[0], pitch[0],
1604                                    0, 0,
1605                                    format0, 0,
1606                                    base_index);
1607
1608         if (!packed_yuv) {
1609             if (interleaved_uv) {
1610                 gen7_pp_set_surface2_state(ctx, pp_context,
1611                                            bo, offset[1],
1612                                            width[1], height[1], pitch[1],
1613                                            0, 0,
1614                                            SURFACE_FORMAT_R8B8_UNORM, 0,
1615                                            base_index + 1);
1616             } else {
1617                 gen7_pp_set_surface2_state(ctx, pp_context,
1618                                            bo, offset[1],
1619                                            width[1], height[1], pitch[1],
1620                                            0, 0,
1621                                            SURFACE_FORMAT_R8_UNORM, 0,
1622                                            base_index + 1);
1623                 gen7_pp_set_surface2_state(ctx, pp_context,
1624                                            bo, offset[2],
1625                                            width[2], height[2], pitch[2],
1626                                            0, 0,
1627                                            SURFACE_FORMAT_R8_UNORM, 0,
1628                                            base_index + 2);
1629             }
1630         }
1631     }
1632 }
1633
/*
 * Horizontal step count for the "null" post-processing module: always a
 * single step.  private_context is ignored.
 */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1639
/*
 * Vertical step count for the "null" post-processing module: always a
 * single step.  private_context is ignored.
 */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1645
/*
 * Per-block callback for the "null" post-processing module.  It changes
 * nothing and returns 0 for every (x, y), which the object walker treats
 * as "emit this block".
 */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;
    (void)x;
    (void)y;

    return 0;
}
1651
1652 static VAStatus
1653 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1654                    const struct i965_surface *src_surface,
1655                    const VARectangle *src_rect,
1656                    struct i965_surface *dst_surface,
1657                    const VARectangle *dst_rect,
1658                    void *filter_param)
1659 {
1660     /* private function & data */
1661     pp_context->pp_x_steps = pp_null_x_steps;
1662     pp_context->pp_y_steps = pp_null_y_steps;
1663     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
1664
1665     dst_surface->flags = src_surface->flags;
1666
1667     return VA_STATUS_SUCCESS;
1668 }
1669
/*
 * Horizontal step count for the load/save module: always one step per
 * row of blocks.  private_context is ignored.
 */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1675
1676 static int
1677 pp_load_save_y_steps(void *private_context)
1678 {
1679     struct pp_load_save_context *pp_load_save_context = private_context;
1680
1681     return pp_load_save_context->dest_h / 8;
1682 }
1683
1684 static int
1685 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1686 {
1687     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1688
1689     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
1690     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
1691
1692     return 0;
1693 }
1694
1695 static void calculate_boundary_block_mask(struct i965_post_processing_context *pp_context, const VARectangle *dst_rect)
1696 {
1697     int i;
1698     /* x offset of dest surface must be dword aligned.
1699      * so we have to extend dst surface on left edge, and mask out pixels not interested
1700      */
1701     if (dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT) {
1702         pp_context->block_horizontal_mask_left = 0;
1703         for (i=dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; i<GPU_ASM_BLOCK_WIDTH; i++)
1704         {
1705             pp_context->block_horizontal_mask_left |= 1<<i;
1706         }
1707     }
1708     else {
1709         pp_context->block_horizontal_mask_left = 0xffff;
1710     }
1711     
1712     int dst_width_adjust = dst_rect->width + dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; 
1713     if (dst_width_adjust%GPU_ASM_BLOCK_WIDTH){
1714         pp_context->block_horizontal_mask_right = (1 << (dst_width_adjust%GPU_ASM_BLOCK_WIDTH)) - 1;
1715     }
1716     else {
1717         pp_context->block_horizontal_mask_right = 0xffff;
1718     }
1719     
1720     if (dst_rect->height%GPU_ASM_BLOCK_HEIGHT){
1721         pp_context->block_vertical_mask_bottom = (1 << (dst_rect->height%GPU_ASM_BLOCK_HEIGHT)) - 1;
1722     }
1723     else {
1724         pp_context->block_vertical_mask_bottom = 0xff;
1725     }
1726
1727 }
1728 static VAStatus
1729 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1730                                 const struct i965_surface *src_surface,
1731                                 const VARectangle *src_rect,
1732                                 struct i965_surface *dst_surface,
1733                                 const VARectangle *dst_rect,
1734                                 void *filter_param)
1735 {
1736     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
1737     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1738     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1739     int width[3], height[3], pitch[3], offset[3];
1740     const int Y = 0;
1741
1742     /* source surface */
1743     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
1744                                     width, height, pitch, offset);
1745
1746     /* destination surface */
1747     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
1748                                     width, height, pitch, offset);
1749
1750     /* private function & data */
1751     pp_context->pp_x_steps = pp_load_save_x_steps;
1752     pp_context->pp_y_steps = pp_load_save_y_steps;
1753     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
1754     pp_load_save_context->dest_h = ALIGN(height[Y], 8);
1755     pp_load_save_context->dest_w = ALIGN(width[Y], 16);
1756
1757     pp_inline_parameter->grf5.block_count_x = ALIGN(width[Y], 16) / 16;   /* 1 x N */
1758     pp_inline_parameter->grf5.number_blocks = ALIGN(width[Y], 16) / 16;
1759
1760     pp_static_parameter->grf3.horizontal_origin_offset = src_rect->x;
1761     pp_static_parameter->grf3.vertical_origin_offset = src_rect->y;
1762
1763     // update u/v offset for packed yuv
1764     i965_update_src_surface_static_parameter (ctx, pp_context, src_surface);
1765     i965_update_dst_surface_static_parameter (ctx, pp_context, dst_surface);
1766
1767     dst_surface->flags = src_surface->flags;
1768
1769     return VA_STATUS_SUCCESS;
1770 }
1771
/*
 * Horizontal step count for the scaling kernel.
 *
 * The context pointer is not consulted; the result is always 1.
 */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1777
1778 static int
1779 pp_scaling_y_steps(void *private_context)
1780 {
1781     struct pp_scaling_context *pp_scaling_context = private_context;
1782
1783     return pp_scaling_context->dest_h / 8;
1784 }
1785
1786 static int
1787 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1788 {
1789     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
1790     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1791     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1792     float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1793     float src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
1794
1795     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
1796     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
1797     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
1798     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
1799     
1800     return 0;
1801 }
1802
1803 static VAStatus
1804 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1805                            const struct i965_surface *src_surface,
1806                            const VARectangle *src_rect,
1807                            struct i965_surface *dst_surface,
1808                            const VARectangle *dst_rect,
1809                            void *filter_param)
1810 {
1811     struct i965_driver_data *i965 = i965_driver_data(ctx);
1812     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
1813     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1814     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1815     struct object_surface *obj_surface;
1816     struct i965_sampler_state *sampler_state;
1817     int in_w, in_h, in_wpitch, in_hpitch;
1818     int out_w, out_h, out_wpitch, out_hpitch;
1819
1820     /* source surface */
1821     obj_surface = SURFACE(src_surface->id);
1822     in_w = obj_surface->orig_width;
1823     in_h = obj_surface->orig_height;
1824     in_wpitch = obj_surface->width;
1825     in_hpitch = obj_surface->height;
1826
1827     /* source Y surface index 1 */
1828     i965_pp_set_surface_state(ctx, pp_context,
1829                               obj_surface->bo, 0,
1830                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1831                               1, 0);
1832
1833     /* source UV surface index 2 */
1834     i965_pp_set_surface_state(ctx, pp_context,
1835                               obj_surface->bo, in_wpitch * in_hpitch,
1836                               in_w / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1837                               2, 0);
1838
1839     /* destination surface */
1840     obj_surface = SURFACE(dst_surface->id);
1841     out_w = obj_surface->orig_width;
1842     out_h = obj_surface->orig_height;
1843     out_wpitch = obj_surface->width;
1844     out_hpitch = obj_surface->height;
1845
1846     /* destination Y surface index 7 */
1847     i965_pp_set_surface_state(ctx, pp_context,
1848                               obj_surface->bo, 0,
1849                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1850                               7, 1);
1851
1852     /* destination UV surface index 8 */
1853     i965_pp_set_surface_state(ctx, pp_context,
1854                               obj_surface->bo, out_wpitch * out_hpitch,
1855                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1856                               8, 1);
1857
1858     /* sampler state */
1859     dri_bo_map(pp_context->sampler_state_table.bo, True);
1860     assert(pp_context->sampler_state_table.bo->virtual);
1861     sampler_state = pp_context->sampler_state_table.bo->virtual;
1862
1863     /* SIMD16 Y index 1 */
1864     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
1865     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1866     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1867     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1868     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1869
1870     /* SIMD16 UV index 2 */
1871     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
1872     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1873     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1874     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1875     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1876
1877     dri_bo_unmap(pp_context->sampler_state_table.bo);
1878
1879     /* private function & data */
1880     pp_context->pp_x_steps = pp_scaling_x_steps;
1881     pp_context->pp_y_steps = pp_scaling_y_steps;
1882     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
1883
1884     pp_scaling_context->dest_x = dst_rect->x;
1885     pp_scaling_context->dest_y = dst_rect->y;
1886     pp_scaling_context->dest_w = ALIGN(dst_rect->width, 16);
1887     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 16);
1888     pp_scaling_context->src_normalized_x = (float)src_rect->x / in_w;
1889     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
1890
1891     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
1892
1893     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / dst_rect->width;
1894     pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
1895     pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
1896
1897     dst_surface->flags = src_surface->flags;
1898
1899     return VA_STATUS_SUCCESS;
1900 }
1901
1902 static int
1903 pp_avs_x_steps(void *private_context)
1904 {
1905     struct pp_avs_context *pp_avs_context = private_context;
1906
1907     return pp_avs_context->dest_w / 16;
1908 }
1909
/*
 * Vertical step count for the AVS kernel.
 *
 * The context pointer is not consulted; the result is always 1.
 */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1915
1916 static int
1917 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1918 {
1919     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1920     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1921     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1922     float src_x_steping, src_y_steping, video_step_delta;
1923     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
1924
1925     if (pp_static_parameter->grf4.r4_2.avs.nlas == 0) {
1926         src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1927         pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_avs_context->src_normalized_x;
1928     } else if (tmp_w >= pp_avs_context->dest_w) {
1929         pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1930         pp_inline_parameter->grf6.video_step_delta = 0;
1931         
1932         if (x == 0) {
1933             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
1934                 pp_avs_context->src_normalized_x;
1935         } else {
1936             src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1937             video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1938             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1939                 16 * 15 * video_step_delta / 2;
1940         }
1941     } else {
1942         int n0, n1, n2, nls_left, nls_right;
1943         int factor_a = 5, factor_b = 4;
1944         float f;
1945
1946         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
1947         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
1948         n2 = tmp_w / (16 * factor_a);
1949         nls_left = n0 + n2;
1950         nls_right = n1 + n2;
1951         f = (float) n2 * 16 / tmp_w;
1952         
1953         if (n0 < 5) {
1954             pp_inline_parameter->grf6.video_step_delta = 0.0;
1955
1956             if (x == 0) {
1957                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
1958                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1959             } else {
1960                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1961                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1962                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1963                     16 * 15 * video_step_delta / 2;
1964             }
1965         } else {
1966             if (x < nls_left) {
1967                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
1968                 float a = f / (nls_left * 16 * factor_b);
1969                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
1970                 
1971                 pp_inline_parameter->grf6.video_step_delta = b;
1972
1973                 if (x == 0) {
1974                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1975                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a;
1976                 } else {
1977                     src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1978                     video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1979                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1980                         16 * 15 * video_step_delta / 2;
1981                     pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
1982                 }
1983             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
1984                 /* scale the center linearly */
1985                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1986                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1987                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1988                     16 * 15 * video_step_delta / 2;
1989                 pp_inline_parameter->grf6.video_step_delta = 0.0;
1990                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1991             } else {
1992                 float a = f / (nls_right * 16 * factor_b);
1993                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
1994
1995                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1996                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1997                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1998                     16 * 15 * video_step_delta / 2;
1999                 pp_inline_parameter->grf6.video_step_delta = -b;
2000
2001                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
2002                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
2003                 else
2004                     pp_inline_parameter->grf5.normalized_video_x_scaling_step -= b * 16;
2005             }
2006         }
2007     }
2008
2009     src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2010     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
2011     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2012     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
2013
2014     return 0;
2015 }
2016
2017 static VAStatus
2018 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2019                        const struct i965_surface *src_surface,
2020                        const VARectangle *src_rect,
2021                        struct i965_surface *dst_surface,
2022                        const VARectangle *dst_rect,
2023                        void *filter_param,
2024                        int nlas)
2025 {
2026     struct i965_driver_data *i965 = i965_driver_data(ctx);
2027     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2028     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2029     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2030     struct object_surface *obj_surface;
2031     struct i965_sampler_8x8 *sampler_8x8;
2032     struct i965_sampler_8x8_state *sampler_8x8_state;
2033     int index;
2034     int in_w, in_h, in_wpitch, in_hpitch;
2035     int out_w, out_h, out_wpitch, out_hpitch;
2036     int i;
2037
2038     /* surface */
2039     obj_surface = SURFACE(src_surface->id);
2040     in_w = obj_surface->orig_width;
2041     in_h = obj_surface->orig_height;
2042     in_wpitch = obj_surface->width;
2043     in_hpitch = obj_surface->height;
2044
2045     /* source Y surface index 1 */
2046     i965_pp_set_surface2_state(ctx, pp_context,
2047                                obj_surface->bo, 0,
2048                                in_w, in_h, in_wpitch,
2049                                0, 0,
2050                                SURFACE_FORMAT_Y8_UNORM, 0,
2051                                1);
2052
2053     /* source UV surface index 2 */
2054     i965_pp_set_surface2_state(ctx, pp_context,
2055                                obj_surface->bo, in_wpitch * in_hpitch,
2056                                in_w / 2, in_h / 2, in_wpitch,
2057                                0, 0,
2058                                SURFACE_FORMAT_R8B8_UNORM, 0,
2059                                2);
2060
2061     /* destination surface */
2062     obj_surface = SURFACE(dst_surface->id);
2063     out_w = obj_surface->orig_width;
2064     out_h = obj_surface->orig_height;
2065     out_wpitch = obj_surface->width;
2066     out_hpitch = obj_surface->height;
2067     assert(out_w <= out_wpitch && out_h <= out_hpitch);
2068
2069     /* destination Y surface index 7 */
2070     i965_pp_set_surface_state(ctx, pp_context,
2071                               obj_surface->bo, 0,
2072                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2073                               7, 1);
2074
2075     /* destination UV surface index 8 */
2076     i965_pp_set_surface_state(ctx, pp_context,
2077                               obj_surface->bo, out_wpitch * out_hpitch,
2078                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2079                               8, 1);
2080
2081     /* sampler 8x8 state */
2082     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2083     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2084     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2085     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2086     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2087
2088     for (i = 0; i < 17; i++) {
2089         /* for Y channel, currently ignore */
2090         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x00;
2091         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x00;
2092         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x08;
2093         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x18;
2094         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x18;
2095         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x08;
2096         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x00;
2097         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x00;
2098         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x00;
2099         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x00;
2100         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x10;
2101         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x10;
2102         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x10;
2103         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x10;
2104         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x00;
2105         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x00;
2106         /* for U/V channel, 0.25 */
2107         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2108         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2109         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
2110         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
2111         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
2112         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
2113         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2114         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2115         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2116         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2117         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
2118         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
2119         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
2120         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
2121         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2122         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2123     }
2124
2125     sampler_8x8_state->dw136.default_sharpness_level = 0;
2126     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
2127     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2128     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2129     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2130
2131     /* sampler 8x8 */
2132     dri_bo_map(pp_context->sampler_state_table.bo, True);
2133     assert(pp_context->sampler_state_table.bo->virtual);
2134     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
2135     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2136
2137     /* sample_8x8 Y index 1 */
2138     index = 1;
2139     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2140     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2141     sampler_8x8[index].dw0.ief_bypass = 1;
2142     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2143     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2144     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2145     sampler_8x8[index].dw2.global_noise_estimation = 22;
2146     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2147     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2148     sampler_8x8[index].dw3.strong_edge_weight = 7;
2149     sampler_8x8[index].dw3.regular_weight = 2;
2150     sampler_8x8[index].dw3.non_edge_weight = 0;
2151     sampler_8x8[index].dw3.gain_factor = 40;
2152     sampler_8x8[index].dw4.steepness_boost = 0;
2153     sampler_8x8[index].dw4.steepness_threshold = 0;
2154     sampler_8x8[index].dw4.mr_boost = 0;
2155     sampler_8x8[index].dw4.mr_threshold = 5;
2156     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2157     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2158     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2159     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2160     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2161     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2162     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2163     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2164     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2165     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2166     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2167     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2168     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2169     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2170     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2171     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2172     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2173     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2174     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2175     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2176     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2177     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2178     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2179     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2180     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2181     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2182     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2183     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2184     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2185     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2186     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2187     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2188     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2189     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2190     sampler_8x8[index].dw13.limiter_boost = 0;
2191     sampler_8x8[index].dw13.minimum_limiter = 10;
2192     sampler_8x8[index].dw13.maximum_limiter = 11;
2193     sampler_8x8[index].dw14.clip_limiter = 130;
2194     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2195                       I915_GEM_DOMAIN_RENDER, 
2196                       0,
2197                       0,
2198                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2199                       pp_context->sampler_state_table.bo_8x8);
2200
2201     /* sample_8x8 UV index 2 */
2202     index = 2;
2203     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2204     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2205     sampler_8x8[index].dw0.ief_bypass = 1;
2206     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2207     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2208     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2209     sampler_8x8[index].dw2.global_noise_estimation = 22;
2210     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2211     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2212     sampler_8x8[index].dw3.strong_edge_weight = 7;
2213     sampler_8x8[index].dw3.regular_weight = 2;
2214     sampler_8x8[index].dw3.non_edge_weight = 0;
2215     sampler_8x8[index].dw3.gain_factor = 40;
2216     sampler_8x8[index].dw4.steepness_boost = 0;
2217     sampler_8x8[index].dw4.steepness_threshold = 0;
2218     sampler_8x8[index].dw4.mr_boost = 0;
2219     sampler_8x8[index].dw4.mr_threshold = 5;
2220     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2221     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2222     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2223     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2224     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2225     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2226     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2227     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2228     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2229     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2230     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2231     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2232     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2233     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2234     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2235     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2236     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2237     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2238     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2239     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2240     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2241     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2242     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2243     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2244     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2245     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2246     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2247     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2248     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2249     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2250     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2251     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2252     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2253     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2254     sampler_8x8[index].dw13.limiter_boost = 0;
2255     sampler_8x8[index].dw13.minimum_limiter = 10;
2256     sampler_8x8[index].dw13.maximum_limiter = 11;
2257     sampler_8x8[index].dw14.clip_limiter = 130;
2258     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2259                       I915_GEM_DOMAIN_RENDER, 
2260                       0,
2261                       0,
2262                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2263                       pp_context->sampler_state_table.bo_8x8);
2264
2265     dri_bo_unmap(pp_context->sampler_state_table.bo);
2266
2267     /* private function & data */
2268     pp_context->pp_x_steps = pp_avs_x_steps;
2269     pp_context->pp_y_steps = pp_avs_y_steps;
2270     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
2271
2272     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
2273     float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
2274     pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
2275     pp_avs_context->dest_y = dst_rect->y;
2276     pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2277     pp_avs_context->dest_h = ALIGN(dst_rect->height, 8);
2278     pp_avs_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
2279     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
2280     pp_avs_context->src_w = src_rect->width + src_left_edge_extend;
2281     pp_avs_context->src_h = src_rect->height;
2282
2283     pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
2284     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2285
2286     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
2287     pp_inline_parameter->grf5.block_count_x = 1;        /* M x 1 */
2288     pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
2289     pp_inline_parameter->grf6.video_step_delta = 0.0;
2290
2291     dst_surface->flags = src_surface->flags;
2292
2293     return VA_STATUS_SUCCESS;
2294 }
2295
2296 static VAStatus
2297 pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2298                             const struct i965_surface *src_surface,
2299                             const VARectangle *src_rect,
2300                             struct i965_surface *dst_surface,
2301                             const VARectangle *dst_rect,
2302                             void *filter_param)
2303 {
2304     return pp_nv12_avs_initialize(ctx, pp_context,
2305                                   src_surface,
2306                                   src_rect,
2307                                   dst_surface,
2308                                   dst_rect,
2309                                   filter_param,
2310                                   1);
2311 }
2312
2313 static VAStatus
2314 gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2315                              const struct i965_surface *src_surface,
2316                              const VARectangle *src_rect,
2317                              struct i965_surface *dst_surface,
2318                              const VARectangle *dst_rect,
2319                              void *filter_param)
2320 {
2321     return pp_nv12_avs_initialize(ctx, pp_context,
2322                                   src_surface,
2323                                   src_rect,
2324                                   dst_surface,
2325                                   dst_rect,
2326                                   filter_param,
2327                                   0);    
2328 }
2329
2330 static int
2331 gen7_pp_avs_x_steps(void *private_context)
2332 {
2333     struct pp_avs_context *pp_avs_context = private_context;
2334
2335     return pp_avs_context->dest_w / 16;
2336 }
2337
2338 static int
2339 gen7_pp_avs_y_steps(void *private_context)
2340 {
2341     struct pp_avs_context *pp_avs_context = private_context;
2342
2343     return pp_avs_context->dest_h / 16;
2344 }
2345
2346 static int
2347 gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2348 {
2349     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2350     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2351
2352     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2353     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
2354     pp_inline_parameter->grf7.constant_0 = 0xffffffff;
2355     pp_inline_parameter->grf7.sampler_load_main_video_x_scaling_step = 1.0 / pp_avs_context->src_w;
2356
2357     return 0;
2358 }
2359
2360 static void gen7_update_src_surface_uv_offset(VADriverContextP    ctx, 
2361                                               struct i965_post_processing_context *pp_context,
2362                                               const struct i965_surface *surface)
2363 {
2364     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2365     int fourcc = pp_get_surface_fourcc(ctx, surface);
2366     
2367     if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) {
2368         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
2369         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
2370         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
2371     } else if (fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
2372         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 1;
2373         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 0;
2374         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 2;
2375     }
2376 }
2377
2378 static VAStatus
2379 gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2380                            const struct i965_surface *src_surface,
2381                            const VARectangle *src_rect,
2382                            struct i965_surface *dst_surface,
2383                            const VARectangle *dst_rect,
2384                            void *filter_param)
2385 {
2386     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2387     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2388     struct gen7_sampler_8x8 *sampler_8x8;
2389     struct i965_sampler_8x8_state *sampler_8x8_state;
2390     int index, i;
2391     int width[3], height[3], pitch[3], offset[3];
2392     int src_width, src_height;
2393
2394     /* source surface */
2395     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
2396                                          width, height, pitch, offset);
2397     src_width = width[0];
2398     src_height = height[0];
2399
2400     /* destination surface */
2401     gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
2402                                          width, height, pitch, offset);
2403
2404     /* sampler 8x8 state */
2405     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2406     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2407     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2408     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2409     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2410
2411     for (i = 0; i < 17; i++) {
2412         /* for Y channel, currently ignore */
2413         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0;
2414         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0;
2415         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0;
2416         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x0;
2417         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x0;
2418         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0;
2419         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0;
2420         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0;
2421         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0;
2422         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0;
2423         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0;
2424         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x0;
2425         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x0;
2426         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0;
2427         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0;
2428         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0;
2429         /* for U/V channel, 0.25 */
2430         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2431         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2432         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
2433         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
2434         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
2435         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
2436         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2437         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2438         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2439         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2440         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
2441         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
2442         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
2443         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
2444         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2445         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2446     }
2447
2448     sampler_8x8_state->dw136.default_sharpness_level = 0;
2449     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
2450     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2451     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2452     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2453
2454     /* sampler 8x8 */
2455     dri_bo_map(pp_context->sampler_state_table.bo, True);
2456     assert(pp_context->sampler_state_table.bo->virtual);
2457     assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
2458     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2459
2460     /* sample_8x8 Y index 4 */
2461     index = 4;
2462     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2463     sampler_8x8[index].dw0.global_noise_estimation = 255;
2464     sampler_8x8[index].dw0.ief_bypass = 1;
2465
2466     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2467
2468     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2469     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2470     sampler_8x8[index].dw2.r5x_coefficient = 9;
2471     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2472     sampler_8x8[index].dw2.r5c_coefficient = 3;
2473
2474     sampler_8x8[index].dw3.r3x_coefficient = 27;
2475     sampler_8x8[index].dw3.r3c_coefficient = 5;
2476     sampler_8x8[index].dw3.gain_factor = 40;
2477     sampler_8x8[index].dw3.non_edge_weight = 1;
2478     sampler_8x8[index].dw3.regular_weight = 2;
2479     sampler_8x8[index].dw3.strong_edge_weight = 7;
2480     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2481
2482     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2483                       I915_GEM_DOMAIN_RENDER, 
2484                       0,
2485                       0,
2486                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2487                       pp_context->sampler_state_table.bo_8x8);
2488
2489     /* sample_8x8 UV index 8 */
2490     index = 8;
2491     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2492     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2493     sampler_8x8[index].dw0.global_noise_estimation = 255;
2494     sampler_8x8[index].dw0.ief_bypass = 1;
2495     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2496     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2497     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2498     sampler_8x8[index].dw2.r5x_coefficient = 9;
2499     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2500     sampler_8x8[index].dw2.r5c_coefficient = 3;
2501     sampler_8x8[index].dw3.r3x_coefficient = 27;
2502     sampler_8x8[index].dw3.r3c_coefficient = 5;
2503     sampler_8x8[index].dw3.gain_factor = 40;
2504     sampler_8x8[index].dw3.non_edge_weight = 1;
2505     sampler_8x8[index].dw3.regular_weight = 2;
2506     sampler_8x8[index].dw3.strong_edge_weight = 7;
2507     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2508
2509     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2510                       I915_GEM_DOMAIN_RENDER, 
2511                       0,
2512                       0,
2513                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2514                       pp_context->sampler_state_table.bo_8x8);
2515
2516     /* sampler_8x8 V, index 12 */
2517     index = 12;
2518     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2519     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2520     sampler_8x8[index].dw0.global_noise_estimation = 255;
2521     sampler_8x8[index].dw0.ief_bypass = 1;
2522     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2523     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2524     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2525     sampler_8x8[index].dw2.r5x_coefficient = 9;
2526     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2527     sampler_8x8[index].dw2.r5c_coefficient = 3;
2528     sampler_8x8[index].dw3.r3x_coefficient = 27;
2529     sampler_8x8[index].dw3.r3c_coefficient = 5;
2530     sampler_8x8[index].dw3.gain_factor = 40;
2531     sampler_8x8[index].dw3.non_edge_weight = 1;
2532     sampler_8x8[index].dw3.regular_weight = 2;
2533     sampler_8x8[index].dw3.strong_edge_weight = 7;
2534     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2535
2536     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2537                       I915_GEM_DOMAIN_RENDER, 
2538                       0,
2539                       0,
2540                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2541                       pp_context->sampler_state_table.bo_8x8);
2542
2543     dri_bo_unmap(pp_context->sampler_state_table.bo);
2544
2545     /* private function & data */
2546     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
2547     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
2548     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
2549
2550     pp_avs_context->dest_x = dst_rect->x;
2551     pp_avs_context->dest_y = dst_rect->y;
2552     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
2553     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
2554     pp_avs_context->src_w = src_rect->width;
2555     pp_avs_context->src_h = src_rect->height;
2556
2557     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
2558     dw = MAX(dw, pp_avs_context->dest_w);
2559
2560     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
2561     pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */
2562     pp_static_parameter->grf2.avs_wa_width = dw;
2563     pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * dw);
2564     pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * dw);
2565
2566     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
2567     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / pp_avs_context->dest_h;
2568     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = -(float)pp_avs_context->dest_y / pp_avs_context->dest_h;
2569     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = -(float)pp_avs_context->dest_x / dw;
2570
2571     gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
2572
2573     dst_surface->flags = src_surface->flags;
2574
2575     return VA_STATUS_SUCCESS;
2576 }
2577
/*
 * Horizontal step count for the DNDI kernel: always a single step.
 * private_context is not consulted.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2583
2584 static int
2585 pp_dndi_y_steps(void *private_context)
2586 {
2587     struct pp_dndi_context *pp_dndi_context = private_context;
2588
2589     return pp_dndi_context->dest_h / 4;
2590 }
2591
2592 static int
2593 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2594 {
2595     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2596
2597     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2598     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
2599
2600     return 0;
2601 }
2602
2603 static VAStatus
2604 pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2605                         const struct i965_surface *src_surface,
2606                         const VARectangle *src_rect,
2607                         struct i965_surface *dst_surface,
2608                         const VARectangle *dst_rect,
2609                         void *filter_param)
2610 {
2611     struct i965_driver_data *i965 = i965_driver_data(ctx);
2612     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
2613     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2614     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2615     struct object_surface *obj_surface;
2616     struct i965_sampler_dndi *sampler_dndi;
2617     int index;
2618     int w, h;
2619     int orig_w, orig_h;
2620     int dndi_top_first = 1;
2621
2622     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
2623         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
2624
2625     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
2626         dndi_top_first = 1;
2627     else
2628         dndi_top_first = 0;
2629
2630     /* surface */
2631     obj_surface = SURFACE(src_surface->id);
2632     orig_w = obj_surface->orig_width;
2633     orig_h = obj_surface->orig_height;
2634     w = obj_surface->width;
2635     h = obj_surface->height;
2636
2637     if (pp_context->stmm.bo == NULL) {
2638         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2639                                            "STMM surface",
2640                                            w * h,
2641                                            4096);
2642         assert(pp_context->stmm.bo);
2643     }
2644
2645     /* source UV surface index 2 */
2646     i965_pp_set_surface_state(ctx, pp_context,
2647                               obj_surface->bo, w * h,
2648                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2649                               2, 0);
2650
2651     /* source YUV surface index 4 */
2652     i965_pp_set_surface2_state(ctx, pp_context,
2653                                obj_surface->bo, 0,
2654                                orig_w, orig_h, w,
2655                                0, h,
2656                                SURFACE_FORMAT_PLANAR_420_8, 1,
2657                                4);
2658
2659     /* source STMM surface index 20 */
2660     i965_pp_set_surface_state(ctx, pp_context,
2661                               pp_context->stmm.bo, 0,
2662                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2663                               20, 1);
2664
2665     /* destination surface */
2666     obj_surface = SURFACE(dst_surface->id);
2667     orig_w = obj_surface->orig_width;
2668     orig_h = obj_surface->orig_height;
2669     w = obj_surface->width;
2670     h = obj_surface->height;
2671
2672     /* destination Y surface index 7 */
2673     i965_pp_set_surface_state(ctx, pp_context,
2674                               obj_surface->bo, 0,
2675                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2676                               7, 1);
2677
2678     /* destination UV surface index 8 */
2679     i965_pp_set_surface_state(ctx, pp_context,
2680                               obj_surface->bo, w * h,
2681                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2682                               8, 1);
2683     /* sampler dndi */
2684     dri_bo_map(pp_context->sampler_state_table.bo, True);
2685     assert(pp_context->sampler_state_table.bo->virtual);
2686     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2687     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2688
2689     /* sample dndi index 1 */
2690     index = 0;
2691     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2692     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
2693     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2694     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2695
2696     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2697     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 4;
2698     sampler_dndi[index].dw1.stmm_c2 = 1;
2699     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2700     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2701
2702     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
2703     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
2704     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
2705     sampler_dndi[index].dw2.good_neighbor_threshold = 4;                // 0-63
2706
2707     sampler_dndi[index].dw3.maximum_stmm = 128;
2708     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2709     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2710     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2711     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2712
2713     sampler_dndi[index].dw4.sdi_delta = 8;
2714     sampler_dndi[index].dw4.sdi_threshold = 128;
2715     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2716     sampler_dndi[index].dw4.stmm_shift_up = 0;
2717     sampler_dndi[index].dw4.stmm_shift_down = 0;
2718     sampler_dndi[index].dw4.minimum_stmm = 0;
2719
2720     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 8;
2721     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 32;
2722     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 64;
2723     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 32;
2724
2725     sampler_dndi[index].dw6.dn_enable = 1;
2726     sampler_dndi[index].dw6.di_enable = 1;
2727     sampler_dndi[index].dw6.di_partial = 0;
2728     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2729     sampler_dndi[index].dw6.dndi_stream_id = 0;
2730     sampler_dndi[index].dw6.dndi_first_frame = 1;
2731     sampler_dndi[index].dw6.progressive_dn = 0;
2732     sampler_dndi[index].dw6.fmd_tear_threshold = 63;
2733     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2734     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2735
2736     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
2737     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
2738     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2739     sampler_dndi[index].dw7.column_width_minus1 = 0;
2740
2741     dri_bo_unmap(pp_context->sampler_state_table.bo);
2742
2743     /* private function & data */
2744     pp_context->pp_x_steps = pp_dndi_x_steps;
2745     pp_context->pp_y_steps = pp_dndi_y_steps;
2746     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
2747
2748     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
2749     pp_static_parameter->grf1.r1_6.di.top_field_first = dndi_top_first;
2750     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 0;
2751     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 0;
2752
2753     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
2754     pp_inline_parameter->grf5.number_blocks = w / 16;
2755     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
2756     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
2757
2758     pp_dndi_context->dest_w = w;
2759     pp_dndi_context->dest_h = h;
2760
2761     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
2762
2763     return VA_STATUS_SUCCESS;
2764 }
2765
/*
 * Horizontal step count for the DN (denoise) kernel: always a single step.
 * private_context is not consulted.
 */
static int
pp_dn_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2771
2772 static int
2773 pp_dn_y_steps(void *private_context)
2774 {
2775     struct pp_dn_context *pp_dn_context = private_context;
2776
2777     return pp_dn_context->dest_h / 8;
2778 }
2779
2780 static int
2781 pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2782 {
2783     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2784
2785     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2786     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
2787
2788     return 0;
2789 }
2790
2791 static VAStatus
2792 pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2793                       const struct i965_surface *src_surface,
2794                       const VARectangle *src_rect,
2795                       struct i965_surface *dst_surface,
2796                       const VARectangle *dst_rect,
2797                       void *filter_param)
2798 {
2799     struct i965_driver_data *i965 = i965_driver_data(ctx);
2800     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
2801     struct object_surface *obj_surface;
2802     struct i965_sampler_dndi *sampler_dndi;
2803     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2804     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2805     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
2806     int index;
2807     int w, h;
2808     int orig_w, orig_h;
2809     int dn_strength = 15;
2810     int dndi_top_first = 1;
2811     int dn_progressive = 0;
2812
2813     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
2814         dndi_top_first = 1;
2815         dn_progressive = 1;
2816     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
2817         dndi_top_first = 1;
2818         dn_progressive = 0;
2819     } else {
2820         dndi_top_first = 0;
2821         dn_progressive = 0;
2822     }
2823
2824     if (dn_filter_param) {
2825         float value = dn_filter_param->value;
2826         
2827         if (value > 1.0)
2828             value = 1.0;
2829         
2830         if (value < 0.0)
2831             value = 0.0;
2832
2833         dn_strength = (int)(value * 31.0F);
2834     }
2835
2836     /* surface */
2837     obj_surface = SURFACE(src_surface->id);
2838     orig_w = obj_surface->orig_width;
2839     orig_h = obj_surface->orig_height;
2840     w = obj_surface->width;
2841     h = obj_surface->height;
2842
2843     if (pp_context->stmm.bo == NULL) {
2844         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2845                                            "STMM surface",
2846                                            w * h,
2847                                            4096);
2848         assert(pp_context->stmm.bo);
2849     }
2850
2851     /* source UV surface index 2 */
2852     i965_pp_set_surface_state(ctx, pp_context,
2853                               obj_surface->bo, w * h,
2854                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2855                               2, 0);
2856
2857     /* source YUV surface index 4 */
2858     i965_pp_set_surface2_state(ctx, pp_context,
2859                                obj_surface->bo, 0,
2860                                orig_w, orig_h, w,
2861                                0, h,
2862                                SURFACE_FORMAT_PLANAR_420_8, 1,
2863                                4);
2864
2865     /* source STMM surface index 20 */
2866     i965_pp_set_surface_state(ctx, pp_context,
2867                               pp_context->stmm.bo, 0,
2868                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2869                               20, 1);
2870
2871     /* destination surface */
2872     obj_surface = SURFACE(dst_surface->id);
2873     orig_w = obj_surface->orig_width;
2874     orig_h = obj_surface->orig_height;
2875     w = obj_surface->width;
2876     h = obj_surface->height;
2877
2878     /* destination Y surface index 7 */
2879     i965_pp_set_surface_state(ctx, pp_context,
2880                               obj_surface->bo, 0,
2881                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2882                               7, 1);
2883
2884     /* destination UV surface index 8 */
2885     i965_pp_set_surface_state(ctx, pp_context,
2886                               obj_surface->bo, w * h,
2887                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2888                               8, 1);
2889     /* sampler dn */
2890     dri_bo_map(pp_context->sampler_state_table.bo, True);
2891     assert(pp_context->sampler_state_table.bo->virtual);
2892     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2893     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2894
2895     /* sample dndi index 1 */
2896     index = 0;
2897     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2898     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
2899     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2900     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2901
2902     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2903     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
2904     sampler_dndi[index].dw1.stmm_c2 = 0;
2905     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2906     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2907
2908     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
2909     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
2910     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
2911     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
2912
2913     sampler_dndi[index].dw3.maximum_stmm = 128;
2914     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2915     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2916     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2917     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2918
2919     sampler_dndi[index].dw4.sdi_delta = 8;
2920     sampler_dndi[index].dw4.sdi_threshold = 128;
2921     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2922     sampler_dndi[index].dw4.stmm_shift_up = 0;
2923     sampler_dndi[index].dw4.stmm_shift_down = 0;
2924     sampler_dndi[index].dw4.minimum_stmm = 0;
2925
2926     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
2927     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
2928     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
2929     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
2930
2931     sampler_dndi[index].dw6.dn_enable = 1;
2932     sampler_dndi[index].dw6.di_enable = 0;
2933     sampler_dndi[index].dw6.di_partial = 0;
2934     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2935     sampler_dndi[index].dw6.dndi_stream_id = 1;
2936     sampler_dndi[index].dw6.dndi_first_frame = 1;
2937     sampler_dndi[index].dw6.progressive_dn = dn_progressive;
2938     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
2939     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2940     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2941
2942     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
2943     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
2944     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2945     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
2946
2947     dri_bo_unmap(pp_context->sampler_state_table.bo);
2948
2949     /* private function & data */
2950     pp_context->pp_x_steps = pp_dn_x_steps;
2951     pp_context->pp_y_steps = pp_dn_y_steps;
2952     pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
2953
2954     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
2955     pp_static_parameter->grf1.r1_6.di.top_field_first = 0;
2956     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 64;
2957     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 192;
2958
2959     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
2960     pp_inline_parameter->grf5.number_blocks = w / 16;
2961     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
2962     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
2963
2964     pp_dn_context->dest_w = w;
2965     pp_dn_context->dest_h = h;
2966
2967     dst_surface->flags = src_surface->flags;
2968     
2969     return VA_STATUS_SUCCESS;
2970 }
2971
2972 static int
2973 gen7_pp_dndi_x_steps(void *private_context)
2974 {
2975     struct pp_dndi_context *pp_dndi_context = private_context;
2976
2977     return pp_dndi_context->dest_w / 16;
2978 }
2979
2980 static int
2981 gen7_pp_dndi_y_steps(void *private_context)
2982 {
2983     struct pp_dndi_context *pp_dndi_context = private_context;
2984
2985     return pp_dndi_context->dest_h / 4;
2986 }
2987
2988 static int
2989 gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2990 {
2991     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2992
2993     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16;
2994     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 4;
2995
2996     return 0;
2997 }
2998
2999 static VAStatus
3000 gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3001                              const struct i965_surface *src_surface,
3002                              const VARectangle *src_rect,
3003                              struct i965_surface *dst_surface,
3004                              const VARectangle *dst_rect,
3005                              void *filter_param)
3006 {
3007     struct i965_driver_data *i965 = i965_driver_data(ctx);
3008     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
3009     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3010     struct object_surface *obj_surface;
3011     struct gen7_sampler_dndi *sampler_dndi;
3012     int index;
3013     int w, h;
3014     int orig_w, orig_h;
3015     int dndi_top_first = 1;
3016
3017     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
3018         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
3019
3020     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
3021         dndi_top_first = 1;
3022     else
3023         dndi_top_first = 0;
3024
3025     /* surface */
3026     obj_surface = SURFACE(src_surface->id);
3027     orig_w = obj_surface->orig_width;
3028     orig_h = obj_surface->orig_height;
3029     w = obj_surface->width;
3030     h = obj_surface->height;
3031
3032     if (pp_context->stmm.bo == NULL) {
3033         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
3034                                            "STMM surface",
3035                                            w * h,
3036                                            4096);
3037         assert(pp_context->stmm.bo);
3038     }
3039
3040     /* source UV surface index 1 */
3041     gen7_pp_set_surface_state(ctx, pp_context,
3042                               obj_surface->bo, w * h,
3043                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3044                               1, 0);
3045
3046     /* source YUV surface index 3 */
3047     gen7_pp_set_surface2_state(ctx, pp_context,
3048                                obj_surface->bo, 0,
3049                                orig_w, orig_h, w,
3050                                0, h,
3051                                SURFACE_FORMAT_PLANAR_420_8, 1,
3052                                3);
3053
3054     /* source (temporal reference) YUV surface index 4 */
3055     gen7_pp_set_surface2_state(ctx, pp_context,
3056                                obj_surface->bo, 0,
3057                                orig_w, orig_h, w,
3058                                0, h,
3059                                SURFACE_FORMAT_PLANAR_420_8, 1,
3060                                4);
3061
3062     /* STMM / History Statistics input surface, index 5 */
3063     gen7_pp_set_surface_state(ctx, pp_context,
3064                               pp_context->stmm.bo, 0,
3065                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3066                               5, 1);
3067
3068     /* destination surface */
3069     obj_surface = SURFACE(dst_surface->id);
3070     orig_w = obj_surface->orig_width;
3071     orig_h = obj_surface->orig_height;
3072     w = obj_surface->width;
3073     h = obj_surface->height;
3074
3075     /* destination(Previous frame) Y surface index 27 */
3076     gen7_pp_set_surface_state(ctx, pp_context,
3077                               obj_surface->bo, 0,
3078                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3079                               27, 1);
3080
3081     /* destination(Previous frame) UV surface index 28 */
3082     gen7_pp_set_surface_state(ctx, pp_context,
3083                               obj_surface->bo, w * h,
3084                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3085                               28, 1);
3086
3087     /* destination(Current frame) Y surface index 30 */
3088     gen7_pp_set_surface_state(ctx, pp_context,
3089                               obj_surface->bo, 0,
3090                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3091                               30, 1);
3092
3093     /* destination(Current frame) UV surface index 31 */
3094     gen7_pp_set_surface_state(ctx, pp_context,
3095                               obj_surface->bo, w * h,
3096                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3097                               31, 1);
3098
3099     /* STMM output surface, index 33 */
3100     gen7_pp_set_surface_state(ctx, pp_context,
3101                               pp_context->stmm.bo, 0,
3102                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3103                               33, 1);
3104
3105
3106     /* sampler dndi */
3107     dri_bo_map(pp_context->sampler_state_table.bo, True);
3108     assert(pp_context->sampler_state_table.bo->virtual);
3109     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3110     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3111
3112     /* sample dndi index 0 */
3113     index = 0;
3114     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
3115     sampler_dndi[index].dw0.dnmh_delt = 8;
3116     sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
3117     sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
3118     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
3119     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
3120
3121     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3122     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
3123     sampler_dndi[index].dw1.stmm_c2 = 0;
3124     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
3125     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
3126
3127     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
3128     sampler_dndi[index].dw2.bne_edge_th = 1;
3129     sampler_dndi[index].dw2.smooth_mv_th = 0;
3130     sampler_dndi[index].dw2.sad_tight_th = 5;
3131     sampler_dndi[index].dw2.cat_slope_minus1 = 9;
3132     sampler_dndi[index].dw2.good_neighbor_th = 4;
3133
3134     sampler_dndi[index].dw3.maximum_stmm = 128;
3135     sampler_dndi[index].dw3.multipler_for_vecm = 2;
3136     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3137     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3138     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3139
3140     sampler_dndi[index].dw4.sdi_delta = 8;
3141     sampler_dndi[index].dw4.sdi_threshold = 128;
3142     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3143     sampler_dndi[index].dw4.stmm_shift_up = 0;
3144     sampler_dndi[index].dw4.stmm_shift_down = 0;
3145     sampler_dndi[index].dw4.minimum_stmm = 0;
3146
3147     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
3148     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
3149     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3150     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3151
3152     sampler_dndi[index].dw6.dn_enable = 0;
3153     sampler_dndi[index].dw6.di_enable = 1;
3154     sampler_dndi[index].dw6.di_partial = 0;
3155     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3156     sampler_dndi[index].dw6.dndi_stream_id = 1;
3157     sampler_dndi[index].dw6.dndi_first_frame = 1;
3158     sampler_dndi[index].dw6.progressive_dn = 0;
3159     sampler_dndi[index].dw6.mcdi_enable = 0;
3160     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
3161     sampler_dndi[index].dw6.cat_th1 = 0;
3162     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
3163     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
3164
3165     sampler_dndi[index].dw7.sad_tha = 5;
3166     sampler_dndi[index].dw7.sad_thb = 10;
3167     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3168     sampler_dndi[index].dw7.mc_pixel_consistency_th = 25;
3169     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3170     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3171     sampler_dndi[index].dw7.neighborpixel_th = 10;
3172     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3173
3174     dri_bo_unmap(pp_context->sampler_state_table.bo);
3175
3176     /* private function & data */
3177     pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
3178     pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
3179     pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
3180
3181     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3182     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3183     pp_static_parameter->grf1.di_top_field_first = 0;
3184     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3185
3186     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3187     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3188     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3189
3190     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3191     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3192
3193     pp_dndi_context->dest_w = w;
3194     pp_dndi_context->dest_h = h;
3195
3196     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3197
3198     return VA_STATUS_SUCCESS;
3199 }
3200
3201 static int
3202 gen7_pp_dn_x_steps(void *private_context)
3203 {
3204     struct pp_dn_context *pp_dn_context = private_context;
3205
3206     return pp_dn_context->dest_w / 16;
3207 }
3208
3209 static int
3210 gen7_pp_dn_y_steps(void *private_context)
3211 {
3212     struct pp_dn_context *pp_dn_context = private_context;
3213
3214     return pp_dn_context->dest_h / 4;
3215 }
3216
3217 static int
3218 gen7_pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3219 {
3220     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3221
3222     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3223     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
3224
3225     return 0;
3226 }
3227
3228 static VAStatus
3229 gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3230                            const struct i965_surface *src_surface,
3231                            const VARectangle *src_rect,
3232                            struct i965_surface *dst_surface,
3233                            const VARectangle *dst_rect,
3234                            void *filter_param)
3235 {
3236     struct i965_driver_data *i965 = i965_driver_data(ctx);
3237     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
3238     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3239     struct object_surface *obj_surface;
3240     struct gen7_sampler_dndi *sampler_dn;
3241     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3242     int index;
3243     int w, h;
3244     int orig_w, orig_h;
3245     int dn_strength = 15;
3246     int dndi_top_first = 1;
3247     int dn_progressive = 0;
3248
3249     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3250         dndi_top_first = 1;
3251         dn_progressive = 1;
3252     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3253         dndi_top_first = 1;
3254         dn_progressive = 0;
3255     } else {
3256         dndi_top_first = 0;
3257         dn_progressive = 0;
3258     }
3259
3260     if (dn_filter_param) {
3261         float value = dn_filter_param->value;
3262         
3263         if (value > 1.0)
3264             value = 1.0;
3265         
3266         if (value < 0.0)
3267             value = 0.0;
3268
3269         dn_strength = (int)(value * 31.0F);
3270     }
3271
3272     /* surface */
3273     obj_surface = SURFACE(src_surface->id);
3274     orig_w = obj_surface->orig_width;
3275     orig_h = obj_surface->orig_height;
3276     w = obj_surface->width;
3277     h = obj_surface->height;
3278
3279     if (pp_context->stmm.bo == NULL) {
3280         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
3281                                            "STMM surface",
3282                                            w * h,
3283                                            4096);
3284         assert(pp_context->stmm.bo);
3285     }
3286
3287     /* source UV surface index 1 */
3288     gen7_pp_set_surface_state(ctx, pp_context,
3289                               obj_surface->bo, w * h,
3290                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3291                               1, 0);
3292
3293     /* source YUV surface index 3 */
3294     gen7_pp_set_surface2_state(ctx, pp_context,
3295                                obj_surface->bo, 0,
3296                                orig_w, orig_h, w,
3297                                0, h,
3298                                SURFACE_FORMAT_PLANAR_420_8, 1,
3299                                3);
3300
3301     /* source (temporal reference) YUV surface index 4 */
3302     gen7_pp_set_surface2_state(ctx, pp_context,
3303                                obj_surface->bo, 0,
3304                                orig_w, orig_h, w,
3305                                0, h,
3306                                SURFACE_FORMAT_PLANAR_420_8, 1,
3307                                4);
3308
3309     /* STMM / History Statistics input surface, index 5 */
3310     gen7_pp_set_surface_state(ctx, pp_context,
3311                               pp_context->stmm.bo, 0,
3312                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3313                               5, 1);
3314
3315     /* destination surface */
3316     obj_surface = SURFACE(dst_surface->id);
3317     orig_w = obj_surface->orig_width;
3318     orig_h = obj_surface->orig_height;
3319     w = obj_surface->width;
3320     h = obj_surface->height;
3321
3322     /* destination Y surface index 24 */
3323     gen7_pp_set_surface_state(ctx, pp_context,
3324                               obj_surface->bo, 0,
3325                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3326                               24, 1);
3327
3328     /* destination UV surface index 25 */
3329     gen7_pp_set_surface_state(ctx, pp_context,
3330                               obj_surface->bo, w * h,
3331                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3332                               25, 1);
3333
3334     /* sampler dn */
3335     dri_bo_map(pp_context->sampler_state_table.bo, True);
3336     assert(pp_context->sampler_state_table.bo->virtual);
3337     assert(sizeof(*sampler_dn) == sizeof(int) * 8);
3338     sampler_dn = pp_context->sampler_state_table.bo->virtual;
3339
3340     /* sample dn index 1 */
3341     index = 0;
3342     sampler_dn[index].dw0.denoise_asd_threshold = 0;
3343     sampler_dn[index].dw0.dnmh_delt = 8;
3344     sampler_dn[index].dw0.vdi_walker_y_stride = 0;
3345     sampler_dn[index].dw0.vdi_walker_frame_sharing_enable = 0;
3346     sampler_dn[index].dw0.denoise_maximum_history = 128;      // 128-240
3347     sampler_dn[index].dw0.denoise_stad_threshold = 0;
3348
3349     sampler_dn[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3350     sampler_dn[index].dw1.denoise_moving_pixel_threshold = 0;
3351     sampler_dn[index].dw1.stmm_c2 = 0;
3352     sampler_dn[index].dw1.low_temporal_difference_threshold = 8;
3353     sampler_dn[index].dw1.temporal_difference_threshold = 16;
3354
3355     sampler_dn[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
3356     sampler_dn[index].dw2.bne_edge_th = 1;
3357     sampler_dn[index].dw2.smooth_mv_th = 0;
3358     sampler_dn[index].dw2.sad_tight_th = 5;
3359     sampler_dn[index].dw2.cat_slope_minus1 = 9;
3360     sampler_dn[index].dw2.good_neighbor_th = 4;
3361
3362     sampler_dn[index].dw3.maximum_stmm = 128;
3363     sampler_dn[index].dw3.multipler_for_vecm = 2;
3364     sampler_dn[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3365     sampler_dn[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3366     sampler_dn[index].dw3.stmm_blending_constant_select = 0;
3367
3368     sampler_dn[index].dw4.sdi_delta = 8;
3369     sampler_dn[index].dw4.sdi_threshold = 128;
3370     sampler_dn[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3371     sampler_dn[index].dw4.stmm_shift_up = 0;
3372     sampler_dn[index].dw4.stmm_shift_down = 0;
3373     sampler_dn[index].dw4.minimum_stmm = 0;
3374
3375     sampler_dn[index].dw5.fmd_temporal_difference_threshold = 0;
3376     sampler_dn[index].dw5.sdi_fallback_mode_2_constant = 0;
3377     sampler_dn[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3378     sampler_dn[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3379
3380     sampler_dn[index].dw6.dn_enable = 1;
3381     sampler_dn[index].dw6.di_enable = 0;
3382     sampler_dn[index].dw6.di_partial = 0;
3383     sampler_dn[index].dw6.dndi_top_first = dndi_top_first;
3384     sampler_dn[index].dw6.dndi_stream_id = 1;
3385     sampler_dn[index].dw6.dndi_first_frame = 1;
3386     sampler_dn[index].dw6.progressive_dn = dn_progressive;
3387     sampler_dn[index].dw6.mcdi_enable = 0;
3388     sampler_dn[index].dw6.fmd_tear_threshold = 32;
3389     sampler_dn[index].dw6.cat_th1 = 0;
3390     sampler_dn[index].dw6.fmd2_vertical_difference_threshold = 32;
3391     sampler_dn[index].dw6.fmd1_vertical_difference_threshold = 32;
3392
3393     sampler_dn[index].dw7.sad_tha = 5;
3394     sampler_dn[index].dw7.sad_thb = 10;
3395     sampler_dn[index].dw7.fmd_for_1st_field_of_current_frame = 2;
3396     sampler_dn[index].dw7.mc_pixel_consistency_th = 25;
3397     sampler_dn[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
3398     sampler_dn[index].dw7.vdi_walker_enable = 0;
3399     sampler_dn[index].dw7.neighborpixel_th = 10;
3400     sampler_dn[index].dw7.column_width_minus1 = w / 16;
3401
3402     dri_bo_unmap(pp_context->sampler_state_table.bo);
3403
3404     /* private function & data */
3405     pp_context->pp_x_steps = gen7_pp_dn_x_steps;
3406     pp_context->pp_y_steps = gen7_pp_dn_y_steps;
3407     pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;
3408
3409     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3410     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3411     pp_static_parameter->grf1.di_top_field_first = 0;
3412     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3413
3414     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3415     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3416     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3417
3418     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3419     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3420
3421     pp_dn_context->dest_w = w;
3422     pp_dn_context->dest_h = h;
3423
3424     dst_surface->flags = src_surface->flags;
3425
3426     return VA_STATUS_SUCCESS;
3427 }
3428
3429 static VAStatus
3430 ironlake_pp_initialize(
3431     VADriverContextP   ctx,
3432     struct i965_post_processing_context *pp_context,
3433     const struct i965_surface *src_surface,
3434     const VARectangle *src_rect,
3435     struct i965_surface *dst_surface,
3436     const VARectangle *dst_rect,
3437     int                pp_index,
3438     void *filter_param
3439 )
3440 {
3441     VAStatus va_status;
3442     struct i965_driver_data *i965 = i965_driver_data(ctx);
3443     struct pp_module *pp_module;
3444     dri_bo *bo;
3445     int static_param_size, inline_param_size;
3446
3447     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3448     bo = dri_bo_alloc(i965->intel.bufmgr,
3449                       "surface state & binding table",
3450                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
3451                       4096);
3452     assert(bo);
3453     pp_context->surface_state_binding_table.bo = bo;
3454
3455     dri_bo_unreference(pp_context->curbe.bo);
3456     bo = dri_bo_alloc(i965->intel.bufmgr,
3457                       "constant buffer",
3458                       4096, 
3459                       4096);
3460     assert(bo);
3461     pp_context->curbe.bo = bo;
3462
3463     dri_bo_unreference(pp_context->idrt.bo);
3464     bo = dri_bo_alloc(i965->intel.bufmgr, 
3465                       "interface discriptor", 
3466                       sizeof(struct i965_interface_descriptor), 
3467                       4096);
3468     assert(bo);
3469     pp_context->idrt.bo = bo;
3470     pp_context->idrt.num_interface_descriptors = 0;
3471
3472     dri_bo_unreference(pp_context->sampler_state_table.bo);
3473     bo = dri_bo_alloc(i965->intel.bufmgr, 
3474                       "sampler state table", 
3475                       4096,
3476                       4096);
3477     assert(bo);
3478     dri_bo_map(bo, True);
3479     memset(bo->virtual, 0, bo->size);
3480     dri_bo_unmap(bo);
3481     pp_context->sampler_state_table.bo = bo;
3482
3483     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3484     bo = dri_bo_alloc(i965->intel.bufmgr, 
3485                       "sampler 8x8 state ",
3486                       4096,
3487                       4096);
3488     assert(bo);
3489     pp_context->sampler_state_table.bo_8x8 = bo;
3490
3491     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3492     bo = dri_bo_alloc(i965->intel.bufmgr, 
3493                       "sampler 8x8 state ",
3494                       4096,
3495                       4096);
3496     assert(bo);
3497     pp_context->sampler_state_table.bo_8x8_uv = bo;
3498
3499     dri_bo_unreference(pp_context->vfe_state.bo);
3500     bo = dri_bo_alloc(i965->intel.bufmgr, 
3501                       "vfe state", 
3502                       sizeof(struct i965_vfe_state), 
3503                       4096);
3504     assert(bo);
3505     pp_context->vfe_state.bo = bo;
3506
3507     static_param_size = sizeof(struct pp_static_parameter);
3508     inline_param_size = sizeof(struct pp_inline_parameter);
3509
3510     memset(pp_context->pp_static_parameter, 0, static_param_size);
3511     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
3512     
3513     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
3514     pp_context->current_pp = pp_index;
3515     pp_module = &pp_context->pp_modules[pp_index];
3516     
3517     if (pp_module->initialize)
3518         va_status = pp_module->initialize(ctx, pp_context,
3519                                           src_surface,
3520                                           src_rect,
3521                                           dst_surface,
3522                                           dst_rect,
3523                                           filter_param);
3524     else
3525         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
3526
3527     return va_status;
3528 }
3529
3530 static VAStatus
3531 ironlake_post_processing(
3532     VADriverContextP   ctx,
3533     struct i965_post_processing_context *pp_context,
3534     const struct i965_surface *src_surface,
3535     const VARectangle *src_rect,
3536     struct i965_surface *dst_surface,
3537     const VARectangle *dst_rect,
3538     int                pp_index,
3539     void *filter_param
3540 )
3541 {
3542     VAStatus va_status;
3543
3544     va_status = ironlake_pp_initialize(ctx, pp_context,
3545                                        src_surface,
3546                                        src_rect,
3547                                        dst_surface,
3548                                        dst_rect,
3549                                        pp_index,
3550                                        filter_param);
3551
3552     if (va_status == VA_STATUS_SUCCESS) {
3553         ironlake_pp_states_setup(ctx, pp_context);
3554         ironlake_pp_pipeline_setup(ctx, pp_context);
3555     }
3556
3557     return va_status;
3558 }
3559
3560 static VAStatus
3561 gen6_pp_initialize(
3562     VADriverContextP   ctx,
3563     struct i965_post_processing_context *pp_context,
3564     const struct i965_surface *src_surface,
3565     const VARectangle *src_rect,
3566     struct i965_surface *dst_surface,
3567     const VARectangle *dst_rect,
3568     int                pp_index,
3569     void *filter_param
3570 )
3571 {
3572     VAStatus va_status;
3573     struct i965_driver_data *i965 = i965_driver_data(ctx);
3574     struct pp_module *pp_module;
3575     dri_bo *bo;
3576     int static_param_size, inline_param_size;
3577
3578     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3579     bo = dri_bo_alloc(i965->intel.bufmgr,
3580                       "surface state & binding table",
3581                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
3582                       4096);
3583     assert(bo);
3584     pp_context->surface_state_binding_table.bo = bo;
3585
3586     dri_bo_unreference(pp_context->curbe.bo);
3587     bo = dri_bo_alloc(i965->intel.bufmgr,
3588                       "constant buffer",
3589                       4096, 
3590                       4096);
3591     assert(bo);
3592     pp_context->curbe.bo = bo;
3593
3594     dri_bo_unreference(pp_context->idrt.bo);
3595     bo = dri_bo_alloc(i965->intel.bufmgr, 
3596                       "interface discriptor", 
3597                       sizeof(struct gen6_interface_descriptor_data), 
3598                       4096);
3599     assert(bo);
3600     pp_context->idrt.bo = bo;
3601     pp_context->idrt.num_interface_descriptors = 0;
3602
3603     dri_bo_unreference(pp_context->sampler_state_table.bo);
3604     bo = dri_bo_alloc(i965->intel.bufmgr, 
3605                       "sampler state table", 
3606                       4096,
3607                       4096);
3608     assert(bo);
3609     dri_bo_map(bo, True);
3610     memset(bo->virtual, 0, bo->size);
3611     dri_bo_unmap(bo);
3612     pp_context->sampler_state_table.bo = bo;
3613
3614     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3615     bo = dri_bo_alloc(i965->intel.bufmgr, 
3616                       "sampler 8x8 state ",
3617                       4096,
3618                       4096);
3619     assert(bo);
3620     pp_context->sampler_state_table.bo_8x8 = bo;
3621
3622     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3623     bo = dri_bo_alloc(i965->intel.bufmgr, 
3624                       "sampler 8x8 state ",
3625                       4096,
3626                       4096);
3627     assert(bo);
3628     pp_context->sampler_state_table.bo_8x8_uv = bo;
3629
3630     dri_bo_unreference(pp_context->vfe_state.bo);
3631     bo = dri_bo_alloc(i965->intel.bufmgr, 
3632                       "vfe state", 
3633                       sizeof(struct i965_vfe_state), 
3634                       4096);
3635     assert(bo);
3636     pp_context->vfe_state.bo = bo;
3637     
3638     if (IS_GEN7(i965->intel.device_id)) {
3639         static_param_size = sizeof(struct gen7_pp_static_parameter);
3640         inline_param_size = sizeof(struct gen7_pp_inline_parameter);
3641     } else {
3642         static_param_size = sizeof(struct pp_static_parameter);
3643         inline_param_size = sizeof(struct pp_inline_parameter);
3644     }
3645
3646     memset(pp_context->pp_static_parameter, 0, static_param_size);
3647     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
3648
3649     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
3650     pp_context->current_pp = pp_index;
3651     pp_module = &pp_context->pp_modules[pp_index];
3652     
3653     if (pp_module->initialize)
3654         va_status = pp_module->initialize(ctx, pp_context,
3655                                           src_surface,
3656                                           src_rect,
3657                                           dst_surface,
3658                                           dst_rect,
3659                                           filter_param);
3660     else
3661         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
3662
3663     calculate_boundary_block_mask(pp_context, dst_rect);
3664     
3665     return va_status;
3666 }
3667
3668 static void
3669 gen6_pp_interface_descriptor_table(VADriverContextP   ctx,
3670                                    struct i965_post_processing_context *pp_context)
3671 {
3672     struct i965_driver_data *i965 = i965_driver_data(ctx);
3673     struct gen6_interface_descriptor_data *desc;
3674     dri_bo *bo;
3675     int pp_index = pp_context->current_pp;
3676
3677     bo = pp_context->idrt.bo;
3678     dri_bo_map(bo, True);
3679     assert(bo->virtual);
3680     desc = bo->virtual;
3681     memset(desc, 0, sizeof(*desc));
3682     desc->desc0.kernel_start_pointer = 
3683         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
3684     desc->desc1.single_program_flow = 1;
3685     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
3686     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
3687     desc->desc2.sampler_state_pointer = 
3688         pp_context->sampler_state_table.bo->offset >> 5;
3689     desc->desc3.binding_table_entry_count = 0;
3690     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
3691     desc->desc4.constant_urb_entry_read_offset = 0;
3692
3693     if (IS_GEN7(i965->intel.device_id))
3694         desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */
3695     else
3696         desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
3697
3698     dri_bo_emit_reloc(bo,
3699                       I915_GEM_DOMAIN_INSTRUCTION, 0,
3700                       0,
3701                       offsetof(struct gen6_interface_descriptor_data, desc0),
3702                       pp_context->pp_modules[pp_index].kernel.bo);
3703
3704     dri_bo_emit_reloc(bo,
3705                       I915_GEM_DOMAIN_INSTRUCTION, 0,
3706                       desc->desc2.sampler_count << 2,
3707                       offsetof(struct gen6_interface_descriptor_data, desc2),
3708                       pp_context->sampler_state_table.bo);
3709
3710     dri_bo_unmap(bo);
3711     pp_context->idrt.num_interface_descriptors++;
3712 }
3713
3714 static void
3715 gen6_pp_upload_constants(VADriverContextP ctx,
3716                          struct i965_post_processing_context *pp_context)
3717 {
3718     struct i965_driver_data *i965 = i965_driver_data(ctx);
3719     unsigned char *constant_buffer;
3720     int param_size;
3721
3722     assert(sizeof(struct pp_static_parameter) == 128);
3723     assert(sizeof(struct gen7_pp_static_parameter) == 192);
3724
3725     if (IS_GEN7(i965->intel.device_id))
3726         param_size = sizeof(struct gen7_pp_static_parameter);
3727     else
3728         param_size = sizeof(struct pp_static_parameter);
3729
3730     dri_bo_map(pp_context->curbe.bo, 1);
3731     assert(pp_context->curbe.bo->virtual);
3732     constant_buffer = pp_context->curbe.bo->virtual;
3733     memcpy(constant_buffer, pp_context->pp_static_parameter, param_size);
3734     dri_bo_unmap(pp_context->curbe.bo);
3735 }
3736
3737 static void
3738 gen6_pp_states_setup(VADriverContextP ctx,
3739                      struct i965_post_processing_context *pp_context)
3740 {
3741     gen6_pp_interface_descriptor_table(ctx, pp_context);
3742     gen6_pp_upload_constants(ctx, pp_context);
3743 }
3744
3745 static void
3746 gen6_pp_pipeline_select(VADriverContextP ctx,
3747                         struct i965_post_processing_context *pp_context)
3748 {
3749     struct intel_batchbuffer *batch = pp_context->batch;
3750
3751     BEGIN_BATCH(batch, 1);
3752     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
3753     ADVANCE_BATCH(batch);
3754 }
3755
3756 static void
3757 gen6_pp_state_base_address(VADriverContextP ctx,
3758                            struct i965_post_processing_context *pp_context)
3759 {
3760     struct intel_batchbuffer *batch = pp_context->batch;
3761
3762     BEGIN_BATCH(batch, 10);
3763     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
3764     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3765     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
3766     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3767     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3768     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3769     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3770     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3771     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3772     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3773     ADVANCE_BATCH(batch);
3774 }
3775
3776 static void
3777 gen6_pp_vfe_state(VADriverContextP ctx,
3778                   struct i965_post_processing_context *pp_context)
3779 {
3780     struct intel_batchbuffer *batch = pp_context->batch;
3781
3782     BEGIN_BATCH(batch, 8);
3783     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
3784     OUT_BATCH(batch, 0);
3785     OUT_BATCH(batch,
3786               (pp_context->urb.num_vfe_entries - 1) << 16 |
3787               pp_context->urb.num_vfe_entries << 8);
3788     OUT_BATCH(batch, 0);
3789     OUT_BATCH(batch,
3790               (pp_context->urb.size_vfe_entry * 2) << 16 |  /* URB Entry Allocation Size, in 256 bits unit */
3791               (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2)); /* CURBE Allocation Size, in 256 bits unit */
3792     OUT_BATCH(batch, 0);
3793     OUT_BATCH(batch, 0);
3794     OUT_BATCH(batch, 0);
3795     ADVANCE_BATCH(batch);
3796 }
3797
3798 static void
3799 gen6_pp_curbe_load(VADriverContextP ctx,
3800                    struct i965_post_processing_context *pp_context)
3801 {
3802     struct intel_batchbuffer *batch = pp_context->batch;
3803
3804     assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32 <= pp_context->curbe.bo->size);
3805
3806     BEGIN_BATCH(batch, 4);
3807     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
3808     OUT_BATCH(batch, 0);
3809     OUT_BATCH(batch,
3810               pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32);
3811     OUT_RELOC(batch, 
3812               pp_context->curbe.bo,
3813               I915_GEM_DOMAIN_INSTRUCTION, 0,
3814               0);
3815     ADVANCE_BATCH(batch);
3816 }
3817
3818 static void
3819 gen6_interface_descriptor_load(VADriverContextP ctx,
3820                                struct i965_post_processing_context *pp_context)
3821 {
3822     struct intel_batchbuffer *batch = pp_context->batch;
3823
3824     BEGIN_BATCH(batch, 4);
3825     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
3826     OUT_BATCH(batch, 0);
3827     OUT_BATCH(batch,
3828               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
3829     OUT_RELOC(batch, 
3830               pp_context->idrt.bo,
3831               I915_GEM_DOMAIN_INSTRUCTION, 0,
3832               0);
3833     ADVANCE_BATCH(batch);
3834 }
3835
3836 static void update_block_mask_parameter(struct i965_post_processing_context *pp_context, int x, int y, int x_steps, int y_steps) 
3837 {
3838     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3839
3840     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3841     pp_inline_parameter->grf6.block_vertical_mask_bottom = pp_context->block_vertical_mask_bottom;
3842     // for the first block, it always on the left edge. the second block will reload horizontal_mask from grf6.block_horizontal_mask_middle
3843     pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_left;
3844     pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
3845     pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_right;
3846
3847     /* 1 x N */
3848     if (x_steps == 1) {
3849         if (y == y_steps-1) {
3850             pp_inline_parameter->grf5.block_vertical_mask = pp_context->block_vertical_mask_bottom;
3851         }
3852         else {
3853             pp_inline_parameter->grf6.block_vertical_mask_bottom = 0xff;
3854         }
3855     }
3856
3857     /* M x 1 */
3858     if (y_steps == 1) {
3859         if (x == 0) { // all blocks in this group are on the left edge
3860             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_left;
3861             pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_left; 
3862         }
3863         else if (x == x_steps-1) {
3864             pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_right;
3865             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_right;
3866         }
3867         else {
3868             pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3869             pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
3870             pp_inline_parameter->grf6.block_horizontal_mask_right = 0xffff;
3871         }
3872     }
3873
3874 }
3875
3876 static void
3877 gen6_pp_object_walker(VADriverContextP ctx,
3878                       struct i965_post_processing_context *pp_context)
3879 {
3880     struct i965_driver_data *i965 = i965_driver_data(ctx);
3881     struct intel_batchbuffer *batch = pp_context->batch;
3882     int x, x_steps, y, y_steps;
3883     int param_size, command_length_in_dws;
3884     dri_bo *command_buffer;
3885     unsigned int *command_ptr;
3886
3887     if (IS_GEN7(i965->intel.device_id))
3888         param_size = sizeof(struct gen7_pp_inline_parameter);
3889     else
3890         param_size = sizeof(struct pp_inline_parameter);
3891
3892     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
3893     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
3894     command_length_in_dws = 6 + (param_size >> 2);
3895     command_buffer = dri_bo_alloc(i965->intel.bufmgr,
3896                                   "command objects buffer",
3897                                   command_length_in_dws * 4 * x_steps * y_steps + 8,
3898                                   4096);
3899
3900     dri_bo_map(command_buffer, 1);
3901     command_ptr = command_buffer->virtual;
3902
3903     for (y = 0; y < y_steps; y++) {
3904         for (x = 0; x < x_steps; x++) {
3905             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
3906                 // some common block parameter update goes here, apply to all pp functions
3907                 update_block_mask_parameter (pp_context, x, y, x_steps, y_steps);
3908                 
3909                 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
3910                 *command_ptr++ = 0;
3911                 *command_ptr++ = 0;
3912                 *command_ptr++ = 0;
3913                 *command_ptr++ = 0;
3914                 *command_ptr++ = 0;
3915                 memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
3916                 command_ptr += (param_size >> 2);
3917             }
3918         }
3919     }
3920
3921     if (command_length_in_dws * x_steps * y_steps % 2 == 0)
3922         *command_ptr++ = 0;
3923
3924     *command_ptr = MI_BATCH_BUFFER_END;
3925
3926     dri_bo_unmap(command_buffer);
3927
3928     BEGIN_BATCH(batch, 2);
3929     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
3930     OUT_RELOC(batch, command_buffer, 
3931               I915_GEM_DOMAIN_COMMAND, 0, 
3932               0);
3933     ADVANCE_BATCH(batch);
3934     
3935     dri_bo_unreference(command_buffer);
3936
3937     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
3938      * will cause control to pass back to ring buffer 
3939      */
3940     intel_batchbuffer_end_atomic(batch);
3941     intel_batchbuffer_flush(batch);
3942     intel_batchbuffer_start_atomic(batch, 0x1000);
3943 }
3944
3945 static void
3946 gen6_pp_pipeline_setup(VADriverContextP ctx,
3947                        struct i965_post_processing_context *pp_context)
3948 {
3949     struct intel_batchbuffer *batch = pp_context->batch;
3950
3951     intel_batchbuffer_start_atomic(batch, 0x1000);
3952     intel_batchbuffer_emit_mi_flush(batch);
3953     gen6_pp_pipeline_select(ctx, pp_context);
3954     gen6_pp_state_base_address(ctx, pp_context);
3955     gen6_pp_vfe_state(ctx, pp_context);
3956     gen6_pp_curbe_load(ctx, pp_context);
3957     gen6_interface_descriptor_load(ctx, pp_context);
3958     gen6_pp_object_walker(ctx, pp_context);
3959     intel_batchbuffer_end_atomic(batch);
3960 }
3961
3962 static VAStatus
3963 gen6_post_processing(
3964     VADriverContextP   ctx,
3965     struct i965_post_processing_context *pp_context,
3966     const struct i965_surface *src_surface,
3967     const VARectangle *src_rect,
3968     struct i965_surface *dst_surface,
3969     const VARectangle *dst_rect,
3970     int                pp_index,
3971     void * filter_param
3972 )
3973 {
3974     VAStatus va_status;
3975     
3976     va_status = gen6_pp_initialize(ctx, pp_context,
3977                                    src_surface,
3978                                    src_rect,
3979                                    dst_surface,
3980                                    dst_rect,
3981                                    pp_index,
3982                                    filter_param);
3983
3984     if (va_status == VA_STATUS_SUCCESS) {
3985         gen6_pp_states_setup(ctx, pp_context);
3986         gen6_pp_pipeline_setup(ctx, pp_context);
3987     }
3988
3989     return va_status;
3990 }
3991
3992 static VAStatus
3993 i965_post_processing_internal(
3994     VADriverContextP   ctx,
3995     struct i965_post_processing_context *pp_context,
3996     const struct i965_surface *src_surface,
3997     const VARectangle *src_rect,
3998     struct i965_surface *dst_surface,
3999     const VARectangle *dst_rect,
4000     int                pp_index,
4001     void *filter_param
4002 )
4003 {
4004     struct i965_driver_data *i965 = i965_driver_data(ctx);
4005     VAStatus va_status;
4006
4007     if (IS_GEN6(i965->intel.device_id) ||
4008         IS_GEN7(i965->intel.device_id))
4009         va_status = gen6_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
4010     else
4011         va_status = ironlake_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
4012     
4013     return va_status;
4014 }
4015
4016 VAStatus 
4017 i965_DestroySurfaces(VADriverContextP ctx,
4018                      VASurfaceID *surface_list,
4019                      int num_surfaces);
4020 VAStatus 
4021 i965_CreateSurfaces(VADriverContextP ctx,
4022                     int width,
4023                     int height,
4024                     int format,
4025                     int num_surfaces,
4026                     VASurfaceID *surfaces);
4027
/*
 * Split a packed 32-bit ARGB value (alpha in bits 31:24, red in 23:16,
 * green in 15:8, blue in 7:0) and convert the colour to Y/U/V using
 * fixed-point coefficients scaled by 1000.
 *
 * y, u, v and a receive the luma, the two chroma components and the
 * unmodified alpha byte respectively.
 */
static void
rgb_to_yuv(unsigned int argb,
           unsigned char *y,
           unsigned char *u,
           unsigned char *v,
           unsigned char *a)
{
    const int alpha = (argb >> 24) & 0xff;
    const int red   = (argb >> 16) & 0xff;
    const int green = (argb >> 8) & 0xff;
    const int blue  = argb & 0xff;

    /* integer division truncates towards zero before the offset is added */
    const int luma     = (257 * red + 504 * green + 98 * blue) / 1000 + 16;
    const int chroma_v = (439 * red - 368 * green - 71 * blue) / 1000 + 128;
    const int chroma_u = (-148 * red - 291 * green + 439 * blue) / 1000 + 128;

    *y = luma;
    *v = chroma_v;
    *u = chroma_u;
    *a = alpha;
}
4044
4045 static void 
4046 i965_vpp_clear_surface(VADriverContextP ctx,
4047                        struct i965_post_processing_context *pp_context,
4048                        VASurfaceID surface,
4049                        unsigned int color)
4050 {
4051     struct i965_driver_data *i965 = i965_driver_data(ctx);
4052     struct intel_batchbuffer *batch = pp_context->batch;
4053     struct object_surface *obj_surface = SURFACE(surface);
4054     unsigned int blt_cmd, br13;
4055     unsigned int tiling = 0, swizzle = 0;
4056     int pitch;
4057     unsigned char y, u, v, a = 0;
4058
4059     /* Currently only support NV12 surface */
4060     if (!obj_surface || obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
4061         return;
4062
4063     rgb_to_yuv(color, &y, &u, &v, &a);
4064
4065     if (a == 0)
4066         return;
4067
4068     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
4069     blt_cmd = XY_COLOR_BLT_CMD;
4070     pitch = obj_surface->width;
4071
4072     if (tiling != I915_TILING_NONE) {
4073         blt_cmd |= XY_COLOR_BLT_DST_TILED;
4074         pitch >>= 2;
4075     }
4076
4077     br13 = 0xf0 << 16;
4078     br13 |= BR13_8;
4079     br13 |= pitch;
4080
4081     if (IS_GEN6(i965->intel.device_id) ||
4082         IS_GEN7(i965->intel.device_id)) {
4083         intel_batchbuffer_start_atomic_blt(batch, 48);
4084         BEGIN_BLT_BATCH(batch, 12);
4085     } else {
4086         intel_batchbuffer_start_atomic(batch, 48);
4087         BEGIN_BATCH(batch, 12);
4088     }
4089
4090     OUT_BATCH(batch, blt_cmd);
4091     OUT_BATCH(batch, br13);
4092     OUT_BATCH(batch,
4093               0 << 16 |
4094               0);
4095     OUT_BATCH(batch,
4096               obj_surface->height << 16 |
4097               obj_surface->width);
4098     OUT_RELOC(batch, obj_surface->bo, 
4099               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4100               0);
4101     OUT_BATCH(batch, y);
4102
4103     br13 = 0xf0 << 16;
4104     br13 |= BR13_565;
4105     br13 |= pitch;
4106
4107     OUT_BATCH(batch, blt_cmd);
4108     OUT_BATCH(batch, br13);
4109     OUT_BATCH(batch,
4110               0 << 16 |
4111               0);
4112     OUT_BATCH(batch,
4113               obj_surface->height / 2 << 16 |
4114               obj_surface->width / 2);
4115     OUT_RELOC(batch, obj_surface->bo, 
4116               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4117               obj_surface->width * obj_surface->y_cb_offset);
4118     OUT_BATCH(batch, v << 8 | u);
4119
4120     ADVANCE_BATCH(batch);
4121     intel_batchbuffer_end_atomic(batch);
4122 }
4123
4124 VASurfaceID
4125 i965_post_processing(
4126     VADriverContextP   ctx,
4127     VASurfaceID        surface,
4128     const VARectangle *src_rect,
4129     const VARectangle *dst_rect,
4130     unsigned int       flags,
4131     int               *has_done_scaling  
4132 )
4133 {
4134     struct i965_driver_data *i965 = i965_driver_data(ctx);
4135     VASurfaceID in_surface_id = surface;
4136     VASurfaceID out_surface_id = VA_INVALID_ID;
4137     
4138     *has_done_scaling = 0;
4139
4140     if (HAS_PP(i965)) {
4141         struct object_surface *obj_surface;
4142         VAStatus status;
4143         struct i965_surface src_surface;
4144         struct i965_surface dst_surface;
4145
4146         obj_surface = SURFACE(in_surface_id);
4147
4148         /* Currently only support post processing for NV12 surface */
4149         if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
4150             return out_surface_id;
4151
4152         _i965LockMutex(&i965->pp_mutex);
4153
4154         if (flags & I965_PP_FLAG_MCDI) {
4155             status = i965_CreateSurfaces(ctx,
4156                                          obj_surface->orig_width,
4157                                          obj_surface->orig_height,
4158                                          VA_RT_FORMAT_YUV420,
4159                                          1,
4160                                          &out_surface_id);
4161             assert(status == VA_STATUS_SUCCESS);
4162             obj_surface = SURFACE(out_surface_id);
4163             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4164             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
4165             src_surface.id = in_surface_id;
4166             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4167             src_surface.flags = (flags & I965_PP_FLAG_TOP_FIELD) ? 
4168                 I965_SURFACE_FLAG_TOP_FIELD_FIRST : I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST;
4169             dst_surface.id = out_surface_id;
4170             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4171             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4172
4173             i965_post_processing_internal(ctx, i965->pp_context,
4174                                           &src_surface,
4175                                           src_rect,
4176                                           &dst_surface,
4177                                           dst_rect,
4178                                           PP_NV12_DNDI,
4179                                           NULL);
4180         }
4181
4182         if (flags & I965_PP_FLAG_AVS) {
4183             struct i965_render_state *render_state = &i965->render_state;
4184             struct intel_region *dest_region = render_state->draw_region;
4185
4186             if (out_surface_id != VA_INVALID_ID)
4187                 in_surface_id = out_surface_id;
4188
4189             status = i965_CreateSurfaces(ctx,
4190                                          dest_region->width,
4191                                          dest_region->height,
4192                                          VA_RT_FORMAT_YUV420,
4193                                          1,
4194                                          &out_surface_id);
4195             assert(status == VA_STATUS_SUCCESS);
4196             obj_surface = SURFACE(out_surface_id);
4197             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4198             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
4199             src_surface.id = in_surface_id;
4200             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4201             src_surface.flags = I965_SURFACE_FLAG_FRAME;
4202             dst_surface.id = out_surface_id;
4203             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4204             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4205
4206             i965_post_processing_internal(ctx, i965->pp_context,
4207                                           &src_surface,
4208                                           src_rect,
4209                                           &dst_surface,
4210                                           dst_rect,
4211                                           PP_NV12_AVS,
4212                                           NULL);
4213
4214             if (in_surface_id != surface)
4215                 i965_DestroySurfaces(ctx, &in_surface_id, 1);
4216                 
4217             *has_done_scaling = 1;
4218         }
4219
4220         _i965UnlockMutex(&i965->pp_mutex);
4221     }
4222
4223     return out_surface_id;
4224 }       
4225
4226 static VAStatus
4227 i965_image_pl1_rgbx_processing(VADriverContextP ctx,
4228                           const struct i965_surface *src_surface,
4229                           const VARectangle *src_rect,
4230                           struct i965_surface *dst_surface,
4231                           const VARectangle *dst_rect)
4232 {
4233     struct i965_driver_data *i965 = i965_driver_data(ctx);
4234     struct i965_post_processing_context *pp_context = i965->pp_context;
4235     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4236
4237     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4238         i965_post_processing_internal(ctx, i965->pp_context,
4239                                       src_surface,
4240                                       src_rect,
4241                                       dst_surface,
4242                                       dst_rect,
4243                                       PP_RGBX_LOAD_SAVE_NV12,
4244                                       NULL);
4245     } else {
4246         assert(0);
4247         return VA_STATUS_ERROR_UNKNOWN;
4248     }
4249
4250     intel_batchbuffer_flush(pp_context->batch);
4251
4252     return VA_STATUS_SUCCESS;
4253 }
4254
4255 static VAStatus
4256 i965_image_pl3_processing(VADriverContextP ctx,
4257                           const struct i965_surface *src_surface,
4258                           const VARectangle *src_rect,
4259                           struct i965_surface *dst_surface,
4260                           const VARectangle *dst_rect)
4261 {
4262     struct i965_driver_data *i965 = i965_driver_data(ctx);
4263     struct i965_post_processing_context *pp_context = i965->pp_context;
4264     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4265     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
4266
4267     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4268         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4269                                                  src_surface,
4270                                                  src_rect,
4271                                                  dst_surface,
4272                                                  dst_rect,
4273                                                  PP_PL3_LOAD_SAVE_N12,
4274                                                  NULL);
4275     } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || 
4276                fourcc == VA_FOURCC('I', 'M', 'C', '3') || 
4277                fourcc == VA_FOURCC('Y', 'V', '1', '2') || 
4278                fourcc == VA_FOURCC('I', '4', '2', '0')) {
4279         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4280                                                  src_surface,
4281                                                  src_rect,
4282                                                  dst_surface,
4283                                                  dst_rect,
4284                                                  PP_PL3_LOAD_SAVE_PL3,
4285                                                  NULL);
4286     } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') ||
4287                fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
4288         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4289                                                  src_surface,
4290                                                  src_rect,
4291                                                  dst_surface,
4292                                                  dst_rect,
4293                                                  PP_PL3_LOAD_SAVE_PA,
4294                                                  NULL);
4295     }
4296     else {
4297         assert(0);
4298     }
4299
4300     intel_batchbuffer_flush(pp_context->batch);
4301
4302     return vaStatus;
4303 }
4304
4305 static VAStatus
4306 i965_image_pl2_processing(VADriverContextP ctx,
4307                           const struct i965_surface *src_surface,
4308                           const VARectangle *src_rect,
4309                           struct i965_surface *dst_surface,
4310                           const VARectangle *dst_rect)
4311 {
4312     struct i965_driver_data *i965 = i965_driver_data(ctx);
4313     struct i965_post_processing_context *pp_context = i965->pp_context;
4314     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4315     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
4316
4317     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4318         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4319                                                  src_surface,
4320                                                  src_rect,
4321                                                  dst_surface,
4322                                                  dst_rect,
4323                                                  PP_NV12_LOAD_SAVE_N12,
4324                                                  NULL);
4325     } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || 
4326                fourcc == VA_FOURCC('I', 'M', 'C', '3') || 
4327                fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
4328                fourcc == VA_FOURCC('I', '4', '2', '0') ) {
4329         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4330                                                  src_surface,
4331                                                  src_rect,
4332                                                  dst_surface,
4333                                                  dst_rect,
4334                                                  PP_NV12_LOAD_SAVE_PL3,
4335                                                  NULL);
4336     } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') ||
4337                fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
4338         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4339                                                  src_surface,
4340                                                  src_rect,
4341                                                  dst_surface,
4342                                                  dst_rect,
4343                                                  PP_NV12_LOAD_SAVE_PA,
4344                                                      NULL);
4345     } else if (fourcc == VA_FOURCC('B', 'G', 'R', 'X') || 
4346                fourcc == VA_FOURCC('B', 'G', 'R', 'A') ||
4347                fourcc == VA_FOURCC('R', 'G', 'B', 'X') ||
4348                fourcc == VA_FOURCC('R', 'G', 'B', 'A') ) {
4349         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4350                                       src_surface,
4351                                       src_rect,
4352                                       dst_surface,
4353                                       dst_rect,
4354                                       PP_NV12_LOAD_SAVE_RGBX,
4355                                       NULL);
4356     } else {
4357         assert(0);
4358         return VA_STATUS_ERROR_UNKNOWN;
4359     }
4360
4361     intel_batchbuffer_flush(pp_context->batch);
4362
4363     return vaStatus;
4364 }
4365
4366 static VAStatus
4367 i965_image_pl1_processing(VADriverContextP ctx,
4368                           const struct i965_surface *src_surface,
4369                           const VARectangle *src_rect,
4370                           struct i965_surface *dst_surface,
4371                           const VARectangle *dst_rect)
4372 {
4373     struct i965_driver_data *i965 = i965_driver_data(ctx);
4374     struct i965_post_processing_context *pp_context = i965->pp_context;
4375     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4376
4377     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4378         i965_post_processing_internal(ctx, i965->pp_context,
4379                                       src_surface,
4380                                       src_rect,
4381                                       dst_surface,
4382                                       dst_rect,
4383                                       PP_PA_LOAD_SAVE_NV12,
4384                                       NULL);
4385     }
4386     else if (fourcc == VA_FOURCC_YV12) {
4387         i965_post_processing_internal(ctx, i965->pp_context,
4388                                       src_surface,
4389                                       src_rect,
4390                                       dst_surface,
4391                                       dst_rect,
4392                                       PP_PA_LOAD_SAVE_PL3,
4393                                       NULL);
4394
4395     }
4396     else {
4397         return VA_STATUS_ERROR_UNKNOWN;
4398     }
4399
4400     intel_batchbuffer_flush(pp_context->batch);
4401
4402     return VA_STATUS_SUCCESS;
4403 }
4404
4405 VAStatus
4406 i965_image_processing(VADriverContextP ctx,
4407                       const struct i965_surface *src_surface,
4408                       const VARectangle *src_rect,
4409                       struct i965_surface *dst_surface,
4410                       const VARectangle *dst_rect)
4411 {
4412     struct i965_driver_data *i965 = i965_driver_data(ctx);
4413     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
4414
4415     if (HAS_PP(i965)) {
4416         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
4417
4418         _i965LockMutex(&i965->pp_mutex);
4419
4420         switch (fourcc) {
4421         case VA_FOURCC('Y', 'V', '1', '2'):
4422         case VA_FOURCC('I', '4', '2', '0'):
4423         case VA_FOURCC('I', 'M', 'C', '1'):
4424         case VA_FOURCC('I', 'M', 'C', '3'):
4425             status = i965_image_pl3_processing(ctx,
4426                                                src_surface,
4427                                                src_rect,
4428                                                dst_surface,
4429                                                dst_rect);
4430             break;
4431
4432         case  VA_FOURCC('N', 'V', '1', '2'):
4433             status = i965_image_pl2_processing(ctx,
4434                                                src_surface,
4435                                                src_rect,
4436                                                dst_surface,
4437                                                dst_rect);
4438             break;
4439         case  VA_FOURCC('Y', 'U', 'Y', '2'):
4440         case VA_FOURCC('U', 'Y', 'V', 'Y'):
4441             status = i965_image_pl1_processing(ctx,
4442                                                src_surface,
4443                                                src_rect,
4444                                                dst_surface,
4445                                                dst_rect);
4446             break;
4447         case VA_FOURCC('B', 'G', 'R', 'A'):
4448         case VA_FOURCC('B', 'G', 'R', 'X'):
4449         case VA_FOURCC('R', 'G', 'B', 'A'):
4450         case VA_FOURCC('R', 'G', 'B', 'X'):
4451             status = i965_image_pl1_rgbx_processing(ctx,
4452                                                src_surface,
4453                                                src_rect,
4454                                                dst_surface,
4455                                                dst_rect);
4456             break;
4457         default:
4458             status = VA_STATUS_ERROR_UNIMPLEMENTED;
4459             break;
4460         }
4461         
4462         _i965UnlockMutex(&i965->pp_mutex);
4463     }
4464
4465     return status;
4466 }       
4467
4468 static void
4469 i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
4470 {
4471     int i;
4472
4473     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4474     pp_context->surface_state_binding_table.bo = NULL;
4475
4476     dri_bo_unreference(pp_context->curbe.bo);
4477     pp_context->curbe.bo = NULL;
4478
4479     dri_bo_unreference(pp_context->sampler_state_table.bo);
4480     pp_context->sampler_state_table.bo = NULL;
4481
4482     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4483     pp_context->sampler_state_table.bo_8x8 = NULL;
4484
4485     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4486     pp_context->sampler_state_table.bo_8x8_uv = NULL;
4487
4488     dri_bo_unreference(pp_context->idrt.bo);
4489     pp_context->idrt.bo = NULL;
4490     pp_context->idrt.num_interface_descriptors = 0;
4491
4492     dri_bo_unreference(pp_context->vfe_state.bo);
4493     pp_context->vfe_state.bo = NULL;
4494
4495     dri_bo_unreference(pp_context->stmm.bo);
4496     pp_context->stmm.bo = NULL;
4497
4498     for (i = 0; i < NUM_PP_MODULES; i++) {
4499         struct pp_module *pp_module = &pp_context->pp_modules[i];
4500
4501         dri_bo_unreference(pp_module->kernel.bo);
4502         pp_module->kernel.bo = NULL;
4503     }
4504
4505     free(pp_context->pp_static_parameter);
4506     free(pp_context->pp_inline_parameter);
4507     pp_context->pp_static_parameter = NULL;
4508     pp_context->pp_inline_parameter = NULL;
4509 }
4510
4511 Bool
4512 i965_post_processing_terminate(VADriverContextP ctx)
4513 {
4514     struct i965_driver_data *i965 = i965_driver_data(ctx);
4515     struct i965_post_processing_context *pp_context = i965->pp_context;
4516
4517     if (pp_context) {
4518         i965_post_processing_context_finalize(pp_context);
4519         free(pp_context);
4520     }
4521
4522     i965->pp_context = NULL;
4523
4524     return True;
4525 }
4526
4527 static void
4528 i965_post_processing_context_init(VADriverContextP ctx,
4529                                   struct i965_post_processing_context *pp_context,
4530                                   struct intel_batchbuffer *batch)
4531 {
4532     struct i965_driver_data *i965 = i965_driver_data(ctx);
4533     int i;
4534
4535     pp_context->urb.size = URB_SIZE((&i965->intel));
4536     pp_context->urb.num_vfe_entries = 32;
4537     pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
4538     pp_context->urb.num_cs_entries = 1;
4539     
4540     if (IS_GEN7(i965->intel.device_id))
4541         pp_context->urb.size_cs_entry = 4;      /* in 512 bits unit */
4542     else
4543         pp_context->urb.size_cs_entry = 2;
4544
4545     pp_context->urb.vfe_start = 0;
4546     pp_context->urb.cs_start = pp_context->urb.vfe_start + 
4547         pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
4548     assert(pp_context->urb.cs_start + 
4549            pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
4550
4551     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
4552     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
4553     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
4554
4555     if (IS_GEN7(i965->intel.device_id))
4556         memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
4557     else if (IS_GEN6(i965->intel.device_id))
4558         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
4559     else if (IS_IRONLAKE(i965->intel.device_id))
4560         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
4561
4562     for (i = 0; i < NUM_PP_MODULES; i++) {
4563         struct pp_module *pp_module = &pp_context->pp_modules[i];
4564         dri_bo_unreference(pp_module->kernel.bo);
4565         if (pp_module->kernel.bin && pp_module->kernel.size) {
4566             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
4567                                                 pp_module->kernel.name,
4568                                                 pp_module->kernel.size,
4569                                                 4096);
4570             assert(pp_module->kernel.bo);
4571             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
4572         } else {
4573             pp_module->kernel.bo = NULL;
4574         }
4575     }
4576
4577     /* static & inline parameters */
4578     if (IS_GEN7(i965->intel.device_id)) {
4579         pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
4580         pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
4581     } else {
4582         pp_context->pp_static_parameter = calloc(sizeof(struct pp_static_parameter), 1);
4583         pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
4584     }
4585
4586     pp_context->batch = batch;
4587 }
4588
4589 Bool
4590 i965_post_processing_init(VADriverContextP ctx)
4591 {
4592     struct i965_driver_data *i965 = i965_driver_data(ctx);
4593     struct i965_post_processing_context *pp_context = i965->pp_context;
4594
4595     if (HAS_PP(i965)) {
4596         if (pp_context == NULL) {
4597             pp_context = calloc(1, sizeof(*pp_context));
4598             i965_post_processing_context_init(ctx, pp_context, i965->batch);
4599             i965->pp_context = pp_context;
4600         }
4601     }
4602
4603     return True;
4604 }
4605
4606 static const int procfilter_to_pp_flag[VAProcFilterCount] = {
4607     PP_NULL,    /* VAProcFilterNone */
4608     PP_NV12_DN, /* VAProcFilterNoiseReduction */
4609     PP_NULL,    /* VAProcFilterDeblocking */
4610     PP_NV12_DNDI, /* VAProcFilterDeinterlacing */
4611     PP_NULL,    /* VAProcFilterSharpening */
4612     PP_NULL,    /* VAProcFilterColorBalance */
4613     PP_NULL,    /* VAProcFilterColorStandard */
4614     PP_NULL,    /* VAProcFilterFrameRateConversion */
4615 };
4616
4617 static const int proc_frame_to_pp_frame[3] = {
4618     I965_SURFACE_FLAG_FRAME,
4619     I965_SURFACE_FLAG_TOP_FIELD_FIRST,
4620     I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST
4621 };
4622
4623 static void 
4624 i965_proc_picture(VADriverContextP ctx, 
4625                   VAProfile profile, 
4626                   union codec_state *codec_state,
4627                   struct hw_context *hw_context)
4628 {
4629     struct i965_driver_data *i965 = i965_driver_data(ctx);
4630     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
4631     struct proc_state *proc_state = &codec_state->proc;
4632     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
4633     struct object_surface *obj_surface;
4634     struct i965_surface src_surface, dst_surface;
4635     VARectangle src_rect, dst_rect;
4636     VAStatus status;
4637     int i;
4638     VASurfaceID tmp_surfaces[VAProcFilterCount + 4];
4639     int num_tmp_surfaces = 0;
4640     unsigned int tiling = 0, swizzle = 0;
4641     int in_width, in_height;
4642
4643     assert(pipeline_param->surface != VA_INVALID_ID);
4644     assert(proc_state->current_render_target != VA_INVALID_ID);
4645
4646     obj_surface = SURFACE(pipeline_param->surface);
4647     in_width = obj_surface->orig_width;
4648     in_height = obj_surface->orig_height;
4649     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
4650
4651     src_surface.id = pipeline_param->surface;
4652     src_surface.type = I965_SURFACE_TYPE_SURFACE;
4653     src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
4654
4655     VASurfaceID out_surface_id = VA_INVALID_ID;
4656     if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2')) {
4657         src_surface.id = pipeline_param->surface;
4658         src_surface.type = I965_SURFACE_TYPE_SURFACE;
4659         src_surface.flags = I965_SURFACE_FLAG_FRAME;
4660         src_rect.x = 0;
4661         src_rect.y = 0;
4662         src_rect.width = in_width;
4663         src_rect.height = in_height;
4664
4665         status = i965_CreateSurfaces(ctx,
4666                                      in_width,
4667                                      in_height,
4668                                      VA_RT_FORMAT_YUV420,
4669                                      1,
4670                                      &out_surface_id);
4671         assert(status == VA_STATUS_SUCCESS);
4672         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
4673         obj_surface = SURFACE(out_surface_id);
4674         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
4675
4676         dst_surface.id = out_surface_id;
4677         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4678         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4679         dst_rect.x = 0;
4680         dst_rect.y = 0;
4681         dst_rect.width = in_width;
4682         dst_rect.height = in_height;
4683
4684         status = i965_image_processing(ctx,
4685                                        &src_surface,
4686                                        &src_rect,
4687                                        &dst_surface,
4688                                        &dst_rect);
4689         assert(status == VA_STATUS_SUCCESS);
4690
4691         src_surface.id = out_surface_id;
4692         src_surface.type = I965_SURFACE_TYPE_SURFACE;
4693         src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
4694     }
4695
4696     if (pipeline_param->surface_region) {
4697         src_rect.x = pipeline_param->surface_region->x;
4698         src_rect.y = pipeline_param->surface_region->y;
4699         src_rect.width = pipeline_param->surface_region->width;
4700         src_rect.height = pipeline_param->surface_region->height;
4701     } else {
4702         src_rect.x = 0;
4703         src_rect.y = 0;
4704         src_rect.width = in_width;
4705         src_rect.height = in_height;
4706     }
4707
4708     if (pipeline_param->output_region) {
4709         dst_rect.x = pipeline_param->output_region->x;
4710         dst_rect.y = pipeline_param->output_region->y;
4711         dst_rect.width = pipeline_param->output_region->width;
4712         dst_rect.height = pipeline_param->output_region->height;
4713     } else {
4714         dst_rect.x = 0;
4715         dst_rect.y = 0;
4716         dst_rect.width = in_width;
4717         dst_rect.height = in_height;
4718     }
4719
4720     for (i = 0; i < pipeline_param->num_filters; i++) {
4721         struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]);
4722         VAProcFilterParameterBufferBase *filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer;
4723         VAProcFilterType filter_type = filter_param->type;
4724         out_surface_id = VA_INVALID_ID;
4725         int kernel_index = procfilter_to_pp_flag[filter_type];
4726
4727         if (kernel_index != PP_NULL &&
4728             proc_context->pp_context.pp_modules[kernel_index].kernel.bo != NULL) {
4729             status = i965_CreateSurfaces(ctx,
4730                                          in_width,
4731                                          in_height,
4732                                          VA_RT_FORMAT_YUV420,
4733                                          1,
4734                                          &out_surface_id);
4735             assert(status == VA_STATUS_SUCCESS);
4736             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
4737             obj_surface = SURFACE(out_surface_id);
4738             i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4739             dst_surface.id = out_surface_id;
4740             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4741             status = i965_post_processing_internal(ctx, &proc_context->pp_context,
4742                                                    &src_surface,
4743                                                    &src_rect,
4744                                                    &dst_surface,
4745                                                    &src_rect,
4746                                                    kernel_index,
4747                                                    filter_param);
4748
4749             if (status == VA_STATUS_SUCCESS) {
4750                 src_surface.id = dst_surface.id;
4751                 src_surface.type = dst_surface.type;
4752                 src_surface.flags = dst_surface.flags;
4753             }
4754         }
4755     }
4756
4757     obj_surface = SURFACE(proc_state->current_render_target);
4758     int csc_needed = 0;
4759     if (obj_surface->fourcc && obj_surface->fourcc !=  VA_FOURCC('N','V','1','2')){
4760         csc_needed = 1;
4761         out_surface_id = VA_INVALID_ID;
4762         status = i965_CreateSurfaces(ctx,
4763                                      obj_surface->orig_width,
4764                                      obj_surface->orig_height,
4765                                      VA_RT_FORMAT_YUV420, 
4766                                      1,
4767                                      &out_surface_id);
4768         assert(status == VA_STATUS_SUCCESS);
4769         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
4770         struct object_surface *csc_surface = SURFACE(out_surface_id);
4771         i965_check_alloc_surface_bo(ctx, csc_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4772         dst_surface.id = out_surface_id;
4773     } else {
4774         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4775         dst_surface.id = proc_state->current_render_target;
4776     }
4777
4778     dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4779     i965_vpp_clear_surface(ctx, &proc_context->pp_context, proc_state->current_render_target, pipeline_param->output_background_color); 
4780     if (src_rect.width == dst_rect.width &&
4781         src_rect.height == dst_rect.height) {
4782         i965_post_processing_internal(ctx, &proc_context->pp_context,
4783                                       &src_surface,
4784                                       &src_rect,
4785                                       &dst_surface,
4786                                       &dst_rect,
4787                                       PP_NV12_LOAD_SAVE_N12,
4788                                       NULL);
4789     } else {
4790
4791         i965_post_processing_internal(ctx, &proc_context->pp_context,
4792                                       &src_surface,
4793                                       &src_rect,
4794                                       &dst_surface,
4795                                       &dst_rect,
4796                                       (pipeline_param->filter_flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC ?
4797                                       PP_NV12_AVS : PP_NV12_SCALING,
4798                                       NULL);
4799     }
4800
4801     if (csc_needed) {
4802         src_surface.id = dst_surface.id;
4803         src_surface.type = dst_surface.type;
4804         src_surface.flags = dst_surface.flags;
4805         dst_surface.id = proc_state->current_render_target;
4806         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4807         i965_image_processing(ctx, &src_surface, &dst_rect, &dst_surface, &dst_rect);
4808     }
4809     
4810     if (num_tmp_surfaces)
4811         i965_DestroySurfaces(ctx,
4812                              tmp_surfaces,
4813                              num_tmp_surfaces);
4814
4815     intel_batchbuffer_flush(hw_context->batch);
4816 }
4817
4818 static void
4819 i965_proc_context_destroy(void *hw_context)
4820 {
4821     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
4822
4823     i965_post_processing_context_finalize(&proc_context->pp_context);
4824     intel_batchbuffer_free(proc_context->base.batch);
4825     free(proc_context);
4826 }
4827
4828 struct hw_context *
4829 i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config)
4830 {
4831     struct intel_driver_data *intel = intel_driver_data(ctx);
4832     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
4833
4834     proc_context->base.destroy = i965_proc_context_destroy;
4835     proc_context->base.run = i965_proc_picture;
4836     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
4837     i965_post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
4838
4839     return (struct hw_context *)proc_context;
4840 }