work around hw limitation(dword alignment) of horizontal offset
[profile/ivi/vaapi-intel-driver.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_post_processing.h"
40 #include "i965_render.h"
41
/*
 * Non-zero when the device id of `ctx` matches IS_IRONLAKE, IS_GEN6 or
 * IS_GEN7, i.e. one of the generations this file provides kernels for.
 */
#define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) ||     \
                     IS_GEN6((ctx)->intel.device_id) ||         \
                     IS_GEN7((ctx)->intel.device_id))

/*
 * Surface-state slot sizing.  Every surface-state flavour is padded to a
 * 32-byte multiple, and the slot size is the largest of all flavours so a
 * single layout works for both the i965 and the gen7 structures.
 */
#define SURFACE_STATE_PADDED_SIZE_0_I965        ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_I965        ALIGN(sizeof(struct i965_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_I965          MAX(SURFACE_STATE_PADDED_SIZE_0_I965, SURFACE_STATE_PADDED_SIZE_1_I965)

#define SURFACE_STATE_PADDED_SIZE_0_GEN7        ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_GEN7        ALIGN(sizeof(struct gen7_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN7          MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)

#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
/*
 * Byte offset of surface-state slot `index`.  The argument is parenthesized
 * so that expressions such as SURFACE_STATE_OFFSET(i + 1) scale correctly.
 */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
/* The binding table starts right after the last surface-state slot. */
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)

/*
 * NOTE(review): presumably the pixel block handled per kernel invocation and
 * the alignment (in pixels) the kernels need for the horizontal offset --
 * confirm against the shader sources.
 */
#define GPU_ASM_BLOCK_WIDTH         16
#define GPU_ASM_BLOCK_HEIGHT        8
#define GPU_ASM_X_OFFSET_ALIGNMENT  4
61
/*
 * gen5 post-processing kernel binaries.
 *
 * Each array is populated from a generated shader file pulled in with
 * #include; every row holds four 32-bit words.  The arrays are referenced
 * by name and size from pp_modules_gen5[].
 */
static const uint32_t pp_null_gen5[][4] = {
#include "shaders/post_processing/gen5_6/null.g4b.gen5"
};

static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
#include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
#include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
};

static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
#include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
};

static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
#include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
};

static const uint32_t pp_nv12_scaling_gen5[][4] = {
#include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_avs_gen5[][4] = {
#include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_dndi_gen5[][4] = {
#include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_dn_gen5[][4] = {
#include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_load_save_pa_gen5[][4] = {
#include "shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5"
};

static const uint32_t pp_pl3_load_save_pa_gen5[][4] = {
#include "shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5"
};

static const uint32_t pp_pa_load_save_nv12_gen5[][4] = {
#include "shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5"
};

static const uint32_t pp_pa_load_save_pl3_gen5[][4] = {
#include "shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5"
};

static const uint32_t pp_rgbx_load_save_nv12_gen5[][4] = {
#include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_load_save_rgbx_gen5[][4] = {
#include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g4b.gen5"
};
121
/*
 * Forward declarations of the per-module initialize callbacks referenced by
 * the pp_modules_gen5[] / pp_modules_gen6[] tables below.  All callbacks
 * share one signature: driver context, post-processing context, source
 * surface and rectangle, destination surface and rectangle, and an opaque
 * filter parameter.
 */
static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                                   const struct i965_surface *src_surface,
                                   const VARectangle *src_rect,
                                   struct i965_surface *dst_surface,
                                   const VARectangle *dst_rect,
                                   void *filter_param);
static VAStatus pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                                            const struct i965_surface *src_surface,
                                            const VARectangle *src_rect,
                                            struct i965_surface *dst_surface,
                                            const VARectangle *dst_rect,
                                            void *filter_param);
static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                                           const struct i965_surface *src_surface,
                                           const VARectangle *src_rect,
                                           struct i965_surface *dst_surface,
                                           const VARectangle *dst_rect,
                                           void *filter_param);
static VAStatus gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                                             const struct i965_surface *src_surface,
                                             const VARectangle *src_rect,
                                             struct i965_surface *dst_surface,
                                             const VARectangle *dst_rect,
                                             void *filter_param);
static VAStatus pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                                                const struct i965_surface *src_surface,
                                                const VARectangle *src_rect,
                                                struct i965_surface *dst_surface,
                                                const VARectangle *dst_rect,
                                                void *filter_param);
static VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                                        const struct i965_surface *src_surface,
                                        const VARectangle *src_rect,
                                        struct i965_surface *dst_surface,
                                        const VARectangle *dst_rect,
                                        void *filter_param);
static VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                                      const struct i965_surface *src_surface,
                                      const VARectangle *src_rect,
                                      struct i965_surface *dst_surface,
                                      const VARectangle *dst_rect,
                                      void *filter_param);
164
165 static struct pp_module pp_modules_gen5[] = {
166     {
167         {
168             "NULL module (for testing)",
169             PP_NULL,
170             pp_null_gen5,
171             sizeof(pp_null_gen5),
172             NULL,
173         },
174
175         pp_null_initialize,
176     },
177
178     {
179         {
180             "NV12_NV12",
181             PP_NV12_LOAD_SAVE_N12,
182             pp_nv12_load_save_nv12_gen5,
183             sizeof(pp_nv12_load_save_nv12_gen5),
184             NULL,
185         },
186
187         pp_plx_load_save_plx_initialize,
188     },
189
190     {
191         {
192             "NV12_PL3",
193             PP_NV12_LOAD_SAVE_PL3,
194             pp_nv12_load_save_pl3_gen5,
195             sizeof(pp_nv12_load_save_pl3_gen5),
196             NULL,
197         },
198
199         pp_plx_load_save_plx_initialize,
200     },
201
202     {
203         {
204             "PL3_NV12",
205             PP_PL3_LOAD_SAVE_N12,
206             pp_pl3_load_save_nv12_gen5,
207             sizeof(pp_pl3_load_save_nv12_gen5),
208             NULL,
209         },
210
211         pp_plx_load_save_plx_initialize,
212     },
213
214     {
215         {
216             "PL3_PL3",
217             PP_PL3_LOAD_SAVE_N12,
218             pp_pl3_load_save_pl3_gen5,
219             sizeof(pp_pl3_load_save_pl3_gen5),
220             NULL,
221         },
222
223         pp_plx_load_save_plx_initialize
224     },
225
226     {
227         {
228             "NV12 Scaling module",
229             PP_NV12_SCALING,
230             pp_nv12_scaling_gen5,
231             sizeof(pp_nv12_scaling_gen5),
232             NULL,
233         },
234
235         pp_nv12_scaling_initialize,
236     },
237
238     {
239         {
240             "NV12 AVS module",
241             PP_NV12_AVS,
242             pp_nv12_avs_gen5,
243             sizeof(pp_nv12_avs_gen5),
244             NULL,
245         },
246
247         pp_nv12_avs_initialize_nlas,
248     },
249
250     {
251         {
252             "NV12 DNDI module",
253             PP_NV12_DNDI,
254             pp_nv12_dndi_gen5,
255             sizeof(pp_nv12_dndi_gen5),
256             NULL,
257         },
258
259         pp_nv12_dndi_initialize,
260     },
261
262     {
263         {
264             "NV12 DN module",
265             PP_NV12_DN,
266             pp_nv12_dn_gen5,
267             sizeof(pp_nv12_dn_gen5),
268             NULL,
269         },
270
271         pp_nv12_dn_initialize,
272     },
273
274     {
275         {
276             "NV12_PA module",
277             PP_NV12_LOAD_SAVE_PA,
278             pp_nv12_load_save_pa_gen5,
279             sizeof(pp_nv12_load_save_pa_gen5),
280             NULL,
281         },
282     
283         pp_plx_load_save_plx_initialize,
284     },
285
286     {
287         {
288             "PL3_PA module",
289             PP_PL3_LOAD_SAVE_PA,
290             pp_pl3_load_save_pa_gen5,
291             sizeof(pp_pl3_load_save_pa_gen5),
292             NULL,
293         },
294     
295         pp_plx_load_save_plx_initialize,
296     },
297
298     {
299         {
300             "PA_NV12 module",
301             PP_PA_LOAD_SAVE_NV12,
302             pp_pa_load_save_nv12_gen5,
303             sizeof(pp_pa_load_save_nv12_gen5),
304             NULL,
305         },
306     
307         pp_plx_load_save_plx_initialize,
308     },
309
310     {
311         {
312             "PA_PL3 module",
313             PP_PA_LOAD_SAVE_PL3,
314             pp_pa_load_save_pl3_gen5,
315             sizeof(pp_pa_load_save_pl3_gen5),
316             NULL,
317         },
318     
319         pp_plx_load_save_plx_initialize,
320     },
321
322     {
323         {
324             "RGBX_NV12 module",
325             PP_RGBX_LOAD_SAVE_NV12,
326             pp_rgbx_load_save_nv12_gen5,
327             sizeof(pp_rgbx_load_save_nv12_gen5),
328             NULL,
329         },
330     
331         pp_plx_load_save_plx_initialize,
332     },
333             
334     {
335         {
336             "NV12_RGBX module",
337             PP_NV12_LOAD_SAVE_RGBX,
338             pp_nv12_load_save_rgbx_gen5,
339             sizeof(pp_nv12_load_save_rgbx_gen5),
340             NULL,
341         },
342     
343         pp_plx_load_save_plx_initialize,
344     },
345                     
346 };
347
/*
 * gen6 post-processing kernel binaries, filled from generated .g6b shader
 * files via #include; every row holds four 32-bit words.  The arrays are
 * referenced by name and size from pp_modules_gen6[].
 *
 * Note that pp_nv12_scaling_gen6 and pp_nv12_avs_gen6 include the same
 * nv12_avs_nv12.g6b binary.
 */
static const uint32_t pp_null_gen6[][4] = {
#include "shaders/post_processing/gen5_6/null.g6b"
};

static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
#include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
};

static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
#include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
};

static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
#include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
};

static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
#include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
};

static const uint32_t pp_nv12_scaling_gen6[][4] = {
#include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
};

static const uint32_t pp_nv12_avs_gen6[][4] = {
#include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
};

static const uint32_t pp_nv12_dndi_gen6[][4] = {
#include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
};

static const uint32_t pp_nv12_dn_gen6[][4] = {
#include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
};

static const uint32_t pp_nv12_load_save_pa_gen6[][4] = {
#include "shaders/post_processing/gen5_6/nv12_load_save_pa.g6b"
};

static const uint32_t pp_pl3_load_save_pa_gen6[][4] = {
#include "shaders/post_processing/gen5_6/pl3_load_save_pa.g6b"
};

static const uint32_t pp_pa_load_save_nv12_gen6[][4] = {
#include "shaders/post_processing/gen5_6/pa_load_save_nv12.g6b"
};

static const uint32_t pp_pa_load_save_pl3_gen6[][4] = {
#include "shaders/post_processing/gen5_6/pa_load_save_pl3.g6b"
};

static const uint32_t pp_rgbx_load_save_nv12_gen6[][4] = {
#include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g6b"
};

static const uint32_t pp_nv12_load_save_rgbx_gen6[][4] = {
#include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g6b"
};
407
408 static struct pp_module pp_modules_gen6[] = {
409     {
410         {
411             "NULL module (for testing)",
412             PP_NULL,
413             pp_null_gen6,
414             sizeof(pp_null_gen6),
415             NULL,
416         },
417
418         pp_null_initialize,
419     },
420
421     {
422         {
423             "NV12_NV12",
424             PP_NV12_LOAD_SAVE_N12,
425             pp_nv12_load_save_nv12_gen6,
426             sizeof(pp_nv12_load_save_nv12_gen6),
427             NULL,
428         },
429
430         pp_plx_load_save_plx_initialize,
431     },
432
433     {
434         {
435             "NV12_PL3",
436             PP_NV12_LOAD_SAVE_PL3,
437             pp_nv12_load_save_pl3_gen6,
438             sizeof(pp_nv12_load_save_pl3_gen6),
439             NULL,
440         },
441         
442         pp_plx_load_save_plx_initialize,
443     },
444
445     {
446         {
447             "PL3_NV12",
448             PP_PL3_LOAD_SAVE_N12,
449             pp_pl3_load_save_nv12_gen6,
450             sizeof(pp_pl3_load_save_nv12_gen6),
451             NULL,
452         },
453
454         pp_plx_load_save_plx_initialize,
455     },
456
457     {
458         {
459             "PL3_PL3",
460             PP_PL3_LOAD_SAVE_N12,
461             pp_pl3_load_save_pl3_gen6,
462             sizeof(pp_pl3_load_save_pl3_gen6),
463             NULL,
464         },
465
466         pp_plx_load_save_plx_initialize,
467     },
468
469     {
470         {
471             "NV12 Scaling module",
472             PP_NV12_SCALING,
473             pp_nv12_scaling_gen6,
474             sizeof(pp_nv12_scaling_gen6),
475             NULL,
476         },
477
478         gen6_nv12_scaling_initialize,
479     },
480
481     {
482         {
483             "NV12 AVS module",
484             PP_NV12_AVS,
485             pp_nv12_avs_gen6,
486             sizeof(pp_nv12_avs_gen6),
487             NULL,
488         },
489
490         pp_nv12_avs_initialize_nlas,
491     },
492
493     {
494         {
495             "NV12 DNDI module",
496             PP_NV12_DNDI,
497             pp_nv12_dndi_gen6,
498             sizeof(pp_nv12_dndi_gen6),
499             NULL,
500         },
501
502         pp_nv12_dndi_initialize,
503     },
504
505     {
506         {
507             "NV12 DN module",
508             PP_NV12_DN,
509             pp_nv12_dn_gen6,
510             sizeof(pp_nv12_dn_gen6),
511             NULL,
512         },
513
514         pp_nv12_dn_initialize,
515     },
516     {
517         {
518             "NV12_PA module",
519             PP_NV12_LOAD_SAVE_PA,
520             pp_nv12_load_save_pa_gen6,
521             sizeof(pp_nv12_load_save_pa_gen6),
522             NULL,
523         },
524     
525         pp_plx_load_save_plx_initialize,
526     },
527     
528     {
529         {
530             "PL3_PA module",
531             PP_PL3_LOAD_SAVE_PA,
532             pp_pl3_load_save_pa_gen6,
533             sizeof(pp_pl3_load_save_pa_gen6),
534             NULL,
535         },
536     
537         pp_plx_load_save_plx_initialize,
538     },
539     
540     {
541         {
542             "PA_NV12 module",
543             PP_PA_LOAD_SAVE_NV12,
544             pp_pa_load_save_nv12_gen6,
545             sizeof(pp_pa_load_save_nv12_gen6),
546             NULL,
547         },
548     
549         pp_plx_load_save_plx_initialize,
550     },
551
552     {
553         {
554             "PA_PL3 module",
555             PP_PA_LOAD_SAVE_PL3,
556             pp_pa_load_save_pl3_gen6,
557             sizeof(pp_pa_load_save_pl3_gen6),
558             NULL,
559         },
560     
561         pp_plx_load_save_plx_initialize,
562     },
563     
564     {
565         {
566             "RGBX_NV12 module",
567             PP_RGBX_LOAD_SAVE_NV12,
568             pp_rgbx_load_save_nv12_gen6,
569             sizeof(pp_rgbx_load_save_nv12_gen6),
570             NULL,
571         },
572     
573         pp_plx_load_save_plx_initialize,
574     },
575
576     {
577         {
578             "NV12_RGBX module",
579             PP_NV12_LOAD_SAVE_RGBX,
580             pp_nv12_load_save_rgbx_gen6,
581             sizeof(pp_nv12_load_save_rgbx_gen6),
582             NULL,
583         },
584     
585         pp_plx_load_save_plx_initialize,
586     },
587 };
588
/*
 * gen7 post-processing kernel binaries, filled from generated .g7b shader
 * files via #include; every row holds four 32-bit words.  The arrays are
 * referenced by name and size from pp_modules_gen7[].
 *
 * pp_null_gen7, pp_rgbx_load_save_nv12_gen7 and pp_nv12_load_save_rgbx_gen7
 * have no shader binary and are defined with an empty initializer list.
 * NOTE(review): an empty `{ }` initializer is not valid ISO C before C23;
 * this presumably relies on a compiler extension -- confirm the toolchain.
 *
 * pp_nv12_scaling_gen7 and pp_nv12_avs_gen7 include the same avs.g7b binary.
 */
static const uint32_t pp_null_gen7[][4] = {
};

static const uint32_t pp_nv12_load_save_nv12_gen7[][4] = {
#include "shaders/post_processing/gen7/pl2_to_pl2.g7b"
};

static const uint32_t pp_nv12_load_save_pl3_gen7[][4] = {
#include "shaders/post_processing/gen7/pl2_to_pl3.g7b"
};

static const uint32_t pp_pl3_load_save_nv12_gen7[][4] = {
#include "shaders/post_processing/gen7/pl3_to_pl2.g7b"
};

static const uint32_t pp_pl3_load_save_pl3_gen7[][4] = {
#include "shaders/post_processing/gen7/pl3_to_pl3.g7b"
};

static const uint32_t pp_nv12_scaling_gen7[][4] = {
#include "shaders/post_processing/gen7/avs.g7b"
};

static const uint32_t pp_nv12_avs_gen7[][4] = {
#include "shaders/post_processing/gen7/avs.g7b"
};

static const uint32_t pp_nv12_dndi_gen7[][4] = {
#include "shaders/post_processing/gen7/dndi.g7b"
};

static const uint32_t pp_nv12_dn_gen7[][4] = {
#include "shaders/post_processing/gen7/nv12_dn_nv12.g7b"
};
static const uint32_t pp_nv12_load_save_pa_gen7[][4] = {
#include "shaders/post_processing/gen7/pl2_to_pa.g7b"
};
static const uint32_t pp_pl3_load_save_pa_gen7[][4] = {
#include "shaders/post_processing/gen7/pl3_to_pa.g7b"
};
static const uint32_t pp_pa_load_save_nv12_gen7[][4] = {
#include "shaders/post_processing/gen7/pa_to_pl2.g7b"
};
static const uint32_t pp_pa_load_save_pl3_gen7[][4] = {
#include "shaders/post_processing/gen7/pa_to_pl3.g7b"
};
static const uint32_t pp_rgbx_load_save_nv12_gen7[][4] = {
};
static const uint32_t pp_nv12_load_save_rgbx_gen7[][4] = {
};
639
/*
 * Forward declarations of the gen7-specific initialize callbacks referenced
 * by pp_modules_gen7[].  They share the signature of the other module
 * callbacks: driver context, post-processing context, source surface and
 * rectangle, destination surface and rectangle, and an opaque filter
 * parameter.
 */
static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                                           const struct i965_surface *src_surface,
                                           const VARectangle *src_rect,
                                           struct i965_surface *dst_surface,
                                           const VARectangle *dst_rect,
                                           void *filter_param);
static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                                             const struct i965_surface *src_surface,
                                             const VARectangle *src_rect,
                                             struct i965_surface *dst_surface,
                                             const VARectangle *dst_rect,
                                             void *filter_param);
static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                                           const struct i965_surface *src_surface,
                                           const VARectangle *src_rect,
                                           struct i965_surface *dst_surface,
                                           const VARectangle *dst_rect,
                                           void *filter_param);
658
659 static struct pp_module pp_modules_gen7[] = {
660     {
661         {
662             "NULL module (for testing)",
663             PP_NULL,
664             pp_null_gen7,
665             sizeof(pp_null_gen7),
666             NULL,
667         },
668
669         pp_null_initialize,
670     },
671
672     {
673         {
674             "NV12_NV12",
675             PP_NV12_LOAD_SAVE_N12,
676             pp_nv12_load_save_nv12_gen7,
677             sizeof(pp_nv12_load_save_nv12_gen7),
678             NULL,
679         },
680
681         gen7_pp_plx_avs_initialize,
682     },
683
684     {
685         {
686             "NV12_PL3",
687             PP_NV12_LOAD_SAVE_PL3,
688             pp_nv12_load_save_pl3_gen7,
689             sizeof(pp_nv12_load_save_pl3_gen7),
690             NULL,
691         },
692         
693         gen7_pp_plx_avs_initialize,
694     },
695
696     {
697         {
698             "PL3_NV12",
699             PP_PL3_LOAD_SAVE_N12,
700             pp_pl3_load_save_nv12_gen7,
701             sizeof(pp_pl3_load_save_nv12_gen7),
702             NULL,
703         },
704
705         gen7_pp_plx_avs_initialize,
706     },
707
708     {
709         {
710             "PL3_PL3",
711             PP_PL3_LOAD_SAVE_N12,
712             pp_pl3_load_save_pl3_gen7,
713             sizeof(pp_pl3_load_save_pl3_gen7),
714             NULL,
715         },
716
717         gen7_pp_plx_avs_initialize,
718     },
719
720     {
721         {
722             "NV12 Scaling module",
723             PP_NV12_SCALING,
724             pp_nv12_scaling_gen7,
725             sizeof(pp_nv12_scaling_gen7),
726             NULL,
727         },
728
729         gen7_pp_plx_avs_initialize,
730     },
731
732     {
733         {
734             "NV12 AVS module",
735             PP_NV12_AVS,
736             pp_nv12_avs_gen7,
737             sizeof(pp_nv12_avs_gen7),
738             NULL,
739         },
740
741         gen7_pp_plx_avs_initialize,
742     },
743
744     {
745         {
746             "NV12 DNDI module",
747             PP_NV12_DNDI,
748             pp_nv12_dndi_gen7,
749             sizeof(pp_nv12_dndi_gen7),
750             NULL,
751         },
752
753         gen7_pp_nv12_dndi_initialize,
754     },
755
756     {
757         {
758             "NV12 DN module",
759             PP_NV12_DN,
760             pp_nv12_dn_gen7,
761             sizeof(pp_nv12_dn_gen7),
762             NULL,
763         },
764
765         gen7_pp_nv12_dn_initialize,
766     },
767     {
768         {
769             "NV12_PA module",
770             PP_NV12_LOAD_SAVE_PA,
771             pp_nv12_load_save_pa_gen7,
772             sizeof(pp_nv12_load_save_pa_gen7),
773             NULL,
774         },
775     
776         gen7_pp_plx_avs_initialize,
777     },
778
779     {
780         {
781             "PL3_PA module",
782             PP_PL3_LOAD_SAVE_PA,
783             pp_pl3_load_save_pa_gen7,
784             sizeof(pp_pl3_load_save_pa_gen7),
785             NULL,
786         },
787     
788         gen7_pp_plx_avs_initialize,
789     },
790
791     {
792         {
793             "PA_NV12 module",
794             PP_PA_LOAD_SAVE_NV12,
795             pp_pa_load_save_nv12_gen7,
796             sizeof(pp_pa_load_save_nv12_gen7),
797             NULL,
798         },
799     
800         gen7_pp_plx_avs_initialize,
801     },
802
803     {
804         {
805             "PA_PL3 module",
806             PP_PA_LOAD_SAVE_PL3,
807             pp_pa_load_save_pl3_gen7,
808             sizeof(pp_pa_load_save_pl3_gen7),
809             NULL,
810         },
811     
812         gen7_pp_plx_avs_initialize,
813     },
814     
815     {
816         {
817             "RGBX_NV12 module",
818             PP_RGBX_LOAD_SAVE_NV12,
819             pp_rgbx_load_save_nv12_gen7,
820             sizeof(pp_rgbx_load_save_nv12_gen7),
821             NULL,
822         },
823     
824         pp_plx_load_save_plx_initialize,
825     },
826
827     {
828         {
829             "NV12_RGBX module",
830             PP_NV12_LOAD_SAVE_RGBX,
831             pp_nv12_load_save_rgbx_gen7,
832             sizeof(pp_nv12_load_save_rgbx_gen7),
833             NULL,
834         },
835     
836         pp_plx_load_save_plx_initialize,
837     },
838             
839 };
840
841 static int
842 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
843 {
844     struct i965_driver_data *i965 = i965_driver_data(ctx);
845     int fourcc;
846
847     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
848         struct object_image *obj_image = IMAGE(surface->id);
849         fourcc = obj_image->image.format.fourcc;
850     } else {
851         struct object_surface *obj_surface = SURFACE(surface->id);
852         fourcc = obj_surface->fourcc;
853     }
854
855     return fourcc;
856 }
857
858 static void
859 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
860 {
861     switch (tiling) {
862     case I915_TILING_NONE:
863         ss->ss3.tiled_surface = 0;
864         ss->ss3.tile_walk = 0;
865         break;
866     case I915_TILING_X:
867         ss->ss3.tiled_surface = 1;
868         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
869         break;
870     case I915_TILING_Y:
871         ss->ss3.tiled_surface = 1;
872         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
873         break;
874     }
875 }
876
877 static void
878 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
879 {
880     switch (tiling) {
881     case I915_TILING_NONE:
882         ss->ss2.tiled_surface = 0;
883         ss->ss2.tile_walk = 0;
884         break;
885     case I915_TILING_X:
886         ss->ss2.tiled_surface = 1;
887         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
888         break;
889     case I915_TILING_Y:
890         ss->ss2.tiled_surface = 1;
891         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
892         break;
893     }
894 }
895
896 static void
897 gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
898 {
899     switch (tiling) {
900     case I915_TILING_NONE:
901         ss->ss0.tiled_surface = 0;
902         ss->ss0.tile_walk = 0;
903         break;
904     case I915_TILING_X:
905         ss->ss0.tiled_surface = 1;
906         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
907         break;
908     case I915_TILING_Y:
909         ss->ss0.tiled_surface = 1;
910         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
911         break;
912     }
913 }
914
915 static void
916 gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
917 {
918     switch (tiling) {
919     case I915_TILING_NONE:
920         ss->ss2.tiled_surface = 0;
921         ss->ss2.tile_walk = 0;
922         break;
923     case I915_TILING_X:
924         ss->ss2.tiled_surface = 1;
925         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
926         break;
927     case I915_TILING_Y:
928         ss->ss2.tiled_surface = 1;
929         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
930         break;
931     }
932 }
933
934 static void
935 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
936 {
937     struct i965_interface_descriptor *desc;
938     dri_bo *bo;
939     int pp_index = pp_context->current_pp;
940
941     bo = pp_context->idrt.bo;
942     dri_bo_map(bo, 1);
943     assert(bo->virtual);
944     desc = bo->virtual;
945     memset(desc, 0, sizeof(*desc));
946     desc->desc0.grf_reg_blocks = 10;
947     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
948     desc->desc1.const_urb_entry_read_offset = 0;
949     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
950     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
951     desc->desc2.sampler_count = 0;
952     desc->desc3.binding_table_entry_count = 0;
953     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
954
955     dri_bo_emit_reloc(bo,
956                       I915_GEM_DOMAIN_INSTRUCTION, 0,
957                       desc->desc0.grf_reg_blocks,
958                       offsetof(struct i965_interface_descriptor, desc0),
959                       pp_context->pp_modules[pp_index].kernel.bo);
960
961     dri_bo_emit_reloc(bo,
962                       I915_GEM_DOMAIN_INSTRUCTION, 0,
963                       desc->desc2.sampler_count << 2,
964                       offsetof(struct i965_interface_descriptor, desc2),
965                       pp_context->sampler_state_table.bo);
966
967     dri_bo_unmap(bo);
968     pp_context->idrt.num_interface_descriptors++;
969 }
970
971 static void
972 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
973 {
974     struct i965_vfe_state *vfe_state;
975     dri_bo *bo;
976
977     bo = pp_context->vfe_state.bo;
978     dri_bo_map(bo, 1);
979     assert(bo->virtual);
980     vfe_state = bo->virtual;
981     memset(vfe_state, 0, sizeof(*vfe_state));
982     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
983     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
984     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
985     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
986     vfe_state->vfe1.children_present = 0;
987     vfe_state->vfe2.interface_descriptor_base = 
988         pp_context->idrt.bo->offset >> 4; /* reloc */
989     dri_bo_emit_reloc(bo,
990                       I915_GEM_DOMAIN_INSTRUCTION, 0,
991                       0,
992                       offsetof(struct i965_vfe_state, vfe2),
993                       pp_context->idrt.bo);
994     dri_bo_unmap(bo);
995 }
996
997 static void
998 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
999 {
1000     unsigned char *constant_buffer;
1001     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1002
1003     assert(sizeof(*pp_static_parameter) == 128);
1004     dri_bo_map(pp_context->curbe.bo, 1);
1005     assert(pp_context->curbe.bo->virtual);
1006     constant_buffer = pp_context->curbe.bo->virtual;
1007     memcpy(constant_buffer, pp_static_parameter, sizeof(*pp_static_parameter));
1008     dri_bo_unmap(pp_context->curbe.bo);
1009 }
1010
1011 static void
1012 ironlake_pp_states_setup(VADriverContextP ctx,
1013                          struct i965_post_processing_context *pp_context)
1014 {
1015     ironlake_pp_interface_descriptor_table(pp_context);
1016     ironlake_pp_vfe_state(pp_context);
1017     ironlake_pp_upload_constants(pp_context);
1018 }
1019
1020 static void
1021 ironlake_pp_pipeline_select(VADriverContextP ctx,
1022                             struct i965_post_processing_context *pp_context)
1023 {
1024     struct intel_batchbuffer *batch = pp_context->batch;
1025
1026     BEGIN_BATCH(batch, 1);
1027     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
1028     ADVANCE_BATCH(batch);
1029 }
1030
1031 static void
1032 ironlake_pp_urb_layout(VADriverContextP ctx,
1033                        struct i965_post_processing_context *pp_context)
1034 {
1035     struct intel_batchbuffer *batch = pp_context->batch;
1036     unsigned int vfe_fence, cs_fence;
1037
1038     vfe_fence = pp_context->urb.cs_start;
1039     cs_fence = pp_context->urb.size;
1040
1041     BEGIN_BATCH(batch, 3);
1042     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
1043     OUT_BATCH(batch, 0);
1044     OUT_BATCH(batch, 
1045               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
1046               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
1047     ADVANCE_BATCH(batch);
1048 }
1049
1050 static void
1051 ironlake_pp_state_base_address(VADriverContextP ctx,
1052                                struct i965_post_processing_context *pp_context)
1053 {
1054     struct intel_batchbuffer *batch = pp_context->batch;
1055
1056     BEGIN_BATCH(batch, 8);
1057     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
1058     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1059     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
1060     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1061     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1062     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1063     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1064     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1065     ADVANCE_BATCH(batch);
1066 }
1067
1068 static void
1069 ironlake_pp_state_pointers(VADriverContextP ctx,
1070                            struct i965_post_processing_context *pp_context)
1071 {
1072     struct intel_batchbuffer *batch = pp_context->batch;
1073
1074     BEGIN_BATCH(batch, 3);
1075     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
1076     OUT_BATCH(batch, 0);
1077     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1078     ADVANCE_BATCH(batch);
1079 }
1080
1081 static void 
1082 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
1083                           struct i965_post_processing_context *pp_context)
1084 {
1085     struct intel_batchbuffer *batch = pp_context->batch;
1086
1087     BEGIN_BATCH(batch, 2);
1088     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
1089     OUT_BATCH(batch,
1090               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
1091               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
1092     ADVANCE_BATCH(batch);
1093 }
1094
1095 static void
1096 ironlake_pp_constant_buffer(VADriverContextP ctx,
1097                             struct i965_post_processing_context *pp_context)
1098 {
1099     struct intel_batchbuffer *batch = pp_context->batch;
1100
1101     BEGIN_BATCH(batch, 2);
1102     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1103     OUT_RELOC(batch, pp_context->curbe.bo,
1104               I915_GEM_DOMAIN_INSTRUCTION, 0,
1105               pp_context->urb.size_cs_entry - 1);
1106     ADVANCE_BATCH(batch);    
1107 }
1108
1109 static void
1110 ironlake_pp_object_walker(VADriverContextP ctx,
1111                           struct i965_post_processing_context *pp_context)
1112 {
1113     struct intel_batchbuffer *batch = pp_context->batch;
1114     int x, x_steps, y, y_steps;
1115     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1116
1117     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
1118     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
1119
1120     for (y = 0; y < y_steps; y++) {
1121         for (x = 0; x < x_steps; x++) {
1122             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
1123                 BEGIN_BATCH(batch, 20);
1124                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
1125                 OUT_BATCH(batch, 0);
1126                 OUT_BATCH(batch, 0); /* no indirect data */
1127                 OUT_BATCH(batch, 0);
1128
1129                 /* inline data grf 5-6 */
1130                 assert(sizeof(*pp_inline_parameter) == 64);
1131                 intel_batchbuffer_data(batch, pp_inline_parameter, sizeof(*pp_inline_parameter));
1132
1133                 ADVANCE_BATCH(batch);
1134             }
1135         }
1136     }
1137 }
1138
1139 static void
1140 ironlake_pp_pipeline_setup(VADriverContextP ctx,
1141                            struct i965_post_processing_context *pp_context)
1142 {
1143     struct intel_batchbuffer *batch = pp_context->batch;
1144
1145     intel_batchbuffer_start_atomic(batch, 0x1000);
1146     intel_batchbuffer_emit_mi_flush(batch);
1147     ironlake_pp_pipeline_select(ctx, pp_context);
1148     ironlake_pp_state_base_address(ctx, pp_context);
1149     ironlake_pp_state_pointers(ctx, pp_context);
1150     ironlake_pp_urb_layout(ctx, pp_context);
1151     ironlake_pp_cs_urb_layout(ctx, pp_context);
1152     ironlake_pp_constant_buffer(ctx, pp_context);
1153     ironlake_pp_object_walker(ctx, pp_context);
1154     intel_batchbuffer_end_atomic(batch);
1155 }
1156
1157 // update u/v offset when the surface format are packed yuv
1158 static void i965_update_src_surface_static_parameter(
1159     VADriverContextP    ctx, 
1160     struct i965_post_processing_context *pp_context,
1161     const struct i965_surface *surface)
1162 {
1163     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1164     int fourcc = pp_get_surface_fourcc(ctx, surface);
1165
1166     switch (fourcc) {
1167     case VA_FOURCC('Y', 'U', 'Y', '2'):
1168         pp_static_parameter->grf1.source_packed_u_offset = 1;
1169         pp_static_parameter->grf1.source_packed_v_offset = 3;
1170         break;
1171     case VA_FOURCC('U', 'Y', 'V', 'Y'):
1172         pp_static_parameter->grf1.source_packed_y_offset = 1;
1173         pp_static_parameter->grf1.source_packed_v_offset = 2;
1174         break;
1175     case VA_FOURCC('B', 'G', 'R', 'X'):
1176     case VA_FOURCC('B', 'G', 'R', 'A'):
1177         pp_static_parameter->grf1.source_rgb_layout = 0;
1178         break;
1179     case VA_FOURCC('R', 'G', 'B', 'X'):
1180     case VA_FOURCC('R', 'G', 'B', 'A'):
1181         pp_static_parameter->grf1.source_rgb_layout = 1;
1182         break;
1183     default:
1184         break;
1185     }
1186     
1187 }
1188
1189 static void i965_update_dst_surface_static_parameter(
1190     VADriverContextP    ctx, 
1191     struct i965_post_processing_context *pp_context,
1192     const struct i965_surface *surface)
1193 {
1194     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1195     int fourcc = pp_get_surface_fourcc(ctx, surface);
1196
1197     switch (fourcc) {
1198     case VA_FOURCC('Y', 'U', 'Y', '2'):
1199         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_u_offset = 1;
1200         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 3;
1201         break;
1202     case VA_FOURCC('U', 'Y', 'V', 'Y'):
1203         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_y_offset = 1;
1204         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 2;
1205         break;
1206     case VA_FOURCC('B', 'G', 'R', 'X'):
1207     case VA_FOURCC('B', 'G', 'R', 'A'):
1208         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 0;
1209         break;
1210     case VA_FOURCC('R', 'G', 'B', 'X'):
1211     case VA_FOURCC('R', 'G', 'B', 'A'):
1212         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 1;
1213         break;
1214     default:
1215         break;
1216     }
1217     
1218 }
1219
1220 static void
1221 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1222                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1223                           int width, int height, int pitch, int format, 
1224                           int index, int is_target)
1225 {
1226     struct i965_surface_state *ss;
1227     dri_bo *ss_bo;
1228     unsigned int tiling;
1229     unsigned int swizzle;
1230
1231     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1232     ss_bo = pp_context->surface_state_binding_table.bo;
1233     assert(ss_bo);
1234
1235     dri_bo_map(ss_bo, True);
1236     assert(ss_bo->virtual);
1237     ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1238     memset(ss, 0, sizeof(*ss));
1239     ss->ss0.surface_type = I965_SURFACE_2D;
1240     ss->ss0.surface_format = format;
1241     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1242     ss->ss2.width = width - 1;
1243     ss->ss2.height = height - 1;
1244     ss->ss3.pitch = pitch - 1;
1245     pp_set_surface_tiling(ss, tiling);
1246     dri_bo_emit_reloc(ss_bo,
1247                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1248                       surf_bo_offset,
1249                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
1250                       surf_bo);
1251     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1252     dri_bo_unmap(ss_bo);
1253 }
1254
1255 static void
1256 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1257                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1258                            int width, int height, int wpitch,
1259                            int xoffset, int yoffset,
1260                            int format, int interleave_chroma,
1261                            int index)
1262 {
1263     struct i965_surface_state2 *ss2;
1264     dri_bo *ss2_bo;
1265     unsigned int tiling;
1266     unsigned int swizzle;
1267
1268     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1269     ss2_bo = pp_context->surface_state_binding_table.bo;
1270     assert(ss2_bo);
1271
1272     dri_bo_map(ss2_bo, True);
1273     assert(ss2_bo->virtual);
1274     ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1275     memset(ss2, 0, sizeof(*ss2));
1276     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1277     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1278     ss2->ss1.width = width - 1;
1279     ss2->ss1.height = height - 1;
1280     ss2->ss2.pitch = wpitch - 1;
1281     ss2->ss2.interleave_chroma = interleave_chroma;
1282     ss2->ss2.surface_format = format;
1283     ss2->ss3.x_offset_for_cb = xoffset;
1284     ss2->ss3.y_offset_for_cb = yoffset;
1285     pp_set_surface2_tiling(ss2, tiling);
1286     dri_bo_emit_reloc(ss2_bo,
1287                       I915_GEM_DOMAIN_RENDER, 0,
1288                       surf_bo_offset,
1289                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
1290                       surf_bo);
1291     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1292     dri_bo_unmap(ss2_bo);
1293 }
1294
1295 static void
1296 gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1297                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1298                           int width, int height, int pitch, int format, 
1299                           int index, int is_target)
1300 {
1301     struct gen7_surface_state *ss;
1302     dri_bo *ss_bo;
1303     unsigned int tiling;
1304     unsigned int swizzle;
1305
1306     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1307     ss_bo = pp_context->surface_state_binding_table.bo;
1308     assert(ss_bo);
1309
1310     dri_bo_map(ss_bo, True);
1311     assert(ss_bo->virtual);
1312     ss = (struct gen7_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1313     memset(ss, 0, sizeof(*ss));
1314     ss->ss0.surface_type = I965_SURFACE_2D;
1315     ss->ss0.surface_format = format;
1316     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1317     ss->ss2.width = width - 1;
1318     ss->ss2.height = height - 1;
1319     ss->ss3.pitch = pitch - 1;
1320     gen7_pp_set_surface_tiling(ss, tiling);
1321     dri_bo_emit_reloc(ss_bo,
1322                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1323                       surf_bo_offset,
1324                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
1325                       surf_bo);
1326     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1327     dri_bo_unmap(ss_bo);
1328 }
1329
1330 static void
1331 gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1332                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1333                            int width, int height, int wpitch,
1334                            int xoffset, int yoffset,
1335                            int format, int interleave_chroma,
1336                            int index)
1337 {
1338     struct gen7_surface_state2 *ss2;
1339     dri_bo *ss2_bo;
1340     unsigned int tiling;
1341     unsigned int swizzle;
1342
1343     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1344     ss2_bo = pp_context->surface_state_binding_table.bo;
1345     assert(ss2_bo);
1346
1347     dri_bo_map(ss2_bo, True);
1348     assert(ss2_bo->virtual);
1349     ss2 = (struct gen7_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1350     memset(ss2, 0, sizeof(*ss2));
1351     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1352     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1353     ss2->ss1.width = width - 1;
1354     ss2->ss1.height = height - 1;
1355     ss2->ss2.pitch = wpitch - 1;
1356     ss2->ss2.interleave_chroma = interleave_chroma;
1357     ss2->ss2.surface_format = format;
1358     ss2->ss3.x_offset_for_cb = xoffset;
1359     ss2->ss3.y_offset_for_cb = yoffset;
1360     gen7_pp_set_surface2_tiling(ss2, tiling);
1361     dri_bo_emit_reloc(ss2_bo,
1362                       I915_GEM_DOMAIN_RENDER, 0,
1363                       surf_bo_offset,
1364                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
1365                       surf_bo);
1366     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1367     dri_bo_unmap(ss2_bo);
1368 }
1369
1370 static void 
1371 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1372                                 const struct i965_surface *surface, 
1373                                 int base_index, int is_target,
1374                                 int *width, int *height, int *pitch, int *offset)
1375 {
1376     struct i965_driver_data *i965 = i965_driver_data(ctx);
1377     struct object_surface *obj_surface;
1378     struct object_image *obj_image;
1379     dri_bo *bo;
1380     int fourcc = pp_get_surface_fourcc(ctx, surface);
1381     const int Y = 0;
1382     const int U = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 2 : 1;
1383     const int V = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 1 : 2;
1384     const int UV = 1;
1385     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1386     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); 
1387     int full_packed_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || 
1388                               fourcc == VA_FOURCC('R', 'G', 'B', 'X') || 
1389                               fourcc == VA_FOURCC('B', 'G', 'R', 'A') || 
1390                               fourcc == VA_FOURCC('B', 'G', 'R', 'X'));
1391     int scale_factor_of_1st_plane_width_in_byte = 1;
1392                               
1393     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1394         obj_surface = SURFACE(surface->id);
1395         bo = obj_surface->bo;
1396         width[0] = obj_surface->orig_width;
1397         height[0] = obj_surface->orig_height;
1398         pitch[0] = obj_surface->width;
1399         offset[0] = 0;
1400
1401         if (full_packed_format) {
1402             scale_factor_of_1st_plane_width_in_byte = 4; 
1403             pitch[0] = obj_surface->width * 4;
1404         }
1405         else if (packed_yuv ) {
1406             scale_factor_of_1st_plane_width_in_byte =  2; 
1407             pitch[0] = obj_surface->width * 2;
1408         }
1409         else if (interleaved_uv) {
1410             width[1] = obj_surface->orig_width;
1411             height[1] = obj_surface->orig_height / 2;
1412             pitch[1] = obj_surface->width;
1413             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1414         } else {
1415             width[1] = obj_surface->orig_width / 2;
1416             height[1] = obj_surface->orig_height / 2;
1417             pitch[1] = obj_surface->width / 2;
1418             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1419             width[2] = obj_surface->orig_width / 2;
1420             height[2] = obj_surface->orig_height / 2;
1421             pitch[2] = obj_surface->width / 2;
1422             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
1423         }
1424     } else {
1425         obj_image = IMAGE(surface->id);
1426         bo = obj_image->bo;
1427         width[0] = obj_image->image.width;
1428         height[0] = obj_image->image.height;
1429         pitch[0] = obj_image->image.pitches[0];
1430         offset[0] = obj_image->image.offsets[0];
1431
1432         if (full_packed_format) {
1433             scale_factor_of_1st_plane_width_in_byte = 4;
1434         }
1435         else if (packed_yuv ) {
1436             scale_factor_of_1st_plane_width_in_byte = 2;
1437         }
1438         else if (interleaved_uv) {
1439             width[1] = obj_image->image.width;
1440             height[1] = obj_image->image.height / 2;
1441             pitch[1] = obj_image->image.pitches[1];
1442             offset[1] = obj_image->image.offsets[1];
1443         } else {
1444             width[1] = obj_image->image.width / 2;
1445             height[1] = obj_image->image.height / 2;
1446             pitch[1] = obj_image->image.pitches[1];
1447             offset[1] = obj_image->image.offsets[1];
1448             width[2] = obj_image->image.width / 2;
1449             height[2] = obj_image->image.height / 2;
1450             pitch[2] = obj_image->image.pitches[2];
1451             offset[2] = obj_image->image.offsets[2];
1452         }
1453     }
1454
1455     /* Y surface */
1456     i965_pp_set_surface_state(ctx, pp_context,
1457                               bo, offset[Y],
1458                               width[Y] *scale_factor_of_1st_plane_width_in_byte / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
1459                               base_index, is_target);
1460
1461     if (!packed_yuv && !full_packed_format) {
1462         if (interleaved_uv) {
1463             i965_pp_set_surface_state(ctx, pp_context,
1464                                       bo, offset[UV],
1465                                       width[UV] / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
1466                                       base_index + 1, is_target);
1467         } else {
1468             /* U surface */
1469             i965_pp_set_surface_state(ctx, pp_context,
1470                                       bo, offset[U],
1471                                       width[U] / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
1472                                       base_index + 1, is_target);
1473
1474             /* V surface */
1475             i965_pp_set_surface_state(ctx, pp_context,
1476                                       bo, offset[V],
1477                                       width[V] / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
1478                                       base_index + 2, is_target);
1479         }
1480     }
1481
1482 }
1483
1484 static void 
1485 gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1486                                      const struct i965_surface *surface, 
1487                                      int base_index, int is_target,
1488                                      int *width, int *height, int *pitch, int *offset)
1489 {
1490     struct i965_driver_data *i965 = i965_driver_data(ctx);
1491     struct object_surface *obj_surface;
1492     struct object_image *obj_image;
1493     dri_bo *bo;
1494     int fourcc = pp_get_surface_fourcc(ctx, surface);
1495     const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1496                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1;
1497     const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1498                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 1 : 2;
1499     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1500     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y'));
1501
1502     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1503         obj_surface = SURFACE(surface->id);
1504         bo = obj_surface->bo;
1505         width[0] = obj_surface->orig_width;
1506         height[0] = obj_surface->orig_height;
1507         pitch[0] = obj_surface->width;
1508         offset[0] = 0;
1509
1510         if (packed_yuv) {
1511             if (is_target)
1512                 width[0] = obj_surface->orig_width * 2; /* surface format is R8, so double the width */
1513             else
1514                 width[0] = obj_surface->orig_width;     /* surface foramt is YCBCR, width is specified in units of pixels */
1515
1516             pitch[0] = obj_surface->width * 2;
1517         }
1518
1519         width[1] = obj_surface->cb_cr_width;
1520         height[1] = obj_surface->cb_cr_height;
1521         pitch[1] = obj_surface->cb_cr_pitch;
1522         offset[1] = obj_surface->y_cb_offset * obj_surface->width;
1523
1524         width[2] = obj_surface->cb_cr_width;
1525         height[2] = obj_surface->cb_cr_height;
1526         pitch[2] = obj_surface->cb_cr_pitch;
1527         offset[2] = obj_surface->y_cr_offset * obj_surface->width;
1528     } else {
1529         obj_image = IMAGE(surface->id);
1530         bo = obj_image->bo;
1531         width[0] = obj_image->image.width;
1532         height[0] = obj_image->image.height;
1533         pitch[0] = obj_image->image.pitches[0];
1534         offset[0] = obj_image->image.offsets[0];
1535
1536         if (packed_yuv) {
1537             if (is_target)
1538                 width[0] = obj_image->image.width * 2;  /* surface format is R8, so double the width */
1539             else
1540                 width[0] = obj_image->image.width;      /* surface foramt is YCBCR, width is specified in units of pixels */
1541         } else if (interleaved_uv) {
1542             width[1] = obj_image->image.width / 2;
1543             height[1] = obj_image->image.height / 2;
1544             pitch[1] = obj_image->image.pitches[1];
1545             offset[1] = obj_image->image.offsets[1];
1546         } else {
1547             width[1] = obj_image->image.width / 2;
1548             height[1] = obj_image->image.height / 2;
1549             pitch[1] = obj_image->image.pitches[U];
1550             offset[1] = obj_image->image.offsets[U];
1551             width[2] = obj_image->image.width / 2;
1552             height[2] = obj_image->image.height / 2;
1553             pitch[2] = obj_image->image.pitches[V];
1554             offset[2] = obj_image->image.offsets[V];
1555         }
1556     }
1557
1558     if (is_target) {
1559         gen7_pp_set_surface_state(ctx, pp_context,
1560                                   bo, 0,
1561                                   width[0] / 4, height[0], pitch[0],
1562                                   I965_SURFACEFORMAT_R8_SINT,
1563                                   base_index, 1);
1564
1565         if (!packed_yuv) {
1566             if (interleaved_uv) {
1567                 gen7_pp_set_surface_state(ctx, pp_context,
1568                                           bo, offset[1],
1569                                           width[1] / 2, height[1], pitch[1],
1570                                           I965_SURFACEFORMAT_R8G8_SINT,
1571                                           base_index + 1, 1);
1572             } else {
1573                 gen7_pp_set_surface_state(ctx, pp_context,
1574                                           bo, offset[1],
1575                                           width[1] / 4, height[1], pitch[1],
1576                                           I965_SURFACEFORMAT_R8_SINT,
1577                                           base_index + 1, 1);
1578                 gen7_pp_set_surface_state(ctx, pp_context,
1579                                           bo, offset[2],
1580                                           width[2] / 4, height[2], pitch[2],
1581                                           I965_SURFACEFORMAT_R8_SINT,
1582                                           base_index + 2, 1);
1583             }
1584         }
1585     } else {
1586         int format0 = SURFACE_FORMAT_Y8_UNORM;
1587
1588         switch (fourcc) {
1589         case VA_FOURCC('Y', 'U', 'Y', '2'):
1590             format0 = SURFACE_FORMAT_YCRCB_NORMAL;
1591             break;
1592
1593         case VA_FOURCC('U', 'Y', 'V', 'Y'):
1594             format0 = SURFACE_FORMAT_YCRCB_SWAPY;
1595             break;
1596
1597         default:
1598             break;
1599         }
1600
1601         gen7_pp_set_surface2_state(ctx, pp_context,
1602                                    bo, offset[0],
1603                                    width[0], height[0], pitch[0],
1604                                    0, 0,
1605                                    format0, 0,
1606                                    base_index);
1607
1608         if (!packed_yuv) {
1609             if (interleaved_uv) {
1610                 gen7_pp_set_surface2_state(ctx, pp_context,
1611                                            bo, offset[1],
1612                                            width[1], height[1], pitch[1],
1613                                            0, 0,
1614                                            SURFACE_FORMAT_R8B8_UNORM, 0,
1615                                            base_index + 1);
1616             } else {
1617                 gen7_pp_set_surface2_state(ctx, pp_context,
1618                                            bo, offset[1],
1619                                            width[1], height[1], pitch[1],
1620                                            0, 0,
1621                                            SURFACE_FORMAT_R8_UNORM, 0,
1622                                            base_index + 1);
1623                 gen7_pp_set_surface2_state(ctx, pp_context,
1624                                            bo, offset[2],
1625                                            width[2], height[2], pitch[2],
1626                                            0, 0,
1627                                            SURFACE_FORMAT_R8_UNORM, 0,
1628                                            base_index + 2);
1629             }
1630         }
1631     }
1632 }
1633
/* Horizontal step count of the "null" filter: always one; the context is ignored. */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1639
/* Vertical step count of the "null" filter: always one; the context is ignored. */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1645
/*
 * Per-block hook of the "null" filter: nothing to configure.  Always
 * returns 0; none of the arguments is used.
 */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;
    (void)x;
    (void)y;

    return 0;
}
1651
1652 static VAStatus
1653 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1654                    const struct i965_surface *src_surface,
1655                    const VARectangle *src_rect,
1656                    struct i965_surface *dst_surface,
1657                    const VARectangle *dst_rect,
1658                    void *filter_param)
1659 {
1660     /* private function & data */
1661     pp_context->pp_x_steps = pp_null_x_steps;
1662     pp_context->pp_y_steps = pp_null_y_steps;
1663     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
1664
1665     dst_surface->flags = src_surface->flags;
1666
1667     return VA_STATUS_SUCCESS;
1668 }
1669
/* Horizontal step count of the load/save filter: always one; the context is ignored. */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1675
1676 static int
1677 pp_load_save_y_steps(void *private_context)
1678 {
1679     struct pp_load_save_context *pp_load_save_context = private_context;
1680
1681     return pp_load_save_context->dest_h / 8;
1682 }
1683
1684 static int
1685 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1686 {
1687     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1688     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
1689
1690     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_load_save_context->dest_x;
1691     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_load_save_context->dest_y;
1692
1693     return 0;
1694 }
1695
1696 static void calculate_boundary_block_mask(struct i965_post_processing_context *pp_context, const VARectangle *dst_rect)
1697 {
1698     int i;
1699     /* x offset of dest surface must be dword aligned.
1700      * so we have to extend dst surface on left edge, and mask out pixels not interested
1701      */
1702     if (dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT) {
1703         pp_context->block_horizontal_mask_left = 0;
1704         for (i=dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; i<GPU_ASM_BLOCK_WIDTH; i++)
1705         {
1706             pp_context->block_horizontal_mask_left |= 1<<i;
1707         }
1708     }
1709     else {
1710         pp_context->block_horizontal_mask_left = 0xffff;
1711     }
1712     
1713     int dst_width_adjust = dst_rect->width + dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; 
1714     if (dst_width_adjust%GPU_ASM_BLOCK_WIDTH){
1715         pp_context->block_horizontal_mask_right = (1 << (dst_width_adjust%GPU_ASM_BLOCK_WIDTH)) - 1;
1716     }
1717     else {
1718         pp_context->block_horizontal_mask_right = 0xffff;
1719     }
1720     
1721     if (dst_rect->height%GPU_ASM_BLOCK_HEIGHT){
1722         pp_context->block_vertical_mask_bottom = (1 << (dst_rect->height%GPU_ASM_BLOCK_HEIGHT)) - 1;
1723     }
1724     else {
1725         pp_context->block_vertical_mask_bottom = 0xff;
1726     }
1727
1728 }
1729 static VAStatus
1730 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1731                                 const struct i965_surface *src_surface,
1732                                 const VARectangle *src_rect,
1733                                 struct i965_surface *dst_surface,
1734                                 const VARectangle *dst_rect,
1735                                 void *filter_param)
1736 {
1737     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
1738     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1739     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1740     int width[3], height[3], pitch[3], offset[3];
1741     const int Y = 0;
1742
1743     /* source surface */
1744     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
1745                                     width, height, pitch, offset);
1746
1747     /* destination surface */
1748     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
1749                                     width, height, pitch, offset);
1750
1751     /* private function & data */
1752     pp_context->pp_x_steps = pp_load_save_x_steps;
1753     pp_context->pp_y_steps = pp_load_save_y_steps;
1754     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
1755
1756     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;;
1757     pp_load_save_context->dest_x = dst_rect->x - dst_left_edge_extend;
1758     pp_load_save_context->dest_y = dst_rect->y;
1759     pp_load_save_context->dest_h = ALIGN(dst_rect->height, 8);
1760     pp_load_save_context->dest_w = ALIGN(dst_rect->width+dst_left_edge_extend, 16);
1761
1762     pp_inline_parameter->grf5.block_count_x = pp_load_save_context->dest_w / 16;   /* 1 x N */
1763     pp_inline_parameter->grf5.number_blocks = pp_load_save_context->dest_w / 16;
1764
1765     pp_static_parameter->grf3.horizontal_origin_offset = src_rect->x;
1766     pp_static_parameter->grf3.vertical_origin_offset = src_rect->y;
1767
1768     // update u/v offset for packed yuv
1769     i965_update_src_surface_static_parameter (ctx, pp_context, src_surface);
1770     i965_update_dst_surface_static_parameter (ctx, pp_context, dst_surface);
1771
1772     dst_surface->flags = src_surface->flags;
1773
1774     return VA_STATUS_SUCCESS;
1775 }
1776
/*
 * Horizontal step count callback for the scaling kernel.
 * The private context is not consulted; the answer is always 1.
 */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1782
1783 static int
1784 pp_scaling_y_steps(void *private_context)
1785 {
1786     struct pp_scaling_context *pp_scaling_context = private_context;
1787
1788     return pp_scaling_context->dest_h / 8;
1789 }
1790
1791 static int
1792 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1793 {
1794     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
1795     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1796     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1797     float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1798     float src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
1799
1800     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
1801     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
1802     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
1803     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
1804     
1805     return 0;
1806 }
1807
1808 static VAStatus
1809 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1810                            const struct i965_surface *src_surface,
1811                            const VARectangle *src_rect,
1812                            struct i965_surface *dst_surface,
1813                            const VARectangle *dst_rect,
1814                            void *filter_param)
1815 {
1816     struct i965_driver_data *i965 = i965_driver_data(ctx);
1817     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
1818     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1819     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1820     struct object_surface *obj_surface;
1821     struct i965_sampler_state *sampler_state;
1822     int in_w, in_h, in_wpitch, in_hpitch;
1823     int out_w, out_h, out_wpitch, out_hpitch;
1824
1825     /* source surface */
1826     obj_surface = SURFACE(src_surface->id);
1827     in_w = obj_surface->orig_width;
1828     in_h = obj_surface->orig_height;
1829     in_wpitch = obj_surface->width;
1830     in_hpitch = obj_surface->height;
1831
1832     /* source Y surface index 1 */
1833     i965_pp_set_surface_state(ctx, pp_context,
1834                               obj_surface->bo, 0,
1835                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1836                               1, 0);
1837
1838     /* source UV surface index 2 */
1839     i965_pp_set_surface_state(ctx, pp_context,
1840                               obj_surface->bo, in_wpitch * in_hpitch,
1841                               in_w / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1842                               2, 0);
1843
1844     /* destination surface */
1845     obj_surface = SURFACE(dst_surface->id);
1846     out_w = obj_surface->orig_width;
1847     out_h = obj_surface->orig_height;
1848     out_wpitch = obj_surface->width;
1849     out_hpitch = obj_surface->height;
1850
1851     /* destination Y surface index 7 */
1852     i965_pp_set_surface_state(ctx, pp_context,
1853                               obj_surface->bo, 0,
1854                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1855                               7, 1);
1856
1857     /* destination UV surface index 8 */
1858     i965_pp_set_surface_state(ctx, pp_context,
1859                               obj_surface->bo, out_wpitch * out_hpitch,
1860                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1861                               8, 1);
1862
1863     /* sampler state */
1864     dri_bo_map(pp_context->sampler_state_table.bo, True);
1865     assert(pp_context->sampler_state_table.bo->virtual);
1866     sampler_state = pp_context->sampler_state_table.bo->virtual;
1867
1868     /* SIMD16 Y index 1 */
1869     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
1870     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1871     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1872     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1873     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1874
1875     /* SIMD16 UV index 2 */
1876     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
1877     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1878     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1879     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1880     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1881
1882     dri_bo_unmap(pp_context->sampler_state_table.bo);
1883
1884     /* private function & data */
1885     pp_context->pp_x_steps = pp_scaling_x_steps;
1886     pp_context->pp_y_steps = pp_scaling_y_steps;
1887     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
1888
1889     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
1890     float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
1891     pp_scaling_context->dest_x = dst_rect->x - dst_left_edge_extend;
1892     pp_scaling_context->dest_y = dst_rect->y;
1893     pp_scaling_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
1894     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 8);
1895     pp_scaling_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
1896     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
1897
1898     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
1899
1900     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
1901     pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
1902     pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
1903
1904     dst_surface->flags = src_surface->flags;
1905
1906     return VA_STATUS_SUCCESS;
1907 }
1908
1909 static int
1910 pp_avs_x_steps(void *private_context)
1911 {
1912     struct pp_avs_context *pp_avs_context = private_context;
1913
1914     return pp_avs_context->dest_w / 16;
1915 }
1916
/*
 * Vertical step count callback for the AVS kernel.
 * The private context is not consulted; the answer is always 1.
 */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1922
1923 static int
1924 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1925 {
1926     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1927     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1928     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1929     float src_x_steping, src_y_steping, video_step_delta;
1930     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
1931
1932     if (pp_static_parameter->grf4.r4_2.avs.nlas == 0) {
1933         src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1934         pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_avs_context->src_normalized_x;
1935     } else if (tmp_w >= pp_avs_context->dest_w) {
1936         pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1937         pp_inline_parameter->grf6.video_step_delta = 0;
1938         
1939         if (x == 0) {
1940             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
1941                 pp_avs_context->src_normalized_x;
1942         } else {
1943             src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1944             video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1945             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1946                 16 * 15 * video_step_delta / 2;
1947         }
1948     } else {
1949         int n0, n1, n2, nls_left, nls_right;
1950         int factor_a = 5, factor_b = 4;
1951         float f;
1952
1953         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
1954         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
1955         n2 = tmp_w / (16 * factor_a);
1956         nls_left = n0 + n2;
1957         nls_right = n1 + n2;
1958         f = (float) n2 * 16 / tmp_w;
1959         
1960         if (n0 < 5) {
1961             pp_inline_parameter->grf6.video_step_delta = 0.0;
1962
1963             if (x == 0) {
1964                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
1965                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1966             } else {
1967                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1968                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1969                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1970                     16 * 15 * video_step_delta / 2;
1971             }
1972         } else {
1973             if (x < nls_left) {
1974                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
1975                 float a = f / (nls_left * 16 * factor_b);
1976                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
1977                 
1978                 pp_inline_parameter->grf6.video_step_delta = b;
1979
1980                 if (x == 0) {
1981                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1982                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a;
1983                 } else {
1984                     src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1985                     video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1986                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1987                         16 * 15 * video_step_delta / 2;
1988                     pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
1989                 }
1990             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
1991                 /* scale the center linearly */
1992                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1993                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1994                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1995                     16 * 15 * video_step_delta / 2;
1996                 pp_inline_parameter->grf6.video_step_delta = 0.0;
1997                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1998             } else {
1999                 float a = f / (nls_right * 16 * factor_b);
2000                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
2001
2002                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2003                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2004                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2005                     16 * 15 * video_step_delta / 2;
2006                 pp_inline_parameter->grf6.video_step_delta = -b;
2007
2008                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
2009                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
2010                 else
2011                     pp_inline_parameter->grf5.normalized_video_x_scaling_step -= b * 16;
2012             }
2013         }
2014     }
2015
2016     src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2017     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
2018     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2019     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
2020
2021     return 0;
2022 }
2023
2024 static VAStatus
2025 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2026                        const struct i965_surface *src_surface,
2027                        const VARectangle *src_rect,
2028                        struct i965_surface *dst_surface,
2029                        const VARectangle *dst_rect,
2030                        void *filter_param,
2031                        int nlas)
2032 {
2033     struct i965_driver_data *i965 = i965_driver_data(ctx);
2034     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2035     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2036     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2037     struct object_surface *obj_surface;
2038     struct i965_sampler_8x8 *sampler_8x8;
2039     struct i965_sampler_8x8_state *sampler_8x8_state;
2040     int index;
2041     int in_w, in_h, in_wpitch, in_hpitch;
2042     int out_w, out_h, out_wpitch, out_hpitch;
2043     int i;
2044
2045     /* surface */
2046     obj_surface = SURFACE(src_surface->id);
2047     in_w = obj_surface->orig_width;
2048     in_h = obj_surface->orig_height;
2049     in_wpitch = obj_surface->width;
2050     in_hpitch = obj_surface->height;
2051
2052     /* source Y surface index 1 */
2053     i965_pp_set_surface2_state(ctx, pp_context,
2054                                obj_surface->bo, 0,
2055                                in_w, in_h, in_wpitch,
2056                                0, 0,
2057                                SURFACE_FORMAT_Y8_UNORM, 0,
2058                                1);
2059
2060     /* source UV surface index 2 */
2061     i965_pp_set_surface2_state(ctx, pp_context,
2062                                obj_surface->bo, in_wpitch * in_hpitch,
2063                                in_w / 2, in_h / 2, in_wpitch,
2064                                0, 0,
2065                                SURFACE_FORMAT_R8B8_UNORM, 0,
2066                                2);
2067
2068     /* destination surface */
2069     obj_surface = SURFACE(dst_surface->id);
2070     out_w = obj_surface->orig_width;
2071     out_h = obj_surface->orig_height;
2072     out_wpitch = obj_surface->width;
2073     out_hpitch = obj_surface->height;
2074     assert(out_w <= out_wpitch && out_h <= out_hpitch);
2075
2076     /* destination Y surface index 7 */
2077     i965_pp_set_surface_state(ctx, pp_context,
2078                               obj_surface->bo, 0,
2079                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2080                               7, 1);
2081
2082     /* destination UV surface index 8 */
2083     i965_pp_set_surface_state(ctx, pp_context,
2084                               obj_surface->bo, out_wpitch * out_hpitch,
2085                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2086                               8, 1);
2087
2088     /* sampler 8x8 state */
2089     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2090     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2091     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2092     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2093     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2094
2095     for (i = 0; i < 17; i++) {
2096         /* for Y channel, currently ignore */
2097         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x00;
2098         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x00;
2099         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x08;
2100         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x18;
2101         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x18;
2102         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x08;
2103         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x00;
2104         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x00;
2105         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x00;
2106         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x00;
2107         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x10;
2108         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x10;
2109         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x10;
2110         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x10;
2111         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x00;
2112         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x00;
2113         /* for U/V channel, 0.25 */
2114         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2115         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2116         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
2117         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
2118         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
2119         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
2120         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2121         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2122         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2123         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2124         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
2125         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
2126         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
2127         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
2128         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2129         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2130     }
2131
2132     sampler_8x8_state->dw136.default_sharpness_level = 0;
2133     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
2134     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2135     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2136     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2137
2138     /* sampler 8x8 */
2139     dri_bo_map(pp_context->sampler_state_table.bo, True);
2140     assert(pp_context->sampler_state_table.bo->virtual);
2141     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
2142     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2143
2144     /* sample_8x8 Y index 1 */
2145     index = 1;
2146     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2147     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2148     sampler_8x8[index].dw0.ief_bypass = 1;
2149     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2150     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2151     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2152     sampler_8x8[index].dw2.global_noise_estimation = 22;
2153     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2154     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2155     sampler_8x8[index].dw3.strong_edge_weight = 7;
2156     sampler_8x8[index].dw3.regular_weight = 2;
2157     sampler_8x8[index].dw3.non_edge_weight = 0;
2158     sampler_8x8[index].dw3.gain_factor = 40;
2159     sampler_8x8[index].dw4.steepness_boost = 0;
2160     sampler_8x8[index].dw4.steepness_threshold = 0;
2161     sampler_8x8[index].dw4.mr_boost = 0;
2162     sampler_8x8[index].dw4.mr_threshold = 5;
2163     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2164     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2165     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2166     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2167     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2168     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2169     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2170     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2171     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2172     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2173     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2174     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2175     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2176     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2177     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2178     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2179     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2180     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2181     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2182     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2183     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2184     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2185     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2186     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2187     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2188     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2189     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2190     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2191     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2192     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2193     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2194     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2195     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2196     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2197     sampler_8x8[index].dw13.limiter_boost = 0;
2198     sampler_8x8[index].dw13.minimum_limiter = 10;
2199     sampler_8x8[index].dw13.maximum_limiter = 11;
2200     sampler_8x8[index].dw14.clip_limiter = 130;
2201     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2202                       I915_GEM_DOMAIN_RENDER, 
2203                       0,
2204                       0,
2205                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2206                       pp_context->sampler_state_table.bo_8x8);
2207
2208     /* sample_8x8 UV index 2 */
2209     index = 2;
2210     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2211     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2212     sampler_8x8[index].dw0.ief_bypass = 1;
2213     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2214     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2215     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2216     sampler_8x8[index].dw2.global_noise_estimation = 22;
2217     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2218     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2219     sampler_8x8[index].dw3.strong_edge_weight = 7;
2220     sampler_8x8[index].dw3.regular_weight = 2;
2221     sampler_8x8[index].dw3.non_edge_weight = 0;
2222     sampler_8x8[index].dw3.gain_factor = 40;
2223     sampler_8x8[index].dw4.steepness_boost = 0;
2224     sampler_8x8[index].dw4.steepness_threshold = 0;
2225     sampler_8x8[index].dw4.mr_boost = 0;
2226     sampler_8x8[index].dw4.mr_threshold = 5;
2227     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2228     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2229     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2230     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2231     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2232     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2233     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2234     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2235     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2236     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2237     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2238     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2239     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2240     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2241     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2242     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2243     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2244     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2245     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2246     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2247     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2248     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2249     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2250     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2251     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2252     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2253     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2254     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2255     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2256     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2257     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2258     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2259     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2260     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2261     sampler_8x8[index].dw13.limiter_boost = 0;
2262     sampler_8x8[index].dw13.minimum_limiter = 10;
2263     sampler_8x8[index].dw13.maximum_limiter = 11;
2264     sampler_8x8[index].dw14.clip_limiter = 130;
2265     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2266                       I915_GEM_DOMAIN_RENDER, 
2267                       0,
2268                       0,
2269                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2270                       pp_context->sampler_state_table.bo_8x8);
2271
2272     dri_bo_unmap(pp_context->sampler_state_table.bo);
2273
2274     /* private function & data */
2275     pp_context->pp_x_steps = pp_avs_x_steps;
2276     pp_context->pp_y_steps = pp_avs_y_steps;
2277     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
2278
2279     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
2280     float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
2281     pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
2282     pp_avs_context->dest_y = dst_rect->y;
2283     pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2284     pp_avs_context->dest_h = ALIGN(dst_rect->height, 8);
2285     pp_avs_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
2286     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
2287     pp_avs_context->src_w = src_rect->width + src_left_edge_extend;
2288     pp_avs_context->src_h = src_rect->height;
2289
2290     pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
2291     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2292
2293     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
2294     pp_inline_parameter->grf5.block_count_x = 1;        /* M x 1 */
2295     pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
2296     pp_inline_parameter->grf6.video_step_delta = 0.0;
2297
2298     dst_surface->flags = src_surface->flags;
2299
2300     return VA_STATUS_SUCCESS;
2301 }
2302
2303 static VAStatus
2304 pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2305                             const struct i965_surface *src_surface,
2306                             const VARectangle *src_rect,
2307                             struct i965_surface *dst_surface,
2308                             const VARectangle *dst_rect,
2309                             void *filter_param)
2310 {
2311     return pp_nv12_avs_initialize(ctx, pp_context,
2312                                   src_surface,
2313                                   src_rect,
2314                                   dst_surface,
2315                                   dst_rect,
2316                                   filter_param,
2317                                   1);
2318 }
2319
2320 static VAStatus
2321 gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2322                              const struct i965_surface *src_surface,
2323                              const VARectangle *src_rect,
2324                              struct i965_surface *dst_surface,
2325                              const VARectangle *dst_rect,
2326                              void *filter_param)
2327 {
2328     return pp_nv12_avs_initialize(ctx, pp_context,
2329                                   src_surface,
2330                                   src_rect,
2331                                   dst_surface,
2332                                   dst_rect,
2333                                   filter_param,
2334                                   0);    
2335 }
2336
2337 static int
2338 gen7_pp_avs_x_steps(void *private_context)
2339 {
2340     struct pp_avs_context *pp_avs_context = private_context;
2341
2342     return pp_avs_context->dest_w / 16;
2343 }
2344
2345 static int
2346 gen7_pp_avs_y_steps(void *private_context)
2347 {
2348     struct pp_avs_context *pp_avs_context = private_context;
2349
2350     return pp_avs_context->dest_h / 16;
2351 }
2352
2353 static int
2354 gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2355 {
2356     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2357     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2358
2359     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2360     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
2361     pp_inline_parameter->grf7.constant_0 = 0xffffffff;
2362     pp_inline_parameter->grf7.sampler_load_main_video_x_scaling_step = 1.0 / pp_avs_context->src_w;
2363
2364     return 0;
2365 }
2366
2367 static void gen7_update_src_surface_uv_offset(VADriverContextP    ctx, 
2368                                               struct i965_post_processing_context *pp_context,
2369                                               const struct i965_surface *surface)
2370 {
2371     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2372     int fourcc = pp_get_surface_fourcc(ctx, surface);
2373     
2374     if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) {
2375         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
2376         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
2377         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
2378     } else if (fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
2379         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 1;
2380         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 0;
2381         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 2;
2382     }
2383 }
2384
2385 static VAStatus
2386 gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2387                            const struct i965_surface *src_surface,
2388                            const VARectangle *src_rect,
2389                            struct i965_surface *dst_surface,
2390                            const VARectangle *dst_rect,
2391                            void *filter_param)
2392 {
2393     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2394     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2395     struct gen7_sampler_8x8 *sampler_8x8;
2396     struct i965_sampler_8x8_state *sampler_8x8_state;
2397     int index, i;
2398     int width[3], height[3], pitch[3], offset[3];
2399     int src_width, src_height;
2400
2401     /* source surface */
2402     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
2403                                          width, height, pitch, offset);
2404     src_width = width[0];
2405     src_height = height[0];
2406
2407     /* destination surface */
2408     gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
2409                                          width, height, pitch, offset);
2410
2411     /* sampler 8x8 state */
2412     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2413     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2414     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2415     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2416     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2417
2418     for (i = 0; i < 17; i++) {
2419         /* for Y channel, currently ignore */
2420         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0;
2421         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0;
2422         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0;
2423         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x0;
2424         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x0;
2425         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0;
2426         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0;
2427         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0;
2428         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0;
2429         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0;
2430         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0;
2431         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x0;
2432         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x0;
2433         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0;
2434         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0;
2435         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0;
2436         /* for U/V channel, 0.25 */
2437         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2438         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2439         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
2440         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
2441         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
2442         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
2443         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2444         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2445         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2446         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2447         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
2448         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
2449         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
2450         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
2451         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2452         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2453     }
2454
2455     sampler_8x8_state->dw136.default_sharpness_level = 0;
2456     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
2457     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2458     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2459     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2460
2461     /* sampler 8x8 */
2462     dri_bo_map(pp_context->sampler_state_table.bo, True);
2463     assert(pp_context->sampler_state_table.bo->virtual);
2464     assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
2465     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2466
2467     /* sample_8x8 Y index 4 */
2468     index = 4;
2469     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2470     sampler_8x8[index].dw0.global_noise_estimation = 255;
2471     sampler_8x8[index].dw0.ief_bypass = 1;
2472
2473     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2474
2475     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2476     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2477     sampler_8x8[index].dw2.r5x_coefficient = 9;
2478     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2479     sampler_8x8[index].dw2.r5c_coefficient = 3;
2480
2481     sampler_8x8[index].dw3.r3x_coefficient = 27;
2482     sampler_8x8[index].dw3.r3c_coefficient = 5;
2483     sampler_8x8[index].dw3.gain_factor = 40;
2484     sampler_8x8[index].dw3.non_edge_weight = 1;
2485     sampler_8x8[index].dw3.regular_weight = 2;
2486     sampler_8x8[index].dw3.strong_edge_weight = 7;
2487     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2488
2489     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2490                       I915_GEM_DOMAIN_RENDER, 
2491                       0,
2492                       0,
2493                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2494                       pp_context->sampler_state_table.bo_8x8);
2495
2496     /* sample_8x8 UV index 8 */
2497     index = 8;
2498     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2499     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2500     sampler_8x8[index].dw0.global_noise_estimation = 255;
2501     sampler_8x8[index].dw0.ief_bypass = 1;
2502     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2503     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2504     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2505     sampler_8x8[index].dw2.r5x_coefficient = 9;
2506     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2507     sampler_8x8[index].dw2.r5c_coefficient = 3;
2508     sampler_8x8[index].dw3.r3x_coefficient = 27;
2509     sampler_8x8[index].dw3.r3c_coefficient = 5;
2510     sampler_8x8[index].dw3.gain_factor = 40;
2511     sampler_8x8[index].dw3.non_edge_weight = 1;
2512     sampler_8x8[index].dw3.regular_weight = 2;
2513     sampler_8x8[index].dw3.strong_edge_weight = 7;
2514     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2515
2516     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2517                       I915_GEM_DOMAIN_RENDER, 
2518                       0,
2519                       0,
2520                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2521                       pp_context->sampler_state_table.bo_8x8);
2522
2523     /* sampler_8x8 V, index 12 */
2524     index = 12;
2525     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2526     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2527     sampler_8x8[index].dw0.global_noise_estimation = 255;
2528     sampler_8x8[index].dw0.ief_bypass = 1;
2529     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2530     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2531     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2532     sampler_8x8[index].dw2.r5x_coefficient = 9;
2533     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2534     sampler_8x8[index].dw2.r5c_coefficient = 3;
2535     sampler_8x8[index].dw3.r3x_coefficient = 27;
2536     sampler_8x8[index].dw3.r3c_coefficient = 5;
2537     sampler_8x8[index].dw3.gain_factor = 40;
2538     sampler_8x8[index].dw3.non_edge_weight = 1;
2539     sampler_8x8[index].dw3.regular_weight = 2;
2540     sampler_8x8[index].dw3.strong_edge_weight = 7;
2541     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2542
2543     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2544                       I915_GEM_DOMAIN_RENDER, 
2545                       0,
2546                       0,
2547                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2548                       pp_context->sampler_state_table.bo_8x8);
2549
2550     dri_bo_unmap(pp_context->sampler_state_table.bo);
2551
2552     /* private function & data */
2553     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
2554     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
2555     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
2556
2557     pp_avs_context->dest_x = dst_rect->x;
2558     pp_avs_context->dest_y = dst_rect->y;
2559     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
2560     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
2561     pp_avs_context->src_w = src_rect->width;
2562     pp_avs_context->src_h = src_rect->height;
2563
2564     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
2565     dw = MAX(dw, pp_avs_context->dest_w);
2566
2567     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
2568     pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */
2569     pp_static_parameter->grf2.avs_wa_width = dw;
2570     pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * dw);
2571     pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * dw);
2572
2573     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
2574     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / pp_avs_context->dest_h;
2575     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = -(float)pp_avs_context->dest_y / pp_avs_context->dest_h;
2576     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = -(float)pp_avs_context->dest_x / dw;
2577
2578     gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
2579
2580     dst_surface->flags = src_surface->flags;
2581
2582     return VA_STATUS_SUCCESS;
2583 }
2584
/*
 * The DNDI kernel is walked in a single horizontal step; the private
 * context is not consulted.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2590
2591 static int
2592 pp_dndi_y_steps(void *private_context)
2593 {
2594     struct pp_dndi_context *pp_dndi_context = private_context;
2595
2596     return pp_dndi_context->dest_h / 4;
2597 }
2598
2599 static int
2600 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2601 {
2602     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2603
2604     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2605     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
2606
2607     return 0;
2608 }
2609
2610 static VAStatus
2611 pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2612                         const struct i965_surface *src_surface,
2613                         const VARectangle *src_rect,
2614                         struct i965_surface *dst_surface,
2615                         const VARectangle *dst_rect,
2616                         void *filter_param)
2617 {
2618     struct i965_driver_data *i965 = i965_driver_data(ctx);
2619     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
2620     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2621     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2622     struct object_surface *obj_surface;
2623     struct i965_sampler_dndi *sampler_dndi;
2624     int index;
2625     int w, h;
2626     int orig_w, orig_h;
2627     int dndi_top_first = 1;
2628
2629     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
2630         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
2631
2632     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
2633         dndi_top_first = 1;
2634     else
2635         dndi_top_first = 0;
2636
2637     /* surface */
2638     obj_surface = SURFACE(src_surface->id);
2639     orig_w = obj_surface->orig_width;
2640     orig_h = obj_surface->orig_height;
2641     w = obj_surface->width;
2642     h = obj_surface->height;
2643
2644     if (pp_context->stmm.bo == NULL) {
2645         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2646                                            "STMM surface",
2647                                            w * h,
2648                                            4096);
2649         assert(pp_context->stmm.bo);
2650     }
2651
2652     /* source UV surface index 2 */
2653     i965_pp_set_surface_state(ctx, pp_context,
2654                               obj_surface->bo, w * h,
2655                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2656                               2, 0);
2657
2658     /* source YUV surface index 4 */
2659     i965_pp_set_surface2_state(ctx, pp_context,
2660                                obj_surface->bo, 0,
2661                                orig_w, orig_h, w,
2662                                0, h,
2663                                SURFACE_FORMAT_PLANAR_420_8, 1,
2664                                4);
2665
2666     /* source STMM surface index 20 */
2667     i965_pp_set_surface_state(ctx, pp_context,
2668                               pp_context->stmm.bo, 0,
2669                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2670                               20, 1);
2671
2672     /* destination surface */
2673     obj_surface = SURFACE(dst_surface->id);
2674     orig_w = obj_surface->orig_width;
2675     orig_h = obj_surface->orig_height;
2676     w = obj_surface->width;
2677     h = obj_surface->height;
2678
2679     /* destination Y surface index 7 */
2680     i965_pp_set_surface_state(ctx, pp_context,
2681                               obj_surface->bo, 0,
2682                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2683                               7, 1);
2684
2685     /* destination UV surface index 8 */
2686     i965_pp_set_surface_state(ctx, pp_context,
2687                               obj_surface->bo, w * h,
2688                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2689                               8, 1);
2690     /* sampler dndi */
2691     dri_bo_map(pp_context->sampler_state_table.bo, True);
2692     assert(pp_context->sampler_state_table.bo->virtual);
2693     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2694     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2695
2696     /* sample dndi index 1 */
2697     index = 0;
2698     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2699     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
2700     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2701     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2702
2703     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2704     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 4;
2705     sampler_dndi[index].dw1.stmm_c2 = 1;
2706     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2707     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2708
2709     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
2710     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
2711     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
2712     sampler_dndi[index].dw2.good_neighbor_threshold = 4;                // 0-63
2713
2714     sampler_dndi[index].dw3.maximum_stmm = 128;
2715     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2716     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2717     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2718     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2719
2720     sampler_dndi[index].dw4.sdi_delta = 8;
2721     sampler_dndi[index].dw4.sdi_threshold = 128;
2722     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2723     sampler_dndi[index].dw4.stmm_shift_up = 0;
2724     sampler_dndi[index].dw4.stmm_shift_down = 0;
2725     sampler_dndi[index].dw4.minimum_stmm = 0;
2726
2727     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 8;
2728     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 32;
2729     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 64;
2730     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 32;
2731
2732     sampler_dndi[index].dw6.dn_enable = 1;
2733     sampler_dndi[index].dw6.di_enable = 1;
2734     sampler_dndi[index].dw6.di_partial = 0;
2735     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2736     sampler_dndi[index].dw6.dndi_stream_id = 0;
2737     sampler_dndi[index].dw6.dndi_first_frame = 1;
2738     sampler_dndi[index].dw6.progressive_dn = 0;
2739     sampler_dndi[index].dw6.fmd_tear_threshold = 63;
2740     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2741     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2742
2743     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
2744     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
2745     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2746     sampler_dndi[index].dw7.column_width_minus1 = 0;
2747
2748     dri_bo_unmap(pp_context->sampler_state_table.bo);
2749
2750     /* private function & data */
2751     pp_context->pp_x_steps = pp_dndi_x_steps;
2752     pp_context->pp_y_steps = pp_dndi_y_steps;
2753     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
2754
2755     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
2756     pp_static_parameter->grf1.r1_6.di.top_field_first = dndi_top_first;
2757     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 0;
2758     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 0;
2759
2760     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
2761     pp_inline_parameter->grf5.number_blocks = w / 16;
2762     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
2763     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
2764
2765     pp_dndi_context->dest_w = w;
2766     pp_dndi_context->dest_h = h;
2767
2768     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
2769
2770     return VA_STATUS_SUCCESS;
2771 }
2772
/*
 * The DN (denoise) kernel is walked in a single horizontal step; the
 * private context is not consulted.
 */
static int
pp_dn_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2778
2779 static int
2780 pp_dn_y_steps(void *private_context)
2781 {
2782     struct pp_dn_context *pp_dn_context = private_context;
2783
2784     return pp_dn_context->dest_h / 8;
2785 }
2786
2787 static int
2788 pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2789 {
2790     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2791
2792     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2793     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
2794
2795     return 0;
2796 }
2797
2798 static VAStatus
2799 pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2800                       const struct i965_surface *src_surface,
2801                       const VARectangle *src_rect,
2802                       struct i965_surface *dst_surface,
2803                       const VARectangle *dst_rect,
2804                       void *filter_param)
2805 {
2806     struct i965_driver_data *i965 = i965_driver_data(ctx);
2807     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
2808     struct object_surface *obj_surface;
2809     struct i965_sampler_dndi *sampler_dndi;
2810     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2811     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2812     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
2813     int index;
2814     int w, h;
2815     int orig_w, orig_h;
2816     int dn_strength = 15;
2817     int dndi_top_first = 1;
2818     int dn_progressive = 0;
2819
2820     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
2821         dndi_top_first = 1;
2822         dn_progressive = 1;
2823     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
2824         dndi_top_first = 1;
2825         dn_progressive = 0;
2826     } else {
2827         dndi_top_first = 0;
2828         dn_progressive = 0;
2829     }
2830
2831     if (dn_filter_param) {
2832         float value = dn_filter_param->value;
2833         
2834         if (value > 1.0)
2835             value = 1.0;
2836         
2837         if (value < 0.0)
2838             value = 0.0;
2839
2840         dn_strength = (int)(value * 31.0F);
2841     }
2842
2843     /* surface */
2844     obj_surface = SURFACE(src_surface->id);
2845     orig_w = obj_surface->orig_width;
2846     orig_h = obj_surface->orig_height;
2847     w = obj_surface->width;
2848     h = obj_surface->height;
2849
2850     if (pp_context->stmm.bo == NULL) {
2851         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2852                                            "STMM surface",
2853                                            w * h,
2854                                            4096);
2855         assert(pp_context->stmm.bo);
2856     }
2857
2858     /* source UV surface index 2 */
2859     i965_pp_set_surface_state(ctx, pp_context,
2860                               obj_surface->bo, w * h,
2861                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2862                               2, 0);
2863
2864     /* source YUV surface index 4 */
2865     i965_pp_set_surface2_state(ctx, pp_context,
2866                                obj_surface->bo, 0,
2867                                orig_w, orig_h, w,
2868                                0, h,
2869                                SURFACE_FORMAT_PLANAR_420_8, 1,
2870                                4);
2871
2872     /* source STMM surface index 20 */
2873     i965_pp_set_surface_state(ctx, pp_context,
2874                               pp_context->stmm.bo, 0,
2875                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2876                               20, 1);
2877
2878     /* destination surface */
2879     obj_surface = SURFACE(dst_surface->id);
2880     orig_w = obj_surface->orig_width;
2881     orig_h = obj_surface->orig_height;
2882     w = obj_surface->width;
2883     h = obj_surface->height;
2884
2885     /* destination Y surface index 7 */
2886     i965_pp_set_surface_state(ctx, pp_context,
2887                               obj_surface->bo, 0,
2888                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2889                               7, 1);
2890
2891     /* destination UV surface index 8 */
2892     i965_pp_set_surface_state(ctx, pp_context,
2893                               obj_surface->bo, w * h,
2894                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2895                               8, 1);
2896     /* sampler dn */
2897     dri_bo_map(pp_context->sampler_state_table.bo, True);
2898     assert(pp_context->sampler_state_table.bo->virtual);
2899     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2900     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2901
2902     /* sample dndi index 1 */
2903     index = 0;
2904     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2905     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
2906     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2907     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2908
2909     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2910     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
2911     sampler_dndi[index].dw1.stmm_c2 = 0;
2912     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2913     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2914
2915     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
2916     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
2917     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
2918     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
2919
2920     sampler_dndi[index].dw3.maximum_stmm = 128;
2921     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2922     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2923     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2924     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2925
2926     sampler_dndi[index].dw4.sdi_delta = 8;
2927     sampler_dndi[index].dw4.sdi_threshold = 128;
2928     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2929     sampler_dndi[index].dw4.stmm_shift_up = 0;
2930     sampler_dndi[index].dw4.stmm_shift_down = 0;
2931     sampler_dndi[index].dw4.minimum_stmm = 0;
2932
2933     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
2934     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
2935     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
2936     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
2937
2938     sampler_dndi[index].dw6.dn_enable = 1;
2939     sampler_dndi[index].dw6.di_enable = 0;
2940     sampler_dndi[index].dw6.di_partial = 0;
2941     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2942     sampler_dndi[index].dw6.dndi_stream_id = 1;
2943     sampler_dndi[index].dw6.dndi_first_frame = 1;
2944     sampler_dndi[index].dw6.progressive_dn = dn_progressive;
2945     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
2946     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2947     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2948
2949     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
2950     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
2951     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2952     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
2953
2954     dri_bo_unmap(pp_context->sampler_state_table.bo);
2955
2956     /* private function & data */
2957     pp_context->pp_x_steps = pp_dn_x_steps;
2958     pp_context->pp_y_steps = pp_dn_y_steps;
2959     pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
2960
2961     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
2962     pp_static_parameter->grf1.r1_6.di.top_field_first = 0;
2963     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 64;
2964     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 192;
2965
2966     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
2967     pp_inline_parameter->grf5.number_blocks = w / 16;
2968     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
2969     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
2970
2971     pp_dn_context->dest_w = w;
2972     pp_dn_context->dest_h = h;
2973
2974     dst_surface->flags = src_surface->flags;
2975     
2976     return VA_STATUS_SUCCESS;
2977 }
2978
2979 static int
2980 gen7_pp_dndi_x_steps(void *private_context)
2981 {
2982     struct pp_dndi_context *pp_dndi_context = private_context;
2983
2984     return pp_dndi_context->dest_w / 16;
2985 }
2986
2987 static int
2988 gen7_pp_dndi_y_steps(void *private_context)
2989 {
2990     struct pp_dndi_context *pp_dndi_context = private_context;
2991
2992     return pp_dndi_context->dest_h / 4;
2993 }
2994
2995 static int
2996 gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2997 {
2998     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2999
3000     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16;
3001     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 4;
3002
3003     return 0;
3004 }
3005
3006 static VAStatus
3007 gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3008                              const struct i965_surface *src_surface,
3009                              const VARectangle *src_rect,
3010                              struct i965_surface *dst_surface,
3011                              const VARectangle *dst_rect,
3012                              void *filter_param)
3013 {
3014     struct i965_driver_data *i965 = i965_driver_data(ctx);
3015     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
3016     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3017     struct object_surface *obj_surface;
3018     struct gen7_sampler_dndi *sampler_dndi;
3019     int index;
3020     int w, h;
3021     int orig_w, orig_h;
3022     int dndi_top_first = 1;
3023
3024     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
3025         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
3026
3027     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
3028         dndi_top_first = 1;
3029     else
3030         dndi_top_first = 0;
3031
3032     /* surface */
3033     obj_surface = SURFACE(src_surface->id);
3034     orig_w = obj_surface->orig_width;
3035     orig_h = obj_surface->orig_height;
3036     w = obj_surface->width;
3037     h = obj_surface->height;
3038
3039     if (pp_context->stmm.bo == NULL) {
3040         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
3041                                            "STMM surface",
3042                                            w * h,
3043                                            4096);
3044         assert(pp_context->stmm.bo);
3045     }
3046
3047     /* source UV surface index 1 */
3048     gen7_pp_set_surface_state(ctx, pp_context,
3049                               obj_surface->bo, w * h,
3050                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3051                               1, 0);
3052
3053     /* source YUV surface index 3 */
3054     gen7_pp_set_surface2_state(ctx, pp_context,
3055                                obj_surface->bo, 0,
3056                                orig_w, orig_h, w,
3057                                0, h,
3058                                SURFACE_FORMAT_PLANAR_420_8, 1,
3059                                3);
3060
3061     /* source (temporal reference) YUV surface index 4 */
3062     gen7_pp_set_surface2_state(ctx, pp_context,
3063                                obj_surface->bo, 0,
3064                                orig_w, orig_h, w,
3065                                0, h,
3066                                SURFACE_FORMAT_PLANAR_420_8, 1,
3067                                4);
3068
3069     /* STMM / History Statistics input surface, index 5 */
3070     gen7_pp_set_surface_state(ctx, pp_context,
3071                               pp_context->stmm.bo, 0,
3072                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3073                               5, 1);
3074
3075     /* destination surface */
3076     obj_surface = SURFACE(dst_surface->id);
3077     orig_w = obj_surface->orig_width;
3078     orig_h = obj_surface->orig_height;
3079     w = obj_surface->width;
3080     h = obj_surface->height;
3081
3082     /* destination(Previous frame) Y surface index 27 */
3083     gen7_pp_set_surface_state(ctx, pp_context,
3084                               obj_surface->bo, 0,
3085                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3086                               27, 1);
3087
3088     /* destination(Previous frame) UV surface index 28 */
3089     gen7_pp_set_surface_state(ctx, pp_context,
3090                               obj_surface->bo, w * h,
3091                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3092                               28, 1);
3093
3094     /* destination(Current frame) Y surface index 30 */
3095     gen7_pp_set_surface_state(ctx, pp_context,
3096                               obj_surface->bo, 0,
3097                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3098                               30, 1);
3099
3100     /* destination(Current frame) UV surface index 31 */
3101     gen7_pp_set_surface_state(ctx, pp_context,
3102                               obj_surface->bo, w * h,
3103                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3104                               31, 1);
3105
3106     /* STMM output surface, index 33 */
3107     gen7_pp_set_surface_state(ctx, pp_context,
3108                               pp_context->stmm.bo, 0,
3109                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3110                               33, 1);
3111
3112
3113     /* sampler dndi */
3114     dri_bo_map(pp_context->sampler_state_table.bo, True);
3115     assert(pp_context->sampler_state_table.bo->virtual);
3116     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3117     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3118
3119     /* sample dndi index 0 */
3120     index = 0;
3121     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
3122     sampler_dndi[index].dw0.dnmh_delt = 8;
3123     sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
3124     sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
3125     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
3126     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
3127
3128     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3129     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
3130     sampler_dndi[index].dw1.stmm_c2 = 0;
3131     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
3132     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
3133
3134     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
3135     sampler_dndi[index].dw2.bne_edge_th = 1;
3136     sampler_dndi[index].dw2.smooth_mv_th = 0;
3137     sampler_dndi[index].dw2.sad_tight_th = 5;
3138     sampler_dndi[index].dw2.cat_slope_minus1 = 9;
3139     sampler_dndi[index].dw2.good_neighbor_th = 4;
3140
3141     sampler_dndi[index].dw3.maximum_stmm = 128;
3142     sampler_dndi[index].dw3.multipler_for_vecm = 2;
3143     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3144     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3145     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3146
3147     sampler_dndi[index].dw4.sdi_delta = 8;
3148     sampler_dndi[index].dw4.sdi_threshold = 128;
3149     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3150     sampler_dndi[index].dw4.stmm_shift_up = 0;
3151     sampler_dndi[index].dw4.stmm_shift_down = 0;
3152     sampler_dndi[index].dw4.minimum_stmm = 0;
3153
3154     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
3155     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
3156     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3157     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3158
3159     sampler_dndi[index].dw6.dn_enable = 0;
3160     sampler_dndi[index].dw6.di_enable = 1;
3161     sampler_dndi[index].dw6.di_partial = 0;
3162     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3163     sampler_dndi[index].dw6.dndi_stream_id = 1;
3164     sampler_dndi[index].dw6.dndi_first_frame = 1;
3165     sampler_dndi[index].dw6.progressive_dn = 0;
3166     sampler_dndi[index].dw6.mcdi_enable = 0;
3167     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
3168     sampler_dndi[index].dw6.cat_th1 = 0;
3169     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
3170     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
3171
3172     sampler_dndi[index].dw7.sad_tha = 5;
3173     sampler_dndi[index].dw7.sad_thb = 10;
3174     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3175     sampler_dndi[index].dw7.mc_pixel_consistency_th = 25;
3176     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3177     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3178     sampler_dndi[index].dw7.neighborpixel_th = 10;
3179     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3180
3181     dri_bo_unmap(pp_context->sampler_state_table.bo);
3182
3183     /* private function & data */
3184     pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
3185     pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
3186     pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
3187
3188     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3189     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3190     pp_static_parameter->grf1.di_top_field_first = 0;
3191     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3192
3193     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3194     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3195     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3196
3197     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3198     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3199
3200     pp_dndi_context->dest_w = w;
3201     pp_dndi_context->dest_h = h;
3202
3203     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3204
3205     return VA_STATUS_SUCCESS;
3206 }
3207
3208 static int
3209 gen7_pp_dn_x_steps(void *private_context)
3210 {
3211     struct pp_dn_context *pp_dn_context = private_context;
3212
3213     return pp_dn_context->dest_w / 16;
3214 }
3215
3216 static int
3217 gen7_pp_dn_y_steps(void *private_context)
3218 {
3219     struct pp_dn_context *pp_dn_context = private_context;
3220
3221     return pp_dn_context->dest_h / 4;
3222 }
3223
3224 static int
3225 gen7_pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3226 {
3227     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3228
3229     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3230     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
3231
3232     return 0;
3233 }
3234
3235 static VAStatus
3236 gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3237                            const struct i965_surface *src_surface,
3238                            const VARectangle *src_rect,
3239                            struct i965_surface *dst_surface,
3240                            const VARectangle *dst_rect,
3241                            void *filter_param)
3242 {
3243     struct i965_driver_data *i965 = i965_driver_data(ctx);
3244     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
3245     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3246     struct object_surface *obj_surface;
3247     struct gen7_sampler_dndi *sampler_dn;
3248     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3249     int index;
3250     int w, h;
3251     int orig_w, orig_h;
3252     int dn_strength = 15;
3253     int dndi_top_first = 1;
3254     int dn_progressive = 0;
3255
3256     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3257         dndi_top_first = 1;
3258         dn_progressive = 1;
3259     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3260         dndi_top_first = 1;
3261         dn_progressive = 0;
3262     } else {
3263         dndi_top_first = 0;
3264         dn_progressive = 0;
3265     }
3266
3267     if (dn_filter_param) {
3268         float value = dn_filter_param->value;
3269         
3270         if (value > 1.0)
3271             value = 1.0;
3272         
3273         if (value < 0.0)
3274             value = 0.0;
3275
3276         dn_strength = (int)(value * 31.0F);
3277     }
3278
3279     /* surface */
3280     obj_surface = SURFACE(src_surface->id);
3281     orig_w = obj_surface->orig_width;
3282     orig_h = obj_surface->orig_height;
3283     w = obj_surface->width;
3284     h = obj_surface->height;
3285
3286     if (pp_context->stmm.bo == NULL) {
3287         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
3288                                            "STMM surface",
3289                                            w * h,
3290                                            4096);
3291         assert(pp_context->stmm.bo);
3292     }
3293
3294     /* source UV surface index 1 */
3295     gen7_pp_set_surface_state(ctx, pp_context,
3296                               obj_surface->bo, w * h,
3297                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3298                               1, 0);
3299
3300     /* source YUV surface index 3 */
3301     gen7_pp_set_surface2_state(ctx, pp_context,
3302                                obj_surface->bo, 0,
3303                                orig_w, orig_h, w,
3304                                0, h,
3305                                SURFACE_FORMAT_PLANAR_420_8, 1,
3306                                3);
3307
3308     /* source (temporal reference) YUV surface index 4 */
3309     gen7_pp_set_surface2_state(ctx, pp_context,
3310                                obj_surface->bo, 0,
3311                                orig_w, orig_h, w,
3312                                0, h,
3313                                SURFACE_FORMAT_PLANAR_420_8, 1,
3314                                4);
3315
3316     /* STMM / History Statistics input surface, index 5 */
3317     gen7_pp_set_surface_state(ctx, pp_context,
3318                               pp_context->stmm.bo, 0,
3319                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3320                               5, 1);
3321
3322     /* destination surface */
3323     obj_surface = SURFACE(dst_surface->id);
3324     orig_w = obj_surface->orig_width;
3325     orig_h = obj_surface->orig_height;
3326     w = obj_surface->width;
3327     h = obj_surface->height;
3328
3329     /* destination Y surface index 24 */
3330     gen7_pp_set_surface_state(ctx, pp_context,
3331                               obj_surface->bo, 0,
3332                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3333                               24, 1);
3334
3335     /* destination UV surface index 25 */
3336     gen7_pp_set_surface_state(ctx, pp_context,
3337                               obj_surface->bo, w * h,
3338                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3339                               25, 1);
3340
3341     /* sampler dn */
3342     dri_bo_map(pp_context->sampler_state_table.bo, True);
3343     assert(pp_context->sampler_state_table.bo->virtual);
3344     assert(sizeof(*sampler_dn) == sizeof(int) * 8);
3345     sampler_dn = pp_context->sampler_state_table.bo->virtual;
3346
3347     /* sample dn index 1 */
3348     index = 0;
3349     sampler_dn[index].dw0.denoise_asd_threshold = 0;
3350     sampler_dn[index].dw0.dnmh_delt = 8;
3351     sampler_dn[index].dw0.vdi_walker_y_stride = 0;
3352     sampler_dn[index].dw0.vdi_walker_frame_sharing_enable = 0;
3353     sampler_dn[index].dw0.denoise_maximum_history = 128;      // 128-240
3354     sampler_dn[index].dw0.denoise_stad_threshold = 0;
3355
3356     sampler_dn[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3357     sampler_dn[index].dw1.denoise_moving_pixel_threshold = 0;
3358     sampler_dn[index].dw1.stmm_c2 = 0;
3359     sampler_dn[index].dw1.low_temporal_difference_threshold = 8;
3360     sampler_dn[index].dw1.temporal_difference_threshold = 16;
3361
3362     sampler_dn[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
3363     sampler_dn[index].dw2.bne_edge_th = 1;
3364     sampler_dn[index].dw2.smooth_mv_th = 0;
3365     sampler_dn[index].dw2.sad_tight_th = 5;
3366     sampler_dn[index].dw2.cat_slope_minus1 = 9;
3367     sampler_dn[index].dw2.good_neighbor_th = 4;
3368
3369     sampler_dn[index].dw3.maximum_stmm = 128;
3370     sampler_dn[index].dw3.multipler_for_vecm = 2;
3371     sampler_dn[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3372     sampler_dn[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3373     sampler_dn[index].dw3.stmm_blending_constant_select = 0;
3374
3375     sampler_dn[index].dw4.sdi_delta = 8;
3376     sampler_dn[index].dw4.sdi_threshold = 128;
3377     sampler_dn[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3378     sampler_dn[index].dw4.stmm_shift_up = 0;
3379     sampler_dn[index].dw4.stmm_shift_down = 0;
3380     sampler_dn[index].dw4.minimum_stmm = 0;
3381
3382     sampler_dn[index].dw5.fmd_temporal_difference_threshold = 0;
3383     sampler_dn[index].dw5.sdi_fallback_mode_2_constant = 0;
3384     sampler_dn[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3385     sampler_dn[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3386
3387     sampler_dn[index].dw6.dn_enable = 1;
3388     sampler_dn[index].dw6.di_enable = 0;
3389     sampler_dn[index].dw6.di_partial = 0;
3390     sampler_dn[index].dw6.dndi_top_first = dndi_top_first;
3391     sampler_dn[index].dw6.dndi_stream_id = 1;
3392     sampler_dn[index].dw6.dndi_first_frame = 1;
3393     sampler_dn[index].dw6.progressive_dn = dn_progressive;
3394     sampler_dn[index].dw6.mcdi_enable = 0;
3395     sampler_dn[index].dw6.fmd_tear_threshold = 32;
3396     sampler_dn[index].dw6.cat_th1 = 0;
3397     sampler_dn[index].dw6.fmd2_vertical_difference_threshold = 32;
3398     sampler_dn[index].dw6.fmd1_vertical_difference_threshold = 32;
3399
3400     sampler_dn[index].dw7.sad_tha = 5;
3401     sampler_dn[index].dw7.sad_thb = 10;
3402     sampler_dn[index].dw7.fmd_for_1st_field_of_current_frame = 2;
3403     sampler_dn[index].dw7.mc_pixel_consistency_th = 25;
3404     sampler_dn[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
3405     sampler_dn[index].dw7.vdi_walker_enable = 0;
3406     sampler_dn[index].dw7.neighborpixel_th = 10;
3407     sampler_dn[index].dw7.column_width_minus1 = w / 16;
3408
3409     dri_bo_unmap(pp_context->sampler_state_table.bo);
3410
3411     /* private function & data */
3412     pp_context->pp_x_steps = gen7_pp_dn_x_steps;
3413     pp_context->pp_y_steps = gen7_pp_dn_y_steps;
3414     pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;
3415
3416     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3417     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3418     pp_static_parameter->grf1.di_top_field_first = 0;
3419     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3420
3421     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3422     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3423     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3424
3425     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3426     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3427
3428     pp_dn_context->dest_w = w;
3429     pp_dn_context->dest_h = h;
3430
3431     dst_surface->flags = src_surface->flags;
3432
3433     return VA_STATUS_SUCCESS;
3434 }
3435
3436 static VAStatus
3437 ironlake_pp_initialize(
3438     VADriverContextP   ctx,
3439     struct i965_post_processing_context *pp_context,
3440     const struct i965_surface *src_surface,
3441     const VARectangle *src_rect,
3442     struct i965_surface *dst_surface,
3443     const VARectangle *dst_rect,
3444     int                pp_index,
3445     void *filter_param
3446 )
3447 {
3448     VAStatus va_status;
3449     struct i965_driver_data *i965 = i965_driver_data(ctx);
3450     struct pp_module *pp_module;
3451     dri_bo *bo;
3452     int static_param_size, inline_param_size;
3453
3454     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3455     bo = dri_bo_alloc(i965->intel.bufmgr,
3456                       "surface state & binding table",
3457                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
3458                       4096);
3459     assert(bo);
3460     pp_context->surface_state_binding_table.bo = bo;
3461
3462     dri_bo_unreference(pp_context->curbe.bo);
3463     bo = dri_bo_alloc(i965->intel.bufmgr,
3464                       "constant buffer",
3465                       4096, 
3466                       4096);
3467     assert(bo);
3468     pp_context->curbe.bo = bo;
3469
3470     dri_bo_unreference(pp_context->idrt.bo);
3471     bo = dri_bo_alloc(i965->intel.bufmgr, 
3472                       "interface discriptor", 
3473                       sizeof(struct i965_interface_descriptor), 
3474                       4096);
3475     assert(bo);
3476     pp_context->idrt.bo = bo;
3477     pp_context->idrt.num_interface_descriptors = 0;
3478
3479     dri_bo_unreference(pp_context->sampler_state_table.bo);
3480     bo = dri_bo_alloc(i965->intel.bufmgr, 
3481                       "sampler state table", 
3482                       4096,
3483                       4096);
3484     assert(bo);
3485     dri_bo_map(bo, True);
3486     memset(bo->virtual, 0, bo->size);
3487     dri_bo_unmap(bo);
3488     pp_context->sampler_state_table.bo = bo;
3489
3490     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3491     bo = dri_bo_alloc(i965->intel.bufmgr, 
3492                       "sampler 8x8 state ",
3493                       4096,
3494                       4096);
3495     assert(bo);
3496     pp_context->sampler_state_table.bo_8x8 = bo;
3497
3498     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3499     bo = dri_bo_alloc(i965->intel.bufmgr, 
3500                       "sampler 8x8 state ",
3501                       4096,
3502                       4096);
3503     assert(bo);
3504     pp_context->sampler_state_table.bo_8x8_uv = bo;
3505
3506     dri_bo_unreference(pp_context->vfe_state.bo);
3507     bo = dri_bo_alloc(i965->intel.bufmgr, 
3508                       "vfe state", 
3509                       sizeof(struct i965_vfe_state), 
3510                       4096);
3511     assert(bo);
3512     pp_context->vfe_state.bo = bo;
3513
3514     static_param_size = sizeof(struct pp_static_parameter);
3515     inline_param_size = sizeof(struct pp_inline_parameter);
3516
3517     memset(pp_context->pp_static_parameter, 0, static_param_size);
3518     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
3519     
3520     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
3521     pp_context->current_pp = pp_index;
3522     pp_module = &pp_context->pp_modules[pp_index];
3523     
3524     if (pp_module->initialize)
3525         va_status = pp_module->initialize(ctx, pp_context,
3526                                           src_surface,
3527                                           src_rect,
3528                                           dst_surface,
3529                                           dst_rect,
3530                                           filter_param);
3531     else
3532         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
3533
3534     return va_status;
3535 }
3536
3537 static VAStatus
3538 ironlake_post_processing(
3539     VADriverContextP   ctx,
3540     struct i965_post_processing_context *pp_context,
3541     const struct i965_surface *src_surface,
3542     const VARectangle *src_rect,
3543     struct i965_surface *dst_surface,
3544     const VARectangle *dst_rect,
3545     int                pp_index,
3546     void *filter_param
3547 )
3548 {
3549     VAStatus va_status;
3550
3551     va_status = ironlake_pp_initialize(ctx, pp_context,
3552                                        src_surface,
3553                                        src_rect,
3554                                        dst_surface,
3555                                        dst_rect,
3556                                        pp_index,
3557                                        filter_param);
3558
3559     if (va_status == VA_STATUS_SUCCESS) {
3560         ironlake_pp_states_setup(ctx, pp_context);
3561         ironlake_pp_pipeline_setup(ctx, pp_context);
3562     }
3563
3564     return va_status;
3565 }
3566
3567 static VAStatus
3568 gen6_pp_initialize(
3569     VADriverContextP   ctx,
3570     struct i965_post_processing_context *pp_context,
3571     const struct i965_surface *src_surface,
3572     const VARectangle *src_rect,
3573     struct i965_surface *dst_surface,
3574     const VARectangle *dst_rect,
3575     int                pp_index,
3576     void *filter_param
3577 )
3578 {
3579     VAStatus va_status;
3580     struct i965_driver_data *i965 = i965_driver_data(ctx);
3581     struct pp_module *pp_module;
3582     dri_bo *bo;
3583     int static_param_size, inline_param_size;
3584
3585     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3586     bo = dri_bo_alloc(i965->intel.bufmgr,
3587                       "surface state & binding table",
3588                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
3589                       4096);
3590     assert(bo);
3591     pp_context->surface_state_binding_table.bo = bo;
3592
3593     dri_bo_unreference(pp_context->curbe.bo);
3594     bo = dri_bo_alloc(i965->intel.bufmgr,
3595                       "constant buffer",
3596                       4096, 
3597                       4096);
3598     assert(bo);
3599     pp_context->curbe.bo = bo;
3600
3601     dri_bo_unreference(pp_context->idrt.bo);
3602     bo = dri_bo_alloc(i965->intel.bufmgr, 
3603                       "interface discriptor", 
3604                       sizeof(struct gen6_interface_descriptor_data), 
3605                       4096);
3606     assert(bo);
3607     pp_context->idrt.bo = bo;
3608     pp_context->idrt.num_interface_descriptors = 0;
3609
3610     dri_bo_unreference(pp_context->sampler_state_table.bo);
3611     bo = dri_bo_alloc(i965->intel.bufmgr, 
3612                       "sampler state table", 
3613                       4096,
3614                       4096);
3615     assert(bo);
3616     dri_bo_map(bo, True);
3617     memset(bo->virtual, 0, bo->size);
3618     dri_bo_unmap(bo);
3619     pp_context->sampler_state_table.bo = bo;
3620
3621     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3622     bo = dri_bo_alloc(i965->intel.bufmgr, 
3623                       "sampler 8x8 state ",
3624                       4096,
3625                       4096);
3626     assert(bo);
3627     pp_context->sampler_state_table.bo_8x8 = bo;
3628
3629     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3630     bo = dri_bo_alloc(i965->intel.bufmgr, 
3631                       "sampler 8x8 state ",
3632                       4096,
3633                       4096);
3634     assert(bo);
3635     pp_context->sampler_state_table.bo_8x8_uv = bo;
3636
3637     dri_bo_unreference(pp_context->vfe_state.bo);
3638     bo = dri_bo_alloc(i965->intel.bufmgr, 
3639                       "vfe state", 
3640                       sizeof(struct i965_vfe_state), 
3641                       4096);
3642     assert(bo);
3643     pp_context->vfe_state.bo = bo;
3644     
3645     if (IS_GEN7(i965->intel.device_id)) {
3646         static_param_size = sizeof(struct gen7_pp_static_parameter);
3647         inline_param_size = sizeof(struct gen7_pp_inline_parameter);
3648     } else {
3649         static_param_size = sizeof(struct pp_static_parameter);
3650         inline_param_size = sizeof(struct pp_inline_parameter);
3651     }
3652
3653     memset(pp_context->pp_static_parameter, 0, static_param_size);
3654     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
3655
3656     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
3657     pp_context->current_pp = pp_index;
3658     pp_module = &pp_context->pp_modules[pp_index];
3659     
3660     if (pp_module->initialize)
3661         va_status = pp_module->initialize(ctx, pp_context,
3662                                           src_surface,
3663                                           src_rect,
3664                                           dst_surface,
3665                                           dst_rect,
3666                                           filter_param);
3667     else
3668         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
3669
3670     calculate_boundary_block_mask(pp_context, dst_rect);
3671     
3672     return va_status;
3673 }
3674
3675 static void
3676 gen6_pp_interface_descriptor_table(VADriverContextP   ctx,
3677                                    struct i965_post_processing_context *pp_context)
3678 {
3679     struct i965_driver_data *i965 = i965_driver_data(ctx);
3680     struct gen6_interface_descriptor_data *desc;
3681     dri_bo *bo;
3682     int pp_index = pp_context->current_pp;
3683
3684     bo = pp_context->idrt.bo;
3685     dri_bo_map(bo, True);
3686     assert(bo->virtual);
3687     desc = bo->virtual;
3688     memset(desc, 0, sizeof(*desc));
3689     desc->desc0.kernel_start_pointer = 
3690         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
3691     desc->desc1.single_program_flow = 1;
3692     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
3693     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
3694     desc->desc2.sampler_state_pointer = 
3695         pp_context->sampler_state_table.bo->offset >> 5;
3696     desc->desc3.binding_table_entry_count = 0;
3697     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
3698     desc->desc4.constant_urb_entry_read_offset = 0;
3699
3700     if (IS_GEN7(i965->intel.device_id))
3701         desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */
3702     else
3703         desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
3704
3705     dri_bo_emit_reloc(bo,
3706                       I915_GEM_DOMAIN_INSTRUCTION, 0,
3707                       0,
3708                       offsetof(struct gen6_interface_descriptor_data, desc0),
3709                       pp_context->pp_modules[pp_index].kernel.bo);
3710
3711     dri_bo_emit_reloc(bo,
3712                       I915_GEM_DOMAIN_INSTRUCTION, 0,
3713                       desc->desc2.sampler_count << 2,
3714                       offsetof(struct gen6_interface_descriptor_data, desc2),
3715                       pp_context->sampler_state_table.bo);
3716
3717     dri_bo_unmap(bo);
3718     pp_context->idrt.num_interface_descriptors++;
3719 }
3720
3721 static void
3722 gen6_pp_upload_constants(VADriverContextP ctx,
3723                          struct i965_post_processing_context *pp_context)
3724 {
3725     struct i965_driver_data *i965 = i965_driver_data(ctx);
3726     unsigned char *constant_buffer;
3727     int param_size;
3728
3729     assert(sizeof(struct pp_static_parameter) == 128);
3730     assert(sizeof(struct gen7_pp_static_parameter) == 192);
3731
3732     if (IS_GEN7(i965->intel.device_id))
3733         param_size = sizeof(struct gen7_pp_static_parameter);
3734     else
3735         param_size = sizeof(struct pp_static_parameter);
3736
3737     dri_bo_map(pp_context->curbe.bo, 1);
3738     assert(pp_context->curbe.bo->virtual);
3739     constant_buffer = pp_context->curbe.bo->virtual;
3740     memcpy(constant_buffer, pp_context->pp_static_parameter, param_size);
3741     dri_bo_unmap(pp_context->curbe.bo);
3742 }
3743
3744 static void
3745 gen6_pp_states_setup(VADriverContextP ctx,
3746                      struct i965_post_processing_context *pp_context)
3747 {
3748     gen6_pp_interface_descriptor_table(ctx, pp_context);
3749     gen6_pp_upload_constants(ctx, pp_context);
3750 }
3751
3752 static void
3753 gen6_pp_pipeline_select(VADriverContextP ctx,
3754                         struct i965_post_processing_context *pp_context)
3755 {
3756     struct intel_batchbuffer *batch = pp_context->batch;
3757
3758     BEGIN_BATCH(batch, 1);
3759     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
3760     ADVANCE_BATCH(batch);
3761 }
3762
3763 static void
3764 gen6_pp_state_base_address(VADriverContextP ctx,
3765                            struct i965_post_processing_context *pp_context)
3766 {
3767     struct intel_batchbuffer *batch = pp_context->batch;
3768
3769     BEGIN_BATCH(batch, 10);
3770     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
3771     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3772     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
3773     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3774     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3775     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3776     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3777     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3778     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3779     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3780     ADVANCE_BATCH(batch);
3781 }
3782
3783 static void
3784 gen6_pp_vfe_state(VADriverContextP ctx,
3785                   struct i965_post_processing_context *pp_context)
3786 {
3787     struct intel_batchbuffer *batch = pp_context->batch;
3788
3789     BEGIN_BATCH(batch, 8);
3790     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
3791     OUT_BATCH(batch, 0);
3792     OUT_BATCH(batch,
3793               (pp_context->urb.num_vfe_entries - 1) << 16 |
3794               pp_context->urb.num_vfe_entries << 8);
3795     OUT_BATCH(batch, 0);
3796     OUT_BATCH(batch,
3797               (pp_context->urb.size_vfe_entry * 2) << 16 |  /* URB Entry Allocation Size, in 256 bits unit */
3798               (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2)); /* CURBE Allocation Size, in 256 bits unit */
3799     OUT_BATCH(batch, 0);
3800     OUT_BATCH(batch, 0);
3801     OUT_BATCH(batch, 0);
3802     ADVANCE_BATCH(batch);
3803 }
3804
3805 static void
3806 gen6_pp_curbe_load(VADriverContextP ctx,
3807                    struct i965_post_processing_context *pp_context)
3808 {
3809     struct intel_batchbuffer *batch = pp_context->batch;
3810
3811     assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32 <= pp_context->curbe.bo->size);
3812
3813     BEGIN_BATCH(batch, 4);
3814     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
3815     OUT_BATCH(batch, 0);
3816     OUT_BATCH(batch,
3817               pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32);
3818     OUT_RELOC(batch, 
3819               pp_context->curbe.bo,
3820               I915_GEM_DOMAIN_INSTRUCTION, 0,
3821               0);
3822     ADVANCE_BATCH(batch);
3823 }
3824
3825 static void
3826 gen6_interface_descriptor_load(VADriverContextP ctx,
3827                                struct i965_post_processing_context *pp_context)
3828 {
3829     struct intel_batchbuffer *batch = pp_context->batch;
3830
3831     BEGIN_BATCH(batch, 4);
3832     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
3833     OUT_BATCH(batch, 0);
3834     OUT_BATCH(batch,
3835               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
3836     OUT_RELOC(batch, 
3837               pp_context->idrt.bo,
3838               I915_GEM_DOMAIN_INSTRUCTION, 0,
3839               0);
3840     ADVANCE_BATCH(batch);
3841 }
3842
3843 static void update_block_mask_parameter(struct i965_post_processing_context *pp_context, int x, int y, int x_steps, int y_steps) 
3844 {
3845     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3846
3847     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3848     pp_inline_parameter->grf6.block_vertical_mask_bottom = pp_context->block_vertical_mask_bottom;
3849     // for the first block, it always on the left edge. the second block will reload horizontal_mask from grf6.block_horizontal_mask_middle
3850     pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_left;
3851     pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
3852     pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_right;
3853
3854     /* 1 x N */
3855     if (x_steps == 1) {
3856         if (y == y_steps-1) {
3857             pp_inline_parameter->grf5.block_vertical_mask = pp_context->block_vertical_mask_bottom;
3858         }
3859         else {
3860             pp_inline_parameter->grf6.block_vertical_mask_bottom = 0xff;
3861         }
3862     }
3863
3864     /* M x 1 */
3865     if (y_steps == 1) {
3866         if (x == 0) { // all blocks in this group are on the left edge
3867             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_left;
3868             pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_left; 
3869         }
3870         else if (x == x_steps-1) {
3871             pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_right;
3872             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_right;
3873         }
3874         else {
3875             pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3876             pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
3877             pp_inline_parameter->grf6.block_horizontal_mask_right = 0xffff;
3878         }
3879     }
3880
3881 }
3882
3883 static void
3884 gen6_pp_object_walker(VADriverContextP ctx,
3885                       struct i965_post_processing_context *pp_context)
3886 {
3887     struct i965_driver_data *i965 = i965_driver_data(ctx);
3888     struct intel_batchbuffer *batch = pp_context->batch;
3889     int x, x_steps, y, y_steps;
3890     int param_size, command_length_in_dws;
3891     dri_bo *command_buffer;
3892     unsigned int *command_ptr;
3893
3894     if (IS_GEN7(i965->intel.device_id))
3895         param_size = sizeof(struct gen7_pp_inline_parameter);
3896     else
3897         param_size = sizeof(struct pp_inline_parameter);
3898
3899     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
3900     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
3901     command_length_in_dws = 6 + (param_size >> 2);
3902     command_buffer = dri_bo_alloc(i965->intel.bufmgr,
3903                                   "command objects buffer",
3904                                   command_length_in_dws * 4 * x_steps * y_steps + 8,
3905                                   4096);
3906
3907     dri_bo_map(command_buffer, 1);
3908     command_ptr = command_buffer->virtual;
3909
3910     for (y = 0; y < y_steps; y++) {
3911         for (x = 0; x < x_steps; x++) {
3912             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
3913                 // some common block parameter update goes here, apply to all pp functions
3914                 update_block_mask_parameter (pp_context, x, y, x_steps, y_steps);
3915                 
3916                 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
3917                 *command_ptr++ = 0;
3918                 *command_ptr++ = 0;
3919                 *command_ptr++ = 0;
3920                 *command_ptr++ = 0;
3921                 *command_ptr++ = 0;
3922                 memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
3923                 command_ptr += (param_size >> 2);
3924             }
3925         }
3926     }
3927
3928     if (command_length_in_dws * x_steps * y_steps % 2 == 0)
3929         *command_ptr++ = 0;
3930
3931     *command_ptr = MI_BATCH_BUFFER_END;
3932
3933     dri_bo_unmap(command_buffer);
3934
3935     BEGIN_BATCH(batch, 2);
3936     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
3937     OUT_RELOC(batch, command_buffer, 
3938               I915_GEM_DOMAIN_COMMAND, 0, 
3939               0);
3940     ADVANCE_BATCH(batch);
3941     
3942     dri_bo_unreference(command_buffer);
3943
3944     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
3945      * will cause control to pass back to ring buffer 
3946      */
3947     intel_batchbuffer_end_atomic(batch);
3948     intel_batchbuffer_flush(batch);
3949     intel_batchbuffer_start_atomic(batch, 0x1000);
3950 }
3951
3952 static void
3953 gen6_pp_pipeline_setup(VADriverContextP ctx,
3954                        struct i965_post_processing_context *pp_context)
3955 {
3956     struct intel_batchbuffer *batch = pp_context->batch;
3957
3958     intel_batchbuffer_start_atomic(batch, 0x1000);
3959     intel_batchbuffer_emit_mi_flush(batch);
3960     gen6_pp_pipeline_select(ctx, pp_context);
3961     gen6_pp_state_base_address(ctx, pp_context);
3962     gen6_pp_vfe_state(ctx, pp_context);
3963     gen6_pp_curbe_load(ctx, pp_context);
3964     gen6_interface_descriptor_load(ctx, pp_context);
3965     gen6_pp_object_walker(ctx, pp_context);
3966     intel_batchbuffer_end_atomic(batch);
3967 }
3968
3969 static VAStatus
3970 gen6_post_processing(
3971     VADriverContextP   ctx,
3972     struct i965_post_processing_context *pp_context,
3973     const struct i965_surface *src_surface,
3974     const VARectangle *src_rect,
3975     struct i965_surface *dst_surface,
3976     const VARectangle *dst_rect,
3977     int                pp_index,
3978     void * filter_param
3979 )
3980 {
3981     VAStatus va_status;
3982     
3983     va_status = gen6_pp_initialize(ctx, pp_context,
3984                                    src_surface,
3985                                    src_rect,
3986                                    dst_surface,
3987                                    dst_rect,
3988                                    pp_index,
3989                                    filter_param);
3990
3991     if (va_status == VA_STATUS_SUCCESS) {
3992         gen6_pp_states_setup(ctx, pp_context);
3993         gen6_pp_pipeline_setup(ctx, pp_context);
3994     }
3995
3996     return va_status;
3997 }
3998
3999 static VAStatus
4000 i965_post_processing_internal(
4001     VADriverContextP   ctx,
4002     struct i965_post_processing_context *pp_context,
4003     const struct i965_surface *src_surface,
4004     const VARectangle *src_rect,
4005     struct i965_surface *dst_surface,
4006     const VARectangle *dst_rect,
4007     int                pp_index,
4008     void *filter_param
4009 )
4010 {
4011     struct i965_driver_data *i965 = i965_driver_data(ctx);
4012     VAStatus va_status;
4013
4014     if (IS_GEN6(i965->intel.device_id) ||
4015         IS_GEN7(i965->intel.device_id))
4016         va_status = gen6_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
4017     else
4018         va_status = ironlake_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
4019     
4020     return va_status;
4021 }
4022
/*
 * Forward declarations of the surface create/destroy entry points.
 * i965_post_processing() below calls them to allocate its output surface
 * and to release an intermediate one.
 * NOTE(review): presumably defined in the main driver source file; a
 * shared header declaration would be preferable -- confirm.
 */
VAStatus 
i965_DestroySurfaces(VADriverContextP ctx,
                     VASurfaceID *surface_list,
                     int num_surfaces);
VAStatus 
i965_CreateSurfaces(VADriverContextP ctx,
                    int width,
                    int height,
                    int format,
                    int num_surfaces,
                    VASurfaceID *surfaces);
4034
/*
 * Split a packed 0xAARRGGBB colour into Y, U, V and alpha bytes.
 *
 * The conversion uses integer coefficients scaled by 1000, with an offset
 * of 16 for Y and 128 for U and V. Division truncates toward zero.
 * NOTE(review): the coefficients appear to be the usual BT.601
 * limited-range matrix -- confirm if that matters to a caller.
 *
 * @param argb  packed colour, alpha in bits 31..24, red 23..16,
 *              green 15..8, blue 7..0
 * @param y,u,v receive the converted components
 * @param a     receives the alpha byte unchanged
 */
static void
rgb_to_yuv(unsigned int argb,
           unsigned char *y,
           unsigned char *u,
           unsigned char *v,
           unsigned char *a)
{
    const int red   = (argb >> 16) & 0xff;
    const int green = (argb >> 8) & 0xff;
    const int blue  = argb & 0xff;

    /* weighted sums, still scaled by 1000 */
    const int luma_x1000 = 257 * red + 504 * green + 98 * blue;
    const int cr_x1000   = 439 * red - 368 * green - 71 * blue;
    const int cb_x1000   = -148 * red - 291 * green + 439 * blue;

    *y = luma_x1000 / 1000 + 16;
    *v = cr_x1000 / 1000 + 128;
    *u = cb_x1000 / 1000 + 128;
    *a = (argb >> 24) & 0xff;
}
4051
4052 static void 
4053 i965_vpp_clear_surface(VADriverContextP ctx,
4054                        struct i965_post_processing_context *pp_context,
4055                        VASurfaceID surface,
4056                        unsigned int color)
4057 {
4058     struct i965_driver_data *i965 = i965_driver_data(ctx);
4059     struct intel_batchbuffer *batch = pp_context->batch;
4060     struct object_surface *obj_surface = SURFACE(surface);
4061     unsigned int blt_cmd, br13;
4062     unsigned int tiling = 0, swizzle = 0;
4063     int pitch;
4064     unsigned char y, u, v, a = 0;
4065
4066     /* Currently only support NV12 surface */
4067     if (!obj_surface || obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
4068         return;
4069
4070     rgb_to_yuv(color, &y, &u, &v, &a);
4071
4072     if (a == 0)
4073         return;
4074
4075     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
4076     blt_cmd = XY_COLOR_BLT_CMD;
4077     pitch = obj_surface->width;
4078
4079     if (tiling != I915_TILING_NONE) {
4080         blt_cmd |= XY_COLOR_BLT_DST_TILED;
4081         pitch >>= 2;
4082     }
4083
4084     br13 = 0xf0 << 16;
4085     br13 |= BR13_8;
4086     br13 |= pitch;
4087
4088     if (IS_GEN6(i965->intel.device_id) ||
4089         IS_GEN7(i965->intel.device_id)) {
4090         intel_batchbuffer_start_atomic_blt(batch, 48);
4091         BEGIN_BLT_BATCH(batch, 12);
4092     } else {
4093         intel_batchbuffer_start_atomic(batch, 48);
4094         BEGIN_BATCH(batch, 12);
4095     }
4096
4097     OUT_BATCH(batch, blt_cmd);
4098     OUT_BATCH(batch, br13);
4099     OUT_BATCH(batch,
4100               0 << 16 |
4101               0);
4102     OUT_BATCH(batch,
4103               obj_surface->height << 16 |
4104               obj_surface->width);
4105     OUT_RELOC(batch, obj_surface->bo, 
4106               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4107               0);
4108     OUT_BATCH(batch, y);
4109
4110     br13 = 0xf0 << 16;
4111     br13 |= BR13_565;
4112     br13 |= pitch;
4113
4114     OUT_BATCH(batch, blt_cmd);
4115     OUT_BATCH(batch, br13);
4116     OUT_BATCH(batch,
4117               0 << 16 |
4118               0);
4119     OUT_BATCH(batch,
4120               obj_surface->height / 2 << 16 |
4121               obj_surface->width / 2);
4122     OUT_RELOC(batch, obj_surface->bo, 
4123               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4124               obj_surface->width * obj_surface->y_cb_offset);
4125     OUT_BATCH(batch, v << 8 | u);
4126
4127     ADVANCE_BATCH(batch);
4128     intel_batchbuffer_end_atomic(batch);
4129 }
4130
4131 VASurfaceID
4132 i965_post_processing(
4133     VADriverContextP   ctx,
4134     VASurfaceID        surface,
4135     const VARectangle *src_rect,
4136     const VARectangle *dst_rect,
4137     unsigned int       flags,
4138     int               *has_done_scaling  
4139 )
4140 {
4141     struct i965_driver_data *i965 = i965_driver_data(ctx);
4142     VASurfaceID in_surface_id = surface;
4143     VASurfaceID out_surface_id = VA_INVALID_ID;
4144     
4145     *has_done_scaling = 0;
4146
4147     if (HAS_PP(i965)) {
4148         struct object_surface *obj_surface;
4149         VAStatus status;
4150         struct i965_surface src_surface;
4151         struct i965_surface dst_surface;
4152
4153         obj_surface = SURFACE(in_surface_id);
4154
4155         /* Currently only support post processing for NV12 surface */
4156         if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
4157             return out_surface_id;
4158
4159         _i965LockMutex(&i965->pp_mutex);
4160
4161         if (flags & I965_PP_FLAG_MCDI) {
4162             status = i965_CreateSurfaces(ctx,
4163                                          obj_surface->orig_width,
4164                                          obj_surface->orig_height,
4165                                          VA_RT_FORMAT_YUV420,
4166                                          1,
4167                                          &out_surface_id);
4168             assert(status == VA_STATUS_SUCCESS);
4169             obj_surface = SURFACE(out_surface_id);
4170             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4171             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
4172             src_surface.id = in_surface_id;
4173             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4174             src_surface.flags = (flags & I965_PP_FLAG_TOP_FIELD) ? 
4175                 I965_SURFACE_FLAG_TOP_FIELD_FIRST : I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST;
4176             dst_surface.id = out_surface_id;
4177             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4178             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4179
4180             i965_post_processing_internal(ctx, i965->pp_context,
4181                                           &src_surface,
4182                                           src_rect,
4183                                           &dst_surface,
4184                                           dst_rect,
4185                                           PP_NV12_DNDI,
4186                                           NULL);
4187         }
4188
4189         if (flags & I965_PP_FLAG_AVS) {
4190             struct i965_render_state *render_state = &i965->render_state;
4191             struct intel_region *dest_region = render_state->draw_region;
4192
4193             if (out_surface_id != VA_INVALID_ID)
4194                 in_surface_id = out_surface_id;
4195
4196             status = i965_CreateSurfaces(ctx,
4197                                          dest_region->width,
4198                                          dest_region->height,
4199                                          VA_RT_FORMAT_YUV420,
4200                                          1,
4201                                          &out_surface_id);
4202             assert(status == VA_STATUS_SUCCESS);
4203             obj_surface = SURFACE(out_surface_id);
4204             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4205             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
4206             src_surface.id = in_surface_id;
4207             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4208             src_surface.flags = I965_SURFACE_FLAG_FRAME;
4209             dst_surface.id = out_surface_id;
4210             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4211             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4212
4213             i965_post_processing_internal(ctx, i965->pp_context,
4214                                           &src_surface,
4215                                           src_rect,
4216                                           &dst_surface,
4217                                           dst_rect,
4218                                           PP_NV12_AVS,
4219                                           NULL);
4220
4221             if (in_surface_id != surface)
4222                 i965_DestroySurfaces(ctx, &in_surface_id, 1);
4223                 
4224             *has_done_scaling = 1;
4225         }
4226
4227         _i965UnlockMutex(&i965->pp_mutex);
4228     }
4229
4230     return out_surface_id;
4231 }       
4232
4233 static VAStatus
4234 i965_image_pl1_rgbx_processing(VADriverContextP ctx,
4235                           const struct i965_surface *src_surface,
4236                           const VARectangle *src_rect,
4237                           struct i965_surface *dst_surface,
4238                           const VARectangle *dst_rect)
4239 {
4240     struct i965_driver_data *i965 = i965_driver_data(ctx);
4241     struct i965_post_processing_context *pp_context = i965->pp_context;
4242     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4243
4244     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4245         i965_post_processing_internal(ctx, i965->pp_context,
4246                                       src_surface,
4247                                       src_rect,
4248                                       dst_surface,
4249                                       dst_rect,
4250                                       PP_RGBX_LOAD_SAVE_NV12,
4251                                       NULL);
4252     } else {
4253         assert(0);
4254         return VA_STATUS_ERROR_UNKNOWN;
4255     }
4256
4257     intel_batchbuffer_flush(pp_context->batch);
4258
4259     return VA_STATUS_SUCCESS;
4260 }
4261
4262 static VAStatus
4263 i965_image_pl3_processing(VADriverContextP ctx,
4264                           const struct i965_surface *src_surface,
4265                           const VARectangle *src_rect,
4266                           struct i965_surface *dst_surface,
4267                           const VARectangle *dst_rect)
4268 {
4269     struct i965_driver_data *i965 = i965_driver_data(ctx);
4270     struct i965_post_processing_context *pp_context = i965->pp_context;
4271     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4272     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
4273
4274     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4275         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4276                                                  src_surface,
4277                                                  src_rect,
4278                                                  dst_surface,
4279                                                  dst_rect,
4280                                                  PP_PL3_LOAD_SAVE_N12,
4281                                                  NULL);
4282     } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || 
4283                fourcc == VA_FOURCC('I', 'M', 'C', '3') || 
4284                fourcc == VA_FOURCC('Y', 'V', '1', '2') || 
4285                fourcc == VA_FOURCC('I', '4', '2', '0')) {
4286         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4287                                                  src_surface,
4288                                                  src_rect,
4289                                                  dst_surface,
4290                                                  dst_rect,
4291                                                  PP_PL3_LOAD_SAVE_PL3,
4292                                                  NULL);
4293     } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') ||
4294                fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
4295         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4296                                                  src_surface,
4297                                                  src_rect,
4298                                                  dst_surface,
4299                                                  dst_rect,
4300                                                  PP_PL3_LOAD_SAVE_PA,
4301                                                  NULL);
4302     }
4303     else {
4304         assert(0);
4305     }
4306
4307     intel_batchbuffer_flush(pp_context->batch);
4308
4309     return vaStatus;
4310 }
4311
4312 static VAStatus
4313 i965_image_pl2_processing(VADriverContextP ctx,
4314                           const struct i965_surface *src_surface,
4315                           const VARectangle *src_rect,
4316                           struct i965_surface *dst_surface,
4317                           const VARectangle *dst_rect)
4318 {
4319     struct i965_driver_data *i965 = i965_driver_data(ctx);
4320     struct i965_post_processing_context *pp_context = i965->pp_context;
4321     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4322     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
4323
4324     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4325         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4326                                                  src_surface,
4327                                                  src_rect,
4328                                                  dst_surface,
4329                                                  dst_rect,
4330                                                  PP_NV12_LOAD_SAVE_N12,
4331                                                  NULL);
4332     } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || 
4333                fourcc == VA_FOURCC('I', 'M', 'C', '3') || 
4334                fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
4335                fourcc == VA_FOURCC('I', '4', '2', '0') ) {
4336         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4337                                                  src_surface,
4338                                                  src_rect,
4339                                                  dst_surface,
4340                                                  dst_rect,
4341                                                  PP_NV12_LOAD_SAVE_PL3,
4342                                                  NULL);
4343     } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') ||
4344                fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
4345         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4346                                                  src_surface,
4347                                                  src_rect,
4348                                                  dst_surface,
4349                                                  dst_rect,
4350                                                  PP_NV12_LOAD_SAVE_PA,
4351                                                      NULL);
4352     } else if (fourcc == VA_FOURCC('B', 'G', 'R', 'X') || 
4353                fourcc == VA_FOURCC('B', 'G', 'R', 'A') ||
4354                fourcc == VA_FOURCC('R', 'G', 'B', 'X') ||
4355                fourcc == VA_FOURCC('R', 'G', 'B', 'A') ) {
4356         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4357                                       src_surface,
4358                                       src_rect,
4359                                       dst_surface,
4360                                       dst_rect,
4361                                       PP_NV12_LOAD_SAVE_RGBX,
4362                                       NULL);
4363     } else {
4364         assert(0);
4365         return VA_STATUS_ERROR_UNKNOWN;
4366     }
4367
4368     intel_batchbuffer_flush(pp_context->batch);
4369
4370     return vaStatus;
4371 }
4372
4373 static VAStatus
4374 i965_image_pl1_processing(VADriverContextP ctx,
4375                           const struct i965_surface *src_surface,
4376                           const VARectangle *src_rect,
4377                           struct i965_surface *dst_surface,
4378                           const VARectangle *dst_rect)
4379 {
4380     struct i965_driver_data *i965 = i965_driver_data(ctx);
4381     struct i965_post_processing_context *pp_context = i965->pp_context;
4382     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4383
4384     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4385         i965_post_processing_internal(ctx, i965->pp_context,
4386                                       src_surface,
4387                                       src_rect,
4388                                       dst_surface,
4389                                       dst_rect,
4390                                       PP_PA_LOAD_SAVE_NV12,
4391                                       NULL);
4392     }
4393     else if (fourcc == VA_FOURCC_YV12) {
4394         i965_post_processing_internal(ctx, i965->pp_context,
4395                                       src_surface,
4396                                       src_rect,
4397                                       dst_surface,
4398                                       dst_rect,
4399                                       PP_PA_LOAD_SAVE_PL3,
4400                                       NULL);
4401
4402     }
4403     else {
4404         return VA_STATUS_ERROR_UNKNOWN;
4405     }
4406
4407     intel_batchbuffer_flush(pp_context->batch);
4408
4409     return VA_STATUS_SUCCESS;
4410 }
4411
4412 VAStatus
4413 i965_image_processing(VADriverContextP ctx,
4414                       const struct i965_surface *src_surface,
4415                       const VARectangle *src_rect,
4416                       struct i965_surface *dst_surface,
4417                       const VARectangle *dst_rect)
4418 {
4419     struct i965_driver_data *i965 = i965_driver_data(ctx);
4420     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
4421
4422     if (HAS_PP(i965)) {
4423         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
4424
4425         _i965LockMutex(&i965->pp_mutex);
4426
4427         switch (fourcc) {
4428         case VA_FOURCC('Y', 'V', '1', '2'):
4429         case VA_FOURCC('I', '4', '2', '0'):
4430         case VA_FOURCC('I', 'M', 'C', '1'):
4431         case VA_FOURCC('I', 'M', 'C', '3'):
4432             status = i965_image_pl3_processing(ctx,
4433                                                src_surface,
4434                                                src_rect,
4435                                                dst_surface,
4436                                                dst_rect);
4437             break;
4438
4439         case  VA_FOURCC('N', 'V', '1', '2'):
4440             status = i965_image_pl2_processing(ctx,
4441                                                src_surface,
4442                                                src_rect,
4443                                                dst_surface,
4444                                                dst_rect);
4445             break;
4446         case  VA_FOURCC('Y', 'U', 'Y', '2'):
4447         case VA_FOURCC('U', 'Y', 'V', 'Y'):
4448             status = i965_image_pl1_processing(ctx,
4449                                                src_surface,
4450                                                src_rect,
4451                                                dst_surface,
4452                                                dst_rect);
4453             break;
4454         case VA_FOURCC('B', 'G', 'R', 'A'):
4455         case VA_FOURCC('B', 'G', 'R', 'X'):
4456         case VA_FOURCC('R', 'G', 'B', 'A'):
4457         case VA_FOURCC('R', 'G', 'B', 'X'):
4458             status = i965_image_pl1_rgbx_processing(ctx,
4459                                                src_surface,
4460                                                src_rect,
4461                                                dst_surface,
4462                                                dst_rect);
4463             break;
4464         default:
4465             status = VA_STATUS_ERROR_UNIMPLEMENTED;
4466             break;
4467         }
4468         
4469         _i965UnlockMutex(&i965->pp_mutex);
4470     }
4471
4472     return status;
4473 }       
4474
4475 static void
4476 i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
4477 {
4478     int i;
4479
4480     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4481     pp_context->surface_state_binding_table.bo = NULL;
4482
4483     dri_bo_unreference(pp_context->curbe.bo);
4484     pp_context->curbe.bo = NULL;
4485
4486     dri_bo_unreference(pp_context->sampler_state_table.bo);
4487     pp_context->sampler_state_table.bo = NULL;
4488
4489     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4490     pp_context->sampler_state_table.bo_8x8 = NULL;
4491
4492     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4493     pp_context->sampler_state_table.bo_8x8_uv = NULL;
4494
4495     dri_bo_unreference(pp_context->idrt.bo);
4496     pp_context->idrt.bo = NULL;
4497     pp_context->idrt.num_interface_descriptors = 0;
4498
4499     dri_bo_unreference(pp_context->vfe_state.bo);
4500     pp_context->vfe_state.bo = NULL;
4501
4502     dri_bo_unreference(pp_context->stmm.bo);
4503     pp_context->stmm.bo = NULL;
4504
4505     for (i = 0; i < NUM_PP_MODULES; i++) {
4506         struct pp_module *pp_module = &pp_context->pp_modules[i];
4507
4508         dri_bo_unreference(pp_module->kernel.bo);
4509         pp_module->kernel.bo = NULL;
4510     }
4511
4512     free(pp_context->pp_static_parameter);
4513     free(pp_context->pp_inline_parameter);
4514     pp_context->pp_static_parameter = NULL;
4515     pp_context->pp_inline_parameter = NULL;
4516 }
4517
4518 Bool
4519 i965_post_processing_terminate(VADriverContextP ctx)
4520 {
4521     struct i965_driver_data *i965 = i965_driver_data(ctx);
4522     struct i965_post_processing_context *pp_context = i965->pp_context;
4523
4524     if (pp_context) {
4525         i965_post_processing_context_finalize(pp_context);
4526         free(pp_context);
4527     }
4528
4529     i965->pp_context = NULL;
4530
4531     return True;
4532 }
4533
4534 static void
4535 i965_post_processing_context_init(VADriverContextP ctx,
4536                                   struct i965_post_processing_context *pp_context,
4537                                   struct intel_batchbuffer *batch)
4538 {
4539     struct i965_driver_data *i965 = i965_driver_data(ctx);
4540     int i;
4541
4542     pp_context->urb.size = URB_SIZE((&i965->intel));
4543     pp_context->urb.num_vfe_entries = 32;
4544     pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
4545     pp_context->urb.num_cs_entries = 1;
4546     
4547     if (IS_GEN7(i965->intel.device_id))
4548         pp_context->urb.size_cs_entry = 4;      /* in 512 bits unit */
4549     else
4550         pp_context->urb.size_cs_entry = 2;
4551
4552     pp_context->urb.vfe_start = 0;
4553     pp_context->urb.cs_start = pp_context->urb.vfe_start + 
4554         pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
4555     assert(pp_context->urb.cs_start + 
4556            pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
4557
4558     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
4559     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
4560     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
4561
4562     if (IS_GEN7(i965->intel.device_id))
4563         memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
4564     else if (IS_GEN6(i965->intel.device_id))
4565         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
4566     else if (IS_IRONLAKE(i965->intel.device_id))
4567         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
4568
4569     for (i = 0; i < NUM_PP_MODULES; i++) {
4570         struct pp_module *pp_module = &pp_context->pp_modules[i];
4571         dri_bo_unreference(pp_module->kernel.bo);
4572         if (pp_module->kernel.bin && pp_module->kernel.size) {
4573             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
4574                                                 pp_module->kernel.name,
4575                                                 pp_module->kernel.size,
4576                                                 4096);
4577             assert(pp_module->kernel.bo);
4578             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
4579         } else {
4580             pp_module->kernel.bo = NULL;
4581         }
4582     }
4583
4584     /* static & inline parameters */
4585     if (IS_GEN7(i965->intel.device_id)) {
4586         pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
4587         pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
4588     } else {
4589         pp_context->pp_static_parameter = calloc(sizeof(struct pp_static_parameter), 1);
4590         pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
4591     }
4592
4593     pp_context->batch = batch;
4594 }
4595
4596 Bool
4597 i965_post_processing_init(VADriverContextP ctx)
4598 {
4599     struct i965_driver_data *i965 = i965_driver_data(ctx);
4600     struct i965_post_processing_context *pp_context = i965->pp_context;
4601
4602     if (HAS_PP(i965)) {
4603         if (pp_context == NULL) {
4604             pp_context = calloc(1, sizeof(*pp_context));
4605             i965_post_processing_context_init(ctx, pp_context, i965->batch);
4606             i965->pp_context = pp_context;
4607         }
4608     }
4609
4610     return True;
4611 }
4612
/*
 * Lookup table indexed by VAProcFilterType that gives the post-processing
 * module index used for that filter.  PP_NULL marks filter types for which
 * no module is listed here; i965_proc_picture() skips those entries.
 * The trailing comment on each row names the filter type the row is
 * meant for (the order must match the VAProcFilterType enumeration).
 */
static const int procfilter_to_pp_flag[VAProcFilterCount] = {
    PP_NULL,    /* VAProcFilterNone */
    PP_NV12_DN, /* VAProcFilterNoiseReduction */
    PP_NULL,    /* VAProcFilterDeblocking */
    PP_NV12_DNDI, /* VAProcFilterDeinterlacing */
    PP_NULL,    /* VAProcFilterSharpening */
    PP_NULL,    /* VAProcFilterColorBalance */
    PP_NULL,    /* VAProcFilterColorStandard */
    PP_NULL,    /* VAProcFilterFrameRateConversion */
};
4623
4624 static const int proc_frame_to_pp_frame[3] = {
4625     I965_SURFACE_FLAG_FRAME,
4626     I965_SURFACE_FLAG_TOP_FIELD_FIRST,
4627     I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST
4628 };
4629
4630 static void 
4631 i965_proc_picture(VADriverContextP ctx, 
4632                   VAProfile profile, 
4633                   union codec_state *codec_state,
4634                   struct hw_context *hw_context)
4635 {
4636     struct i965_driver_data *i965 = i965_driver_data(ctx);
4637     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
4638     struct proc_state *proc_state = &codec_state->proc;
4639     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
4640     struct object_surface *obj_surface;
4641     struct i965_surface src_surface, dst_surface;
4642     VARectangle src_rect, dst_rect;
4643     VAStatus status;
4644     int i;
4645     VASurfaceID tmp_surfaces[VAProcFilterCount + 4];
4646     int num_tmp_surfaces = 0;
4647     unsigned int tiling = 0, swizzle = 0;
4648     int in_width, in_height;
4649
4650     assert(pipeline_param->surface != VA_INVALID_ID);
4651     assert(proc_state->current_render_target != VA_INVALID_ID);
4652
4653     obj_surface = SURFACE(pipeline_param->surface);
4654     in_width = obj_surface->orig_width;
4655     in_height = obj_surface->orig_height;
4656     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
4657
4658     src_surface.id = pipeline_param->surface;
4659     src_surface.type = I965_SURFACE_TYPE_SURFACE;
4660     src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
4661
4662     VASurfaceID out_surface_id = VA_INVALID_ID;
4663     if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2')) {
4664         src_surface.id = pipeline_param->surface;
4665         src_surface.type = I965_SURFACE_TYPE_SURFACE;
4666         src_surface.flags = I965_SURFACE_FLAG_FRAME;
4667         src_rect.x = 0;
4668         src_rect.y = 0;
4669         src_rect.width = in_width;
4670         src_rect.height = in_height;
4671
4672         status = i965_CreateSurfaces(ctx,
4673                                      in_width,
4674                                      in_height,
4675                                      VA_RT_FORMAT_YUV420,
4676                                      1,
4677                                      &out_surface_id);
4678         assert(status == VA_STATUS_SUCCESS);
4679         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
4680         obj_surface = SURFACE(out_surface_id);
4681         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
4682
4683         dst_surface.id = out_surface_id;
4684         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4685         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4686         dst_rect.x = 0;
4687         dst_rect.y = 0;
4688         dst_rect.width = in_width;
4689         dst_rect.height = in_height;
4690
4691         status = i965_image_processing(ctx,
4692                                        &src_surface,
4693                                        &src_rect,
4694                                        &dst_surface,
4695                                        &dst_rect);
4696         assert(status == VA_STATUS_SUCCESS);
4697
4698         src_surface.id = out_surface_id;
4699         src_surface.type = I965_SURFACE_TYPE_SURFACE;
4700         src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
4701     }
4702
4703     if (pipeline_param->surface_region) {
4704         src_rect.x = pipeline_param->surface_region->x;
4705         src_rect.y = pipeline_param->surface_region->y;
4706         src_rect.width = pipeline_param->surface_region->width;
4707         src_rect.height = pipeline_param->surface_region->height;
4708     } else {
4709         src_rect.x = 0;
4710         src_rect.y = 0;
4711         src_rect.width = in_width;
4712         src_rect.height = in_height;
4713     }
4714
4715     if (pipeline_param->output_region) {
4716         dst_rect.x = pipeline_param->output_region->x;
4717         dst_rect.y = pipeline_param->output_region->y;
4718         dst_rect.width = pipeline_param->output_region->width;
4719         dst_rect.height = pipeline_param->output_region->height;
4720     } else {
4721         dst_rect.x = 0;
4722         dst_rect.y = 0;
4723         dst_rect.width = in_width;
4724         dst_rect.height = in_height;
4725     }
4726
4727     for (i = 0; i < pipeline_param->num_filters; i++) {
4728         struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]);
4729         VAProcFilterParameterBufferBase *filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer;
4730         VAProcFilterType filter_type = filter_param->type;
4731         out_surface_id = VA_INVALID_ID;
4732         int kernel_index = procfilter_to_pp_flag[filter_type];
4733
4734         if (kernel_index != PP_NULL &&
4735             proc_context->pp_context.pp_modules[kernel_index].kernel.bo != NULL) {
4736             status = i965_CreateSurfaces(ctx,
4737                                          in_width,
4738                                          in_height,
4739                                          VA_RT_FORMAT_YUV420,
4740                                          1,
4741                                          &out_surface_id);
4742             assert(status == VA_STATUS_SUCCESS);
4743             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
4744             obj_surface = SURFACE(out_surface_id);
4745             i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4746             dst_surface.id = out_surface_id;
4747             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4748             status = i965_post_processing_internal(ctx, &proc_context->pp_context,
4749                                                    &src_surface,
4750                                                    &src_rect,
4751                                                    &dst_surface,
4752                                                    &src_rect,
4753                                                    kernel_index,
4754                                                    filter_param);
4755
4756             if (status == VA_STATUS_SUCCESS) {
4757                 src_surface.id = dst_surface.id;
4758                 src_surface.type = dst_surface.type;
4759                 src_surface.flags = dst_surface.flags;
4760             }
4761         }
4762     }
4763
4764     obj_surface = SURFACE(proc_state->current_render_target);
4765     int csc_needed = 0;
4766     if (obj_surface->fourcc && obj_surface->fourcc !=  VA_FOURCC('N','V','1','2')){
4767         csc_needed = 1;
4768         out_surface_id = VA_INVALID_ID;
4769         status = i965_CreateSurfaces(ctx,
4770                                      obj_surface->orig_width,
4771                                      obj_surface->orig_height,
4772                                      VA_RT_FORMAT_YUV420, 
4773                                      1,
4774                                      &out_surface_id);
4775         assert(status == VA_STATUS_SUCCESS);
4776         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
4777         struct object_surface *csc_surface = SURFACE(out_surface_id);
4778         i965_check_alloc_surface_bo(ctx, csc_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4779         dst_surface.id = out_surface_id;
4780     } else {
4781         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4782         dst_surface.id = proc_state->current_render_target;
4783     }
4784
4785     dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4786     i965_vpp_clear_surface(ctx, &proc_context->pp_context, proc_state->current_render_target, pipeline_param->output_background_color); 
4787
4788     // load/save doesn't support different origin offset for src and dst surface
4789     if (src_rect.width == dst_rect.width &&
4790         src_rect.height == dst_rect.height &&
4791         src_rect.x == dst_rect.x &&
4792         src_rect.y == dst_rect.y) {
4793         i965_post_processing_internal(ctx, &proc_context->pp_context,
4794                                       &src_surface,
4795                                       &src_rect,
4796                                       &dst_surface,
4797                                       &dst_rect,
4798                                       PP_NV12_LOAD_SAVE_N12,
4799                                       NULL);
4800     } else {
4801
4802         i965_post_processing_internal(ctx, &proc_context->pp_context,
4803                                       &src_surface,
4804                                       &src_rect,
4805                                       &dst_surface,
4806                                       &dst_rect,
4807                                       (pipeline_param->filter_flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC ?
4808                                       PP_NV12_AVS : PP_NV12_SCALING,
4809                                       NULL);
4810     }
4811
4812     if (csc_needed) {
4813         src_surface.id = dst_surface.id;
4814         src_surface.type = dst_surface.type;
4815         src_surface.flags = dst_surface.flags;
4816         dst_surface.id = proc_state->current_render_target;
4817         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4818         i965_image_processing(ctx, &src_surface, &dst_rect, &dst_surface, &dst_rect);
4819     }
4820     
4821     if (num_tmp_surfaces)
4822         i965_DestroySurfaces(ctx,
4823                              tmp_surfaces,
4824                              num_tmp_surfaces);
4825
4826     intel_batchbuffer_flush(hw_context->batch);
4827 }
4828
4829 static void
4830 i965_proc_context_destroy(void *hw_context)
4831 {
4832     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
4833
4834     i965_post_processing_context_finalize(&proc_context->pp_context);
4835     intel_batchbuffer_free(proc_context->base.batch);
4836     free(proc_context);
4837 }
4838
4839 struct hw_context *
4840 i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config)
4841 {
4842     struct intel_driver_data *intel = intel_driver_data(ctx);
4843     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
4844
4845     proc_context->base.destroy = i965_proc_context_destroy;
4846     proc_context->base.run = i965_proc_picture;
4847     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
4848     i965_post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
4849
4850     return (struct hw_context *)proc_context;
4851 }