add rgbx to nv12 conversion in post-processing
[profile/ivi/vaapi-intel-driver.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_post_processing.h"
40 #include "i965_render.h"
41
/* True when the device is Ironlake, Gen6 or Gen7. */
#define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) ||     \
                     IS_GEN6((ctx)->intel.device_id) ||         \
                     IS_GEN7((ctx)->intel.device_id))

/*
 * Size of one surface-state slot.  Each surface-state structure is padded
 * to a multiple of 32 bytes and the slot size is the largest of the
 * i965 and Gen7 variants, so a single layout works for every structure.
 */
#define SURFACE_STATE_PADDED_SIZE_0_I965        ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_I965        ALIGN(sizeof(struct i965_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_I965          MAX(SURFACE_STATE_PADDED_SIZE_0_I965, SURFACE_STATE_PADDED_SIZE_1_I965)

#define SURFACE_STATE_PADDED_SIZE_0_GEN7        ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_GEN7        ALIGN(sizeof(struct gen7_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN7          MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)

#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
/*
 * Byte offset of surface-state slot `index`.  The argument is parenthesized
 * so that an expression such as `base + 1` is multiplied as a whole.
 */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
/* The binding table starts right after the MAX_PP_SURFACES surface-state slots. */
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)
57
/*
 * Gen5 (Ironlake) post-processing kernel binaries.
 *
 * Each array is filled by including a pre-built shader file from
 * shaders/post_processing/gen5_6/; every element is one row of four
 * 32-bit words.  The arrays are referenced by the pp_modules_gen5 table.
 */
static const uint32_t pp_null_gen5[][4] = {
#include "shaders/post_processing/gen5_6/null.g4b.gen5"
};

static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
#include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
#include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
};

static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
#include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
};

static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
#include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
};

static const uint32_t pp_nv12_scaling_gen5[][4] = {
#include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_avs_gen5[][4] = {
#include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_dndi_gen5[][4] = {
#include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_dn_gen5[][4] = {
#include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_load_save_pa_gen5[][4] = {
#include "shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5"
};

static const uint32_t pp_pl3_load_save_pa_gen5[][4] = {
#include "shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5"
};

static const uint32_t pp_pa_load_save_nv12_gen5[][4] = {
#include "shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5"
};

static const uint32_t pp_pa_load_save_pl3_gen5[][4] = {
#include "shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5"
};

/* RGBX -> NV12 conversion kernel. */
static const uint32_t pp_rgbx_load_save_nv12_gen5[][4] = {
#include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g4b.gen5"
};
113
114 static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
115                                    const struct i965_surface *src_surface,
116                                    const VARectangle *src_rect,
117                                    struct i965_surface *dst_surface,
118                                    const VARectangle *dst_rect,
119                                    void *filter_param);
120 static VAStatus pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
121                                             const struct i965_surface *src_surface,
122                                             const VARectangle *src_rect,
123                                             struct i965_surface *dst_surface,
124                                             const VARectangle *dst_rect,
125                                             void *filter_param);
126 static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
127                                            const struct i965_surface *src_surface,
128                                            const VARectangle *src_rect,
129                                            struct i965_surface *dst_surface,
130                                            const VARectangle *dst_rect,
131                                            void *filter_param);
132 static VAStatus gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
133                                              const struct i965_surface *src_surface,
134                                              const VARectangle *src_rect,
135                                              struct i965_surface *dst_surface,
136                                              const VARectangle *dst_rect,
137                                              void *filter_param);
138 static VAStatus pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
139                                                 const struct i965_surface *src_surface,
140                                                 const VARectangle *src_rect,
141                                                 struct i965_surface *dst_surface,
142                                                 const VARectangle *dst_rect,
143                                                 void *filter_param);
144 static VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
145                                         const struct i965_surface *src_surface,
146                                         const VARectangle *src_rect,
147                                         struct i965_surface *dst_surface,
148                                         const VARectangle *dst_rect,
149                                         void *filter_param);
150 static VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
151                                       const struct i965_surface *src_surface,
152                                       const VARectangle *src_rect,
153                                       struct i965_surface *dst_surface,
154                                       const VARectangle *dst_rect,
155                                       void *filter_param);
156
157 static struct pp_module pp_modules_gen5[] = {
158     {
159         {
160             "NULL module (for testing)",
161             PP_NULL,
162             pp_null_gen5,
163             sizeof(pp_null_gen5),
164             NULL,
165         },
166
167         pp_null_initialize,
168     },
169
170     {
171         {
172             "NV12_NV12",
173             PP_NV12_LOAD_SAVE_N12,
174             pp_nv12_load_save_nv12_gen5,
175             sizeof(pp_nv12_load_save_nv12_gen5),
176             NULL,
177         },
178
179         pp_plx_load_save_plx_initialize,
180     },
181
182     {
183         {
184             "NV12_PL3",
185             PP_NV12_LOAD_SAVE_PL3,
186             pp_nv12_load_save_pl3_gen5,
187             sizeof(pp_nv12_load_save_pl3_gen5),
188             NULL,
189         },
190
191         pp_plx_load_save_plx_initialize,
192     },
193
194     {
195         {
196             "PL3_NV12",
197             PP_PL3_LOAD_SAVE_N12,
198             pp_pl3_load_save_nv12_gen5,
199             sizeof(pp_pl3_load_save_nv12_gen5),
200             NULL,
201         },
202
203         pp_plx_load_save_plx_initialize,
204     },
205
206     {
207         {
208             "PL3_PL3",
209             PP_PL3_LOAD_SAVE_N12,
210             pp_pl3_load_save_pl3_gen5,
211             sizeof(pp_pl3_load_save_pl3_gen5),
212             NULL,
213         },
214
215         pp_plx_load_save_plx_initialize
216     },
217
218     {
219         {
220             "NV12 Scaling module",
221             PP_NV12_SCALING,
222             pp_nv12_scaling_gen5,
223             sizeof(pp_nv12_scaling_gen5),
224             NULL,
225         },
226
227         pp_nv12_scaling_initialize,
228     },
229
230     {
231         {
232             "NV12 AVS module",
233             PP_NV12_AVS,
234             pp_nv12_avs_gen5,
235             sizeof(pp_nv12_avs_gen5),
236             NULL,
237         },
238
239         pp_nv12_avs_initialize_nlas,
240     },
241
242     {
243         {
244             "NV12 DNDI module",
245             PP_NV12_DNDI,
246             pp_nv12_dndi_gen5,
247             sizeof(pp_nv12_dndi_gen5),
248             NULL,
249         },
250
251         pp_nv12_dndi_initialize,
252     },
253
254     {
255         {
256             "NV12 DN module",
257             PP_NV12_DN,
258             pp_nv12_dn_gen5,
259             sizeof(pp_nv12_dn_gen5),
260             NULL,
261         },
262
263         pp_nv12_dn_initialize,
264     },
265
266     {
267         {
268             "NV12_PA module",
269             PP_NV12_LOAD_SAVE_PA,
270             pp_nv12_load_save_pa_gen5,
271             sizeof(pp_nv12_load_save_pa_gen5),
272             NULL,
273         },
274     
275         pp_plx_load_save_plx_initialize,
276     },
277
278     {
279         {
280             "PL3_PA module",
281             PP_PL3_LOAD_SAVE_PA,
282             pp_pl3_load_save_pa_gen5,
283             sizeof(pp_pl3_load_save_pa_gen5),
284             NULL,
285         },
286     
287         pp_plx_load_save_plx_initialize,
288     },
289
290     {
291         {
292             "PA_NV12 module",
293             PP_PA_LOAD_SAVE_NV12,
294             pp_pa_load_save_nv12_gen5,
295             sizeof(pp_pa_load_save_nv12_gen5),
296             NULL,
297         },
298     
299         pp_plx_load_save_plx_initialize,
300     },
301
302     {
303         {
304             "PA_PL3 module",
305             PP_PA_LOAD_SAVE_PL3,
306             pp_pa_load_save_pl3_gen5,
307             sizeof(pp_pa_load_save_pl3_gen5),
308             NULL,
309         },
310     
311         pp_plx_load_save_plx_initialize,
312     },
313
314     {
315         {
316             "RGBX_NV12 module",
317             PP_RGBX_LOAD_SAVE_NV12,
318             pp_rgbx_load_save_nv12_gen5,
319             sizeof(pp_rgbx_load_save_nv12_gen5),
320             NULL,
321         },
322     
323         pp_plx_load_save_plx_initialize,
324     },
325             
326 };
327
/*
 * Gen6 post-processing kernel binaries.
 *
 * Each array is filled by including a pre-built .g6b shader file from
 * shaders/post_processing/gen5_6/; every element is one row of four
 * 32-bit words.  The arrays are referenced by the pp_modules_gen6 table.
 */
static const uint32_t pp_null_gen6[][4] = {
#include "shaders/post_processing/gen5_6/null.g6b"
};

static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
#include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
};

static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
#include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
};

static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
#include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
};

static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
#include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
};

/* Note: includes the same nv12_avs_nv12.g6b binary as pp_nv12_avs_gen6 below. */
static const uint32_t pp_nv12_scaling_gen6[][4] = {
#include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
};

static const uint32_t pp_nv12_avs_gen6[][4] = {
#include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
};

static const uint32_t pp_nv12_dndi_gen6[][4] = {
#include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
};

static const uint32_t pp_nv12_dn_gen6[][4] = {
#include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
};

static const uint32_t pp_nv12_load_save_pa_gen6[][4] = {
#include "shaders/post_processing/gen5_6/nv12_load_save_pa.g6b"
};

static const uint32_t pp_pl3_load_save_pa_gen6[][4] = {
#include "shaders/post_processing/gen5_6/pl3_load_save_pa.g6b"
};

static const uint32_t pp_pa_load_save_nv12_gen6[][4] = {
#include "shaders/post_processing/gen5_6/pa_load_save_nv12.g6b"
};

static const uint32_t pp_pa_load_save_pl3_gen6[][4] = {
#include "shaders/post_processing/gen5_6/pa_load_save_pl3.g6b"
};

/* RGBX -> NV12 conversion kernel. */
static const uint32_t pp_rgbx_load_save_nv12_gen6[][4] = {
#include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g6b"
};
383
384 static struct pp_module pp_modules_gen6[] = {
385     {
386         {
387             "NULL module (for testing)",
388             PP_NULL,
389             pp_null_gen6,
390             sizeof(pp_null_gen6),
391             NULL,
392         },
393
394         pp_null_initialize,
395     },
396
397     {
398         {
399             "NV12_NV12",
400             PP_NV12_LOAD_SAVE_N12,
401             pp_nv12_load_save_nv12_gen6,
402             sizeof(pp_nv12_load_save_nv12_gen6),
403             NULL,
404         },
405
406         pp_plx_load_save_plx_initialize,
407     },
408
409     {
410         {
411             "NV12_PL3",
412             PP_NV12_LOAD_SAVE_PL3,
413             pp_nv12_load_save_pl3_gen6,
414             sizeof(pp_nv12_load_save_pl3_gen6),
415             NULL,
416         },
417         
418         pp_plx_load_save_plx_initialize,
419     },
420
421     {
422         {
423             "PL3_NV12",
424             PP_PL3_LOAD_SAVE_N12,
425             pp_pl3_load_save_nv12_gen6,
426             sizeof(pp_pl3_load_save_nv12_gen6),
427             NULL,
428         },
429
430         pp_plx_load_save_plx_initialize,
431     },
432
433     {
434         {
435             "PL3_PL3",
436             PP_PL3_LOAD_SAVE_N12,
437             pp_pl3_load_save_pl3_gen6,
438             sizeof(pp_pl3_load_save_pl3_gen6),
439             NULL,
440         },
441
442         pp_plx_load_save_plx_initialize,
443     },
444
445     {
446         {
447             "NV12 Scaling module",
448             PP_NV12_SCALING,
449             pp_nv12_scaling_gen6,
450             sizeof(pp_nv12_scaling_gen6),
451             NULL,
452         },
453
454         gen6_nv12_scaling_initialize,
455     },
456
457     {
458         {
459             "NV12 AVS module",
460             PP_NV12_AVS,
461             pp_nv12_avs_gen6,
462             sizeof(pp_nv12_avs_gen6),
463             NULL,
464         },
465
466         pp_nv12_avs_initialize_nlas,
467     },
468
469     {
470         {
471             "NV12 DNDI module",
472             PP_NV12_DNDI,
473             pp_nv12_dndi_gen6,
474             sizeof(pp_nv12_dndi_gen6),
475             NULL,
476         },
477
478         pp_nv12_dndi_initialize,
479     },
480
481     {
482         {
483             "NV12 DN module",
484             PP_NV12_DN,
485             pp_nv12_dn_gen6,
486             sizeof(pp_nv12_dn_gen6),
487             NULL,
488         },
489
490         pp_nv12_dn_initialize,
491     },
492     {
493         {
494             "NV12_PA module",
495             PP_NV12_LOAD_SAVE_PA,
496             pp_nv12_load_save_pa_gen6,
497             sizeof(pp_nv12_load_save_pa_gen6),
498             NULL,
499         },
500     
501         pp_plx_load_save_plx_initialize,
502     },
503     
504     {
505         {
506             "PL3_PA module",
507             PP_PL3_LOAD_SAVE_PA,
508             pp_pl3_load_save_pa_gen6,
509             sizeof(pp_pl3_load_save_pa_gen6),
510             NULL,
511         },
512     
513         pp_plx_load_save_plx_initialize,
514     },
515     
516     {
517         {
518             "PA_NV12 module",
519             PP_PA_LOAD_SAVE_NV12,
520             pp_pa_load_save_nv12_gen6,
521             sizeof(pp_pa_load_save_nv12_gen6),
522             NULL,
523         },
524     
525         pp_plx_load_save_plx_initialize,
526     },
527
528     {
529         {
530             "PA_PL3 module",
531             PP_PA_LOAD_SAVE_PL3,
532             pp_pa_load_save_pl3_gen6,
533             sizeof(pp_pa_load_save_pl3_gen6),
534             NULL,
535         },
536     
537         pp_plx_load_save_plx_initialize,
538     },
539     
540     {
541         {
542             "RGBX_NV12 module",
543             PP_RGBX_LOAD_SAVE_NV12,
544             pp_rgbx_load_save_nv12_gen6,
545             sizeof(pp_rgbx_load_save_nv12_gen6),
546             NULL,
547         },
548     
549         pp_plx_load_save_plx_initialize,
550     },
551
552 };
553
/*
 * Gen7 post-processing kernel binaries.
 *
 * Arrays with a body include a pre-built .g7b shader file from
 * shaders/post_processing/gen7/.  Several arrays (null, DNDI, DN and
 * RGBX->NV12) have an empty initializer list, i.e. no kernel binary is
 * provided for them here.  The arrays are referenced by pp_modules_gen7.
 */
static const uint32_t pp_null_gen7[][4] = {
};

static const uint32_t pp_nv12_load_save_nv12_gen7[][4] = {
#include "shaders/post_processing/gen7/pl2_to_pl2.g7b"
};

static const uint32_t pp_nv12_load_save_pl3_gen7[][4] = {
#include "shaders/post_processing/gen7/pl2_to_pl3.g7b"
};

static const uint32_t pp_pl3_load_save_nv12_gen7[][4] = {
#include "shaders/post_processing/gen7/pl3_to_pl2.g7b"
};

static const uint32_t pp_pl3_load_save_pl3_gen7[][4] = {
#include "shaders/post_processing/gen7/pl3_to_pl3.g7b"
};

/* Scaling and AVS both include the same avs.g7b binary. */
static const uint32_t pp_nv12_scaling_gen7[][4] = {
#include "shaders/post_processing/gen7/avs.g7b"
};

static const uint32_t pp_nv12_avs_gen7[][4] = {
#include "shaders/post_processing/gen7/avs.g7b"
};

/* The DNDI kernel include is disabled, leaving this array empty. */
static const uint32_t pp_nv12_dndi_gen7[][4] = {
// #include "shaders/post_processing/gen7/dndi.g7b"
};

static const uint32_t pp_nv12_dn_gen7[][4] = {
};
static const uint32_t pp_nv12_load_save_pa_gen7[][4] = {
#include "shaders/post_processing/gen7/pl2_to_pa.g7b"
};
static const uint32_t pp_pl3_load_save_pa_gen7[][4] = {
#include "shaders/post_processing/gen7/pl3_to_pa.g7b"
};
static const uint32_t pp_pa_load_save_nv12_gen7[][4] = {
#include "shaders/post_processing/gen7/pa_to_pl2.g7b"
};
static const uint32_t pp_pa_load_save_pl3_gen7[][4] = {
#include "shaders/post_processing/gen7/pa_to_pl3.g7b"
};
/* No Gen7 RGBX -> NV12 kernel is included; the array is empty. */
static const uint32_t pp_rgbx_load_save_nv12_gen7[][4] = {
};
601
602 static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
603                                            const struct i965_surface *src_surface,
604                                            const VARectangle *src_rect,
605                                            struct i965_surface *dst_surface,
606                                            const VARectangle *dst_rect,
607                                            void *filter_param);
608 static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
609                                              const struct i965_surface *src_surface,
610                                              const VARectangle *src_rect,
611                                              struct i965_surface *dst_surface,
612                                              const VARectangle *dst_rect,
613                                              void *filter_param);
614 static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
615                                            const struct i965_surface *src_surface,
616                                            const VARectangle *src_rect,
617                                            struct i965_surface *dst_surface,
618                                            const VARectangle *dst_rect,
619                                            void *filter_param);
620
621 static struct pp_module pp_modules_gen7[] = {
622     {
623         {
624             "NULL module (for testing)",
625             PP_NULL,
626             pp_null_gen7,
627             sizeof(pp_null_gen7),
628             NULL,
629         },
630
631         pp_null_initialize,
632     },
633
634     {
635         {
636             "NV12_NV12",
637             PP_NV12_LOAD_SAVE_N12,
638             pp_nv12_load_save_nv12_gen7,
639             sizeof(pp_nv12_load_save_nv12_gen7),
640             NULL,
641         },
642
643         gen7_pp_plx_avs_initialize,
644     },
645
646     {
647         {
648             "NV12_PL3",
649             PP_NV12_LOAD_SAVE_PL3,
650             pp_nv12_load_save_pl3_gen7,
651             sizeof(pp_nv12_load_save_pl3_gen7),
652             NULL,
653         },
654         
655         gen7_pp_plx_avs_initialize,
656     },
657
658     {
659         {
660             "PL3_NV12",
661             PP_PL3_LOAD_SAVE_N12,
662             pp_pl3_load_save_nv12_gen7,
663             sizeof(pp_pl3_load_save_nv12_gen7),
664             NULL,
665         },
666
667         gen7_pp_plx_avs_initialize,
668     },
669
670     {
671         {
672             "PL3_PL3",
673             PP_PL3_LOAD_SAVE_N12,
674             pp_pl3_load_save_pl3_gen7,
675             sizeof(pp_pl3_load_save_pl3_gen7),
676             NULL,
677         },
678
679         gen7_pp_plx_avs_initialize,
680     },
681
682     {
683         {
684             "NV12 Scaling module",
685             PP_NV12_SCALING,
686             pp_nv12_scaling_gen7,
687             sizeof(pp_nv12_scaling_gen7),
688             NULL,
689         },
690
691         gen7_pp_plx_avs_initialize,
692     },
693
694     {
695         {
696             "NV12 AVS module",
697             PP_NV12_AVS,
698             pp_nv12_avs_gen7,
699             sizeof(pp_nv12_avs_gen7),
700             NULL,
701         },
702
703         gen7_pp_plx_avs_initialize,
704     },
705
706     {
707         {
708             "NV12 DNDI module",
709             PP_NV12_DNDI,
710             pp_nv12_dndi_gen7,
711             sizeof(pp_nv12_dndi_gen7),
712             NULL,
713         },
714
715         gen7_pp_nv12_dndi_initialize,
716     },
717
718     {
719         {
720             "NV12 DN module",
721             PP_NV12_DN,
722             pp_nv12_dn_gen7,
723             sizeof(pp_nv12_dn_gen7),
724             NULL,
725         },
726
727         gen7_pp_nv12_dn_initialize,
728     },
729     {
730         {
731             "NV12_PA module",
732             PP_NV12_LOAD_SAVE_PA,
733             pp_nv12_load_save_pa_gen7,
734             sizeof(pp_nv12_load_save_pa_gen7),
735             NULL,
736         },
737     
738         gen7_pp_plx_avs_initialize,
739     },
740
741     {
742         {
743             "PL3_PA module",
744             PP_PL3_LOAD_SAVE_PA,
745             pp_pl3_load_save_pa_gen7,
746             sizeof(pp_pl3_load_save_pa_gen7),
747             NULL,
748         },
749     
750         gen7_pp_plx_avs_initialize,
751     },
752
753     {
754         {
755             "PA_NV12 module",
756             PP_PA_LOAD_SAVE_NV12,
757             pp_pa_load_save_nv12_gen7,
758             sizeof(pp_pa_load_save_nv12_gen7),
759             NULL,
760         },
761     
762         gen7_pp_plx_avs_initialize,
763     },
764
765     {
766         {
767             "PA_PL3 module",
768             PP_PA_LOAD_SAVE_PL3,
769             pp_pa_load_save_pl3_gen7,
770             sizeof(pp_pa_load_save_pl3_gen7),
771             NULL,
772         },
773     
774         gen7_pp_plx_avs_initialize,
775     },
776     
777     {
778         {
779             "RGBX_NV12 module",
780             PP_RGBX_LOAD_SAVE_NV12,
781             pp_rgbx_load_save_nv12_gen7,
782             sizeof(pp_rgbx_load_save_nv12_gen7),
783             NULL,
784         },
785     
786         pp_plx_load_save_plx_initialize,
787     },
788
789 };
790
791 static int
792 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
793 {
794     struct i965_driver_data *i965 = i965_driver_data(ctx);
795     int fourcc;
796
797     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
798         struct object_image *obj_image = IMAGE(surface->id);
799         fourcc = obj_image->image.format.fourcc;
800     } else {
801         struct object_surface *obj_surface = SURFACE(surface->id);
802         fourcc = obj_surface->fourcc;
803     }
804
805     return fourcc;
806 }
807
808 static void
809 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
810 {
811     switch (tiling) {
812     case I915_TILING_NONE:
813         ss->ss3.tiled_surface = 0;
814         ss->ss3.tile_walk = 0;
815         break;
816     case I915_TILING_X:
817         ss->ss3.tiled_surface = 1;
818         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
819         break;
820     case I915_TILING_Y:
821         ss->ss3.tiled_surface = 1;
822         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
823         break;
824     }
825 }
826
827 static void
828 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
829 {
830     switch (tiling) {
831     case I915_TILING_NONE:
832         ss->ss2.tiled_surface = 0;
833         ss->ss2.tile_walk = 0;
834         break;
835     case I915_TILING_X:
836         ss->ss2.tiled_surface = 1;
837         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
838         break;
839     case I915_TILING_Y:
840         ss->ss2.tiled_surface = 1;
841         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
842         break;
843     }
844 }
845
846 static void
847 gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
848 {
849     switch (tiling) {
850     case I915_TILING_NONE:
851         ss->ss0.tiled_surface = 0;
852         ss->ss0.tile_walk = 0;
853         break;
854     case I915_TILING_X:
855         ss->ss0.tiled_surface = 1;
856         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
857         break;
858     case I915_TILING_Y:
859         ss->ss0.tiled_surface = 1;
860         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
861         break;
862     }
863 }
864
865 static void
866 gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
867 {
868     switch (tiling) {
869     case I915_TILING_NONE:
870         ss->ss2.tiled_surface = 0;
871         ss->ss2.tile_walk = 0;
872         break;
873     case I915_TILING_X:
874         ss->ss2.tiled_surface = 1;
875         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
876         break;
877     case I915_TILING_Y:
878         ss->ss2.tiled_surface = 1;
879         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
880         break;
881     }
882 }
883
884 static void
885 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
886 {
887     struct i965_interface_descriptor *desc;
888     dri_bo *bo;
889     int pp_index = pp_context->current_pp;
890
891     bo = pp_context->idrt.bo;
892     dri_bo_map(bo, 1);
893     assert(bo->virtual);
894     desc = bo->virtual;
895     memset(desc, 0, sizeof(*desc));
896     desc->desc0.grf_reg_blocks = 10;
897     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
898     desc->desc1.const_urb_entry_read_offset = 0;
899     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
900     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
901     desc->desc2.sampler_count = 0;
902     desc->desc3.binding_table_entry_count = 0;
903     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
904
905     dri_bo_emit_reloc(bo,
906                       I915_GEM_DOMAIN_INSTRUCTION, 0,
907                       desc->desc0.grf_reg_blocks,
908                       offsetof(struct i965_interface_descriptor, desc0),
909                       pp_context->pp_modules[pp_index].kernel.bo);
910
911     dri_bo_emit_reloc(bo,
912                       I915_GEM_DOMAIN_INSTRUCTION, 0,
913                       desc->desc2.sampler_count << 2,
914                       offsetof(struct i965_interface_descriptor, desc2),
915                       pp_context->sampler_state_table.bo);
916
917     dri_bo_unmap(bo);
918     pp_context->idrt.num_interface_descriptors++;
919 }
920
921 static void
922 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
923 {
924     struct i965_vfe_state *vfe_state;
925     dri_bo *bo;
926
927     bo = pp_context->vfe_state.bo;
928     dri_bo_map(bo, 1);
929     assert(bo->virtual);
930     vfe_state = bo->virtual;
931     memset(vfe_state, 0, sizeof(*vfe_state));
932     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
933     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
934     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
935     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
936     vfe_state->vfe1.children_present = 0;
937     vfe_state->vfe2.interface_descriptor_base = 
938         pp_context->idrt.bo->offset >> 4; /* reloc */
939     dri_bo_emit_reloc(bo,
940                       I915_GEM_DOMAIN_INSTRUCTION, 0,
941                       0,
942                       offsetof(struct i965_vfe_state, vfe2),
943                       pp_context->idrt.bo);
944     dri_bo_unmap(bo);
945 }
946
947 static void
948 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
949 {
950     unsigned char *constant_buffer;
951     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
952
953     assert(sizeof(*pp_static_parameter) == 128);
954     dri_bo_map(pp_context->curbe.bo, 1);
955     assert(pp_context->curbe.bo->virtual);
956     constant_buffer = pp_context->curbe.bo->virtual;
957     memcpy(constant_buffer, pp_static_parameter, sizeof(*pp_static_parameter));
958     dri_bo_unmap(pp_context->curbe.bo);
959 }
960
/*
 * Prepare all indirect state for a post-processing run: the interface
 * descriptor table, the VFE state and the constant buffer, in that order.
 * `ctx` is not used here.
 */
static void
ironlake_pp_states_setup(VADriverContextP ctx,
                         struct i965_post_processing_context *pp_context)
{
    ironlake_pp_interface_descriptor_table(pp_context);
    ironlake_pp_vfe_state(pp_context);
    ironlake_pp_upload_constants(pp_context);
}
969
/*
 * Emit a one-dword PIPELINE_SELECT command selecting the media pipeline
 * into the post-processing batch buffer.  `ctx` is not used here.
 */
static void
ironlake_pp_pipeline_select(VADriverContextP ctx,
                            struct i965_post_processing_context *pp_context)
{
    struct intel_batchbuffer *batch = pp_context->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
    ADVANCE_BATCH(batch);
}
980
981 static void
982 ironlake_pp_urb_layout(VADriverContextP ctx,
983                        struct i965_post_processing_context *pp_context)
984 {
985     struct intel_batchbuffer *batch = pp_context->batch;
986     unsigned int vfe_fence, cs_fence;
987
988     vfe_fence = pp_context->urb.cs_start;
989     cs_fence = pp_context->urb.size;
990
991     BEGIN_BATCH(batch, 3);
992     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
993     OUT_BATCH(batch, 0);
994     OUT_BATCH(batch, 
995               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
996               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
997     ADVANCE_BATCH(batch);
998 }
999
1000 static void
1001 ironlake_pp_state_base_address(VADriverContextP ctx,
1002                                struct i965_post_processing_context *pp_context)
1003 {
1004     struct intel_batchbuffer *batch = pp_context->batch;
1005
1006     BEGIN_BATCH(batch, 8);
1007     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
1008     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1009     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
1010     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1011     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1012     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1013     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1014     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1015     ADVANCE_BATCH(batch);
1016 }
1017
1018 static void
1019 ironlake_pp_state_pointers(VADriverContextP ctx,
1020                            struct i965_post_processing_context *pp_context)
1021 {
1022     struct intel_batchbuffer *batch = pp_context->batch;
1023
1024     BEGIN_BATCH(batch, 3);
1025     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
1026     OUT_BATCH(batch, 0);
1027     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1028     ADVANCE_BATCH(batch);
1029 }
1030
1031 static void 
1032 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
1033                           struct i965_post_processing_context *pp_context)
1034 {
1035     struct intel_batchbuffer *batch = pp_context->batch;
1036
1037     BEGIN_BATCH(batch, 2);
1038     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
1039     OUT_BATCH(batch,
1040               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
1041               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
1042     ADVANCE_BATCH(batch);
1043 }
1044
1045 static void
1046 ironlake_pp_constant_buffer(VADriverContextP ctx,
1047                             struct i965_post_processing_context *pp_context)
1048 {
1049     struct intel_batchbuffer *batch = pp_context->batch;
1050
1051     BEGIN_BATCH(batch, 2);
1052     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1053     OUT_RELOC(batch, pp_context->curbe.bo,
1054               I915_GEM_DOMAIN_INSTRUCTION, 0,
1055               pp_context->urb.size_cs_entry - 1);
1056     ADVANCE_BATCH(batch);    
1057 }
1058
1059 static void
1060 ironlake_pp_object_walker(VADriverContextP ctx,
1061                           struct i965_post_processing_context *pp_context)
1062 {
1063     struct intel_batchbuffer *batch = pp_context->batch;
1064     int x, x_steps, y, y_steps;
1065     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1066
1067     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
1068     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
1069
1070     for (y = 0; y < y_steps; y++) {
1071         for (x = 0; x < x_steps; x++) {
1072             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
1073                 BEGIN_BATCH(batch, 20);
1074                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
1075                 OUT_BATCH(batch, 0);
1076                 OUT_BATCH(batch, 0); /* no indirect data */
1077                 OUT_BATCH(batch, 0);
1078
1079                 /* inline data grf 5-6 */
1080                 assert(sizeof(*pp_inline_parameter) == 64);
1081                 intel_batchbuffer_data(batch, pp_inline_parameter, sizeof(*pp_inline_parameter));
1082
1083                 ADVANCE_BATCH(batch);
1084             }
1085         }
1086     }
1087 }
1088
1089 static void
1090 ironlake_pp_pipeline_setup(VADriverContextP ctx,
1091                            struct i965_post_processing_context *pp_context)
1092 {
1093     struct intel_batchbuffer *batch = pp_context->batch;
1094
1095     intel_batchbuffer_start_atomic(batch, 0x1000);
1096     intel_batchbuffer_emit_mi_flush(batch);
1097     ironlake_pp_pipeline_select(ctx, pp_context);
1098     ironlake_pp_state_base_address(ctx, pp_context);
1099     ironlake_pp_state_pointers(ctx, pp_context);
1100     ironlake_pp_urb_layout(ctx, pp_context);
1101     ironlake_pp_cs_urb_layout(ctx, pp_context);
1102     ironlake_pp_constant_buffer(ctx, pp_context);
1103     ironlake_pp_object_walker(ctx, pp_context);
1104     intel_batchbuffer_end_atomic(batch);
1105 }
1106
1107 // update u/v offset when the surface format are packed yuv
1108 static void i965_update_src_surface_static_parameter(
1109     VADriverContextP    ctx, 
1110     struct i965_post_processing_context *pp_context,
1111     const struct i965_surface *surface)
1112 {
1113     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1114     int fourcc = pp_get_surface_fourcc(ctx, surface);
1115
1116     switch (fourcc) {
1117     case VA_FOURCC('Y', 'U', 'Y', '2'):
1118         pp_static_parameter->grf1.source_packed_u_offset = 1;
1119         pp_static_parameter->grf1.source_packed_v_offset = 3;
1120         break;
1121     case VA_FOURCC('U', 'Y', 'V', 'Y'):
1122         pp_static_parameter->grf1.source_packed_y_offset = 1;
1123         pp_static_parameter->grf1.source_packed_v_offset = 2;
1124         break;
1125     case VA_FOURCC('B', 'G', 'R', 'X'):
1126     case VA_FOURCC('B', 'G', 'R', 'A'):
1127         pp_static_parameter->grf1.source_rgb_layout = 0;
1128         break;
1129     case VA_FOURCC('R', 'G', 'B', 'X'):
1130     case VA_FOURCC('R', 'G', 'B', 'A'):
1131         pp_static_parameter->grf1.source_rgb_layout = 1;
1132         break;
1133     default:
1134         break;
1135     }
1136     
1137 }
1138
1139 static void i965_update_dst_surface_static_parameter(
1140     VADriverContextP    ctx, 
1141     struct i965_post_processing_context *pp_context,
1142     const struct i965_surface *surface)
1143 {
1144     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1145     int fourcc = pp_get_surface_fourcc(ctx, surface);
1146
1147     switch (fourcc) {
1148     case VA_FOURCC('Y', 'U', 'Y', '2'):
1149         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_u_offset = 1;
1150         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 3;
1151         break;
1152     case VA_FOURCC('U', 'Y', 'V', 'Y'):
1153         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_y_offset = 1;
1154         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 2;
1155         break;
1156     case VA_FOURCC('B', 'G', 'R', 'X'):
1157     case VA_FOURCC('B', 'G', 'R', 'A'):
1158         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 0;
1159         break;
1160     case VA_FOURCC('R', 'G', 'B', 'X'):
1161     case VA_FOURCC('R', 'G', 'B', 'A'):
1162         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 1;
1163         break;
1164     default:
1165         break;
1166     }
1167     
1168 }
1169
1170 static void
1171 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1172                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1173                           int width, int height, int pitch, int format, 
1174                           int index, int is_target)
1175 {
1176     struct i965_surface_state *ss;
1177     dri_bo *ss_bo;
1178     unsigned int tiling;
1179     unsigned int swizzle;
1180
1181     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1182     ss_bo = pp_context->surface_state_binding_table.bo;
1183     assert(ss_bo);
1184
1185     dri_bo_map(ss_bo, True);
1186     assert(ss_bo->virtual);
1187     ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1188     memset(ss, 0, sizeof(*ss));
1189     ss->ss0.surface_type = I965_SURFACE_2D;
1190     ss->ss0.surface_format = format;
1191     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1192     ss->ss2.width = width - 1;
1193     ss->ss2.height = height - 1;
1194     ss->ss3.pitch = pitch - 1;
1195     pp_set_surface_tiling(ss, tiling);
1196     dri_bo_emit_reloc(ss_bo,
1197                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1198                       surf_bo_offset,
1199                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
1200                       surf_bo);
1201     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1202     dri_bo_unmap(ss_bo);
1203 }
1204
1205 static void
1206 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1207                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1208                            int width, int height, int wpitch,
1209                            int xoffset, int yoffset,
1210                            int format, int interleave_chroma,
1211                            int index)
1212 {
1213     struct i965_surface_state2 *ss2;
1214     dri_bo *ss2_bo;
1215     unsigned int tiling;
1216     unsigned int swizzle;
1217
1218     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1219     ss2_bo = pp_context->surface_state_binding_table.bo;
1220     assert(ss2_bo);
1221
1222     dri_bo_map(ss2_bo, True);
1223     assert(ss2_bo->virtual);
1224     ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1225     memset(ss2, 0, sizeof(*ss2));
1226     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1227     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1228     ss2->ss1.width = width - 1;
1229     ss2->ss1.height = height - 1;
1230     ss2->ss2.pitch = wpitch - 1;
1231     ss2->ss2.interleave_chroma = interleave_chroma;
1232     ss2->ss2.surface_format = format;
1233     ss2->ss3.x_offset_for_cb = xoffset;
1234     ss2->ss3.y_offset_for_cb = yoffset;
1235     pp_set_surface2_tiling(ss2, tiling);
1236     dri_bo_emit_reloc(ss2_bo,
1237                       I915_GEM_DOMAIN_RENDER, 0,
1238                       surf_bo_offset,
1239                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
1240                       surf_bo);
1241     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1242     dri_bo_unmap(ss2_bo);
1243 }
1244
1245 static void
1246 gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1247                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1248                           int width, int height, int pitch, int format, 
1249                           int index, int is_target)
1250 {
1251     struct gen7_surface_state *ss;
1252     dri_bo *ss_bo;
1253     unsigned int tiling;
1254     unsigned int swizzle;
1255
1256     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1257     ss_bo = pp_context->surface_state_binding_table.bo;
1258     assert(ss_bo);
1259
1260     dri_bo_map(ss_bo, True);
1261     assert(ss_bo->virtual);
1262     ss = (struct gen7_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1263     memset(ss, 0, sizeof(*ss));
1264     ss->ss0.surface_type = I965_SURFACE_2D;
1265     ss->ss0.surface_format = format;
1266     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1267     ss->ss2.width = width - 1;
1268     ss->ss2.height = height - 1;
1269     ss->ss3.pitch = pitch - 1;
1270     gen7_pp_set_surface_tiling(ss, tiling);
1271     dri_bo_emit_reloc(ss_bo,
1272                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1273                       surf_bo_offset,
1274                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
1275                       surf_bo);
1276     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1277     dri_bo_unmap(ss_bo);
1278 }
1279
1280 static void
1281 gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1282                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1283                            int width, int height, int wpitch,
1284                            int xoffset, int yoffset,
1285                            int format, int interleave_chroma,
1286                            int index)
1287 {
1288     struct gen7_surface_state2 *ss2;
1289     dri_bo *ss2_bo;
1290     unsigned int tiling;
1291     unsigned int swizzle;
1292
1293     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1294     ss2_bo = pp_context->surface_state_binding_table.bo;
1295     assert(ss2_bo);
1296
1297     dri_bo_map(ss2_bo, True);
1298     assert(ss2_bo->virtual);
1299     ss2 = (struct gen7_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1300     memset(ss2, 0, sizeof(*ss2));
1301     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1302     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1303     ss2->ss1.width = width - 1;
1304     ss2->ss1.height = height - 1;
1305     ss2->ss2.pitch = wpitch - 1;
1306     ss2->ss2.interleave_chroma = interleave_chroma;
1307     ss2->ss2.surface_format = format;
1308     ss2->ss3.x_offset_for_cb = xoffset;
1309     ss2->ss3.y_offset_for_cb = yoffset;
1310     gen7_pp_set_surface2_tiling(ss2, tiling);
1311     dri_bo_emit_reloc(ss2_bo,
1312                       I915_GEM_DOMAIN_RENDER, 0,
1313                       surf_bo_offset,
1314                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
1315                       surf_bo);
1316     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1317     dri_bo_unmap(ss2_bo);
1318 }
1319
1320 static void 
1321 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1322                                 const struct i965_surface *surface, 
1323                                 int base_index, int is_target,
1324                                 int *width, int *height, int *pitch, int *offset)
1325 {
1326     struct i965_driver_data *i965 = i965_driver_data(ctx);
1327     struct object_surface *obj_surface;
1328     struct object_image *obj_image;
1329     dri_bo *bo;
1330     int fourcc = pp_get_surface_fourcc(ctx, surface);
1331     const int Y = 0;
1332     const int U = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 2 : 1;
1333     const int V = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 1 : 2;
1334     const int UV = 1;
1335     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1336     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); 
1337     int full_packed_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || 
1338                               fourcc == VA_FOURCC('R', 'G', 'B', 'X') || 
1339                               fourcc == VA_FOURCC('B', 'G', 'R', 'A') || 
1340                               fourcc == VA_FOURCC('B', 'G', 'R', 'X'));
1341     int scale_factor_of_1st_plane_width_in_byte = 1;
1342                               
1343     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1344         obj_surface = SURFACE(surface->id);
1345         bo = obj_surface->bo;
1346         width[0] = obj_surface->orig_width;
1347         height[0] = obj_surface->orig_height;
1348         pitch[0] = obj_surface->width;
1349         offset[0] = 0;
1350
1351         if (full_packed_format) {
1352             scale_factor_of_1st_plane_width_in_byte = 4; 
1353             pitch[0] = obj_surface->width * 4;
1354         }
1355         else if (packed_yuv ) {
1356             scale_factor_of_1st_plane_width_in_byte =  2; 
1357             pitch[0] = obj_surface->width * 2;
1358         }
1359         else if (interleaved_uv) {
1360             width[1] = obj_surface->orig_width;
1361             height[1] = obj_surface->orig_height / 2;
1362             pitch[1] = obj_surface->width;
1363             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1364         } else {
1365             width[1] = obj_surface->orig_width / 2;
1366             height[1] = obj_surface->orig_height / 2;
1367             pitch[1] = obj_surface->width / 2;
1368             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1369             width[2] = obj_surface->orig_width / 2;
1370             height[2] = obj_surface->orig_height / 2;
1371             pitch[2] = obj_surface->width / 2;
1372             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
1373         }
1374     } else {
1375         obj_image = IMAGE(surface->id);
1376         bo = obj_image->bo;
1377         width[0] = obj_image->image.width;
1378         height[0] = obj_image->image.height;
1379         pitch[0] = obj_image->image.pitches[0];
1380         offset[0] = obj_image->image.offsets[0];
1381
1382         if (full_packed_format) {
1383             scale_factor_of_1st_plane_width_in_byte = 4;
1384         }
1385         else if (packed_yuv ) {
1386             scale_factor_of_1st_plane_width_in_byte = 2;
1387         }
1388         else if (interleaved_uv) {
1389             width[1] = obj_image->image.width;
1390             height[1] = obj_image->image.height / 2;
1391             pitch[1] = obj_image->image.pitches[1];
1392             offset[1] = obj_image->image.offsets[1];
1393         } else {
1394             width[1] = obj_image->image.width / 2;
1395             height[1] = obj_image->image.height / 2;
1396             pitch[1] = obj_image->image.pitches[1];
1397             offset[1] = obj_image->image.offsets[1];
1398             width[2] = obj_image->image.width / 2;
1399             height[2] = obj_image->image.height / 2;
1400             pitch[2] = obj_image->image.pitches[2];
1401             offset[2] = obj_image->image.offsets[2];
1402         }
1403     }
1404
1405     /* Y surface */
1406     i965_pp_set_surface_state(ctx, pp_context,
1407                               bo, offset[Y],
1408                               width[Y] *scale_factor_of_1st_plane_width_in_byte / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
1409                               base_index, is_target);
1410
1411     if (!packed_yuv && !full_packed_format) {
1412         if (interleaved_uv) {
1413             i965_pp_set_surface_state(ctx, pp_context,
1414                                       bo, offset[UV],
1415                                       width[UV] / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
1416                                       base_index + 1, is_target);
1417         } else {
1418             /* U surface */
1419             i965_pp_set_surface_state(ctx, pp_context,
1420                                       bo, offset[U],
1421                                       width[U] / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
1422                                       base_index + 1, is_target);
1423
1424             /* V surface */
1425             i965_pp_set_surface_state(ctx, pp_context,
1426                                       bo, offset[V],
1427                                       width[V] / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
1428                                       base_index + 2, is_target);
1429         }
1430     }
1431
1432 }
1433
1434 static void 
1435 gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1436                                      const struct i965_surface *surface, 
1437                                      int base_index, int is_target,
1438                                      int *width, int *height, int *pitch, int *offset)
1439 {
1440     struct i965_driver_data *i965 = i965_driver_data(ctx);
1441     struct object_surface *obj_surface;
1442     struct object_image *obj_image;
1443     dri_bo *bo;
1444     int fourcc = pp_get_surface_fourcc(ctx, surface);
1445     const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1446                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1;
1447     const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1448                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 1 : 2;
1449     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1450     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y'));
1451
1452     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1453         obj_surface = SURFACE(surface->id);
1454         bo = obj_surface->bo;
1455         width[0] = obj_surface->orig_width;
1456         height[0] = obj_surface->orig_height;
1457         pitch[0] = obj_surface->width;
1458         offset[0] = 0;
1459
1460         if (packed_yuv) {
1461             if (is_target)
1462                 width[0] = obj_surface->orig_width * 2; /* surface format is R8, so double the width */
1463             else
1464                 width[0] = obj_surface->orig_width;     /* surface foramt is YCBCR, width is specified in units of pixels */
1465
1466             pitch[0] = obj_surface->width * 2;
1467         }
1468
1469         width[1] = obj_surface->cb_cr_width;
1470         height[1] = obj_surface->cb_cr_height;
1471         pitch[1] = obj_surface->cb_cr_pitch;
1472         offset[1] = obj_surface->y_cb_offset * obj_surface->width;
1473
1474         width[2] = obj_surface->cb_cr_width;
1475         height[2] = obj_surface->cb_cr_height;
1476         pitch[2] = obj_surface->cb_cr_pitch;
1477         offset[2] = obj_surface->y_cr_offset * obj_surface->width;
1478     } else {
1479         obj_image = IMAGE(surface->id);
1480         bo = obj_image->bo;
1481         width[0] = obj_image->image.width;
1482         height[0] = obj_image->image.height;
1483         pitch[0] = obj_image->image.pitches[0];
1484         offset[0] = obj_image->image.offsets[0];
1485
1486         if (packed_yuv) {
1487             if (is_target)
1488                 width[0] = obj_image->image.width * 2;  /* surface format is R8, so double the width */
1489             else
1490                 width[0] = obj_image->image.width;      /* surface foramt is YCBCR, width is specified in units of pixels */
1491         } else if (interleaved_uv) {
1492             width[1] = obj_image->image.width / 2;
1493             height[1] = obj_image->image.height / 2;
1494             pitch[1] = obj_image->image.pitches[1];
1495             offset[1] = obj_image->image.offsets[1];
1496         } else {
1497             width[1] = obj_image->image.width / 2;
1498             height[1] = obj_image->image.height / 2;
1499             pitch[1] = obj_image->image.pitches[U];
1500             offset[1] = obj_image->image.offsets[U];
1501             width[2] = obj_image->image.width / 2;
1502             height[2] = obj_image->image.height / 2;
1503             pitch[2] = obj_image->image.pitches[V];
1504             offset[2] = obj_image->image.offsets[V];
1505         }
1506     }
1507
1508     if (is_target) {
1509         gen7_pp_set_surface_state(ctx, pp_context,
1510                                   bo, 0,
1511                                   width[0] / 4, height[0], pitch[0],
1512                                   I965_SURFACEFORMAT_R8_SINT,
1513                                   base_index, 1);
1514
1515         if (!packed_yuv) {
1516             if (interleaved_uv) {
1517                 gen7_pp_set_surface_state(ctx, pp_context,
1518                                           bo, offset[1],
1519                                           width[1] / 2, height[1], pitch[1],
1520                                           I965_SURFACEFORMAT_R8G8_SINT,
1521                                           base_index + 1, 1);
1522             } else {
1523                 gen7_pp_set_surface_state(ctx, pp_context,
1524                                           bo, offset[1],
1525                                           width[1] / 4, height[1], pitch[1],
1526                                           I965_SURFACEFORMAT_R8_SINT,
1527                                           base_index + 1, 1);
1528                 gen7_pp_set_surface_state(ctx, pp_context,
1529                                           bo, offset[2],
1530                                           width[2] / 4, height[2], pitch[2],
1531                                           I965_SURFACEFORMAT_R8_SINT,
1532                                           base_index + 2, 1);
1533             }
1534         }
1535     } else {
1536         int format0 = SURFACE_FORMAT_Y8_UNORM;
1537
1538         switch (fourcc) {
1539         case VA_FOURCC('Y', 'U', 'Y', '2'):
1540             format0 = SURFACE_FORMAT_YCRCB_NORMAL;
1541             break;
1542
1543         case VA_FOURCC('U', 'Y', 'V', 'Y'):
1544             format0 = SURFACE_FORMAT_YCRCB_SWAPY;
1545             break;
1546
1547         default:
1548             break;
1549         }
1550
1551         gen7_pp_set_surface2_state(ctx, pp_context,
1552                                    bo, offset[0],
1553                                    width[0], height[0], pitch[0],
1554                                    0, 0,
1555                                    format0, 0,
1556                                    base_index);
1557
1558         if (!packed_yuv) {
1559             if (interleaved_uv) {
1560                 gen7_pp_set_surface2_state(ctx, pp_context,
1561                                            bo, offset[1],
1562                                            width[1], height[1], pitch[1],
1563                                            0, 0,
1564                                            SURFACE_FORMAT_R8B8_UNORM, 0,
1565                                            base_index + 1);
1566             } else {
1567                 gen7_pp_set_surface2_state(ctx, pp_context,
1568                                            bo, offset[1],
1569                                            width[1], height[1], pitch[1],
1570                                            0, 0,
1571                                            SURFACE_FORMAT_R8_UNORM, 0,
1572                                            base_index + 1);
1573                 gen7_pp_set_surface2_state(ctx, pp_context,
1574                                            bo, offset[2],
1575                                            width[2], height[2], pitch[2],
1576                                            0, 0,
1577                                            SURFACE_FORMAT_R8_UNORM, 0,
1578                                            base_index + 2);
1579             }
1580         }
1581     }
1582 }
1583
/* Null module: a single horizontal step; the private context is ignored. */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1589
/* Null module: a single vertical step; the private context is ignored. */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1595
/*
 * Null module: nothing to set up for a block. Always returns 0, which the
 * object walker treats as "emit this block".
 */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;
    (void)x;
    (void)y;

    return 0;
}
1601
1602 static VAStatus
1603 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1604                    const struct i965_surface *src_surface,
1605                    const VARectangle *src_rect,
1606                    struct i965_surface *dst_surface,
1607                    const VARectangle *dst_rect,
1608                    void *filter_param)
1609 {
1610     /* private function & data */
1611     pp_context->pp_x_steps = pp_null_x_steps;
1612     pp_context->pp_y_steps = pp_null_y_steps;
1613     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
1614
1615     dst_surface->flags = src_surface->flags;
1616
1617     return VA_STATUS_SUCCESS;
1618 }
1619
/* Load/save module: a single horizontal step; the private context is ignored. */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1625
1626 static int
1627 pp_load_save_y_steps(void *private_context)
1628 {
1629     struct pp_load_save_context *pp_load_save_context = private_context;
1630
1631     return pp_load_save_context->dest_h / 8;
1632 }
1633
1634 static int
1635 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1636 {
1637     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1638
1639     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
1640     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
1641     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
1642     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
1643
1644     return 0;
1645 }
1646
1647 static VAStatus
1648 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1649                                 const struct i965_surface *src_surface,
1650                                 const VARectangle *src_rect,
1651                                 struct i965_surface *dst_surface,
1652                                 const VARectangle *dst_rect,
1653                                 void *filter_param)
1654 {
1655     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
1656     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1657     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1658     int width[3], height[3], pitch[3], offset[3];
1659     const int Y = 0;
1660
1661     /* source surface */
1662     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
1663                                     width, height, pitch, offset);
1664
1665     /* destination surface */
1666     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
1667                                     width, height, pitch, offset);
1668
1669     /* private function & data */
1670     pp_context->pp_x_steps = pp_load_save_x_steps;
1671     pp_context->pp_y_steps = pp_load_save_y_steps;
1672     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
1673     pp_load_save_context->dest_h = ALIGN(height[Y], 16);
1674     pp_load_save_context->dest_w = ALIGN(width[Y], 16);
1675
1676     pp_inline_parameter->grf5.block_count_x = ALIGN(width[Y], 16) / 16;   /* 1 x N */
1677     pp_inline_parameter->grf5.number_blocks = ALIGN(width[Y], 16) / 16;
1678
1679     pp_static_parameter->grf3.horizontal_origin_offset = src_rect->x;
1680     pp_static_parameter->grf3.vertical_origin_offset = src_rect->y;
1681
1682     // update u/v offset for packed yuv
1683     i965_update_src_surface_static_parameter (ctx, pp_context, src_surface);
1684     i965_update_dst_surface_static_parameter (ctx, pp_context, dst_surface);
1685
1686     dst_surface->flags = src_surface->flags;
1687
1688     return VA_STATUS_SUCCESS;
1689 }
1690
/* Scaling module: a single horizontal step; the private context is ignored. */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1696
1697 static int
1698 pp_scaling_y_steps(void *private_context)
1699 {
1700     struct pp_scaling_context *pp_scaling_context = private_context;
1701
1702     return pp_scaling_context->dest_h / 8;
1703 }
1704
1705 static int
1706 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1707 {
1708     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
1709     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1710     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1711     float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1712     float src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
1713
1714     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
1715     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
1716     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
1717     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
1718     
1719     return 0;
1720 }
1721
1722 static VAStatus
1723 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1724                            const struct i965_surface *src_surface,
1725                            const VARectangle *src_rect,
1726                            struct i965_surface *dst_surface,
1727                            const VARectangle *dst_rect,
1728                            void *filter_param)
1729 {
1730     struct i965_driver_data *i965 = i965_driver_data(ctx);
1731     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
1732     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1733     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1734     struct object_surface *obj_surface;
1735     struct i965_sampler_state *sampler_state;
1736     int in_w, in_h, in_wpitch, in_hpitch;
1737     int out_w, out_h, out_wpitch, out_hpitch;
1738
1739     /* source surface */
1740     obj_surface = SURFACE(src_surface->id);
1741     in_w = obj_surface->orig_width;
1742     in_h = obj_surface->orig_height;
1743     in_wpitch = obj_surface->width;
1744     in_hpitch = obj_surface->height;
1745
1746     /* source Y surface index 1 */
1747     i965_pp_set_surface_state(ctx, pp_context,
1748                               obj_surface->bo, 0,
1749                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1750                               1, 0);
1751
1752     /* source UV surface index 2 */
1753     i965_pp_set_surface_state(ctx, pp_context,
1754                               obj_surface->bo, in_wpitch * in_hpitch,
1755                               in_w / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1756                               2, 0);
1757
1758     /* destination surface */
1759     obj_surface = SURFACE(dst_surface->id);
1760     out_w = obj_surface->orig_width;
1761     out_h = obj_surface->orig_height;
1762     out_wpitch = obj_surface->width;
1763     out_hpitch = obj_surface->height;
1764
1765     /* destination Y surface index 7 */
1766     i965_pp_set_surface_state(ctx, pp_context,
1767                               obj_surface->bo, 0,
1768                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1769                               7, 1);
1770
1771     /* destination UV surface index 8 */
1772     i965_pp_set_surface_state(ctx, pp_context,
1773                               obj_surface->bo, out_wpitch * out_hpitch,
1774                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1775                               8, 1);
1776
1777     /* sampler state */
1778     dri_bo_map(pp_context->sampler_state_table.bo, True);
1779     assert(pp_context->sampler_state_table.bo->virtual);
1780     sampler_state = pp_context->sampler_state_table.bo->virtual;
1781
1782     /* SIMD16 Y index 1 */
1783     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
1784     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1785     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1786     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1787     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1788
1789     /* SIMD16 UV index 2 */
1790     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
1791     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1792     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1793     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1794     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1795
1796     dri_bo_unmap(pp_context->sampler_state_table.bo);
1797
1798     /* private function & data */
1799     pp_context->pp_x_steps = pp_scaling_x_steps;
1800     pp_context->pp_y_steps = pp_scaling_y_steps;
1801     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
1802
1803     pp_scaling_context->dest_x = dst_rect->x;
1804     pp_scaling_context->dest_y = dst_rect->y;
1805     pp_scaling_context->dest_w = ALIGN(dst_rect->width, 16);
1806     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 16);
1807     pp_scaling_context->src_normalized_x = (float)src_rect->x / in_w;
1808     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
1809
1810     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
1811
1812     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / dst_rect->width;
1813     pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
1814     pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
1815     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
1816     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
1817
1818     dst_surface->flags = src_surface->flags;
1819
1820     return VA_STATUS_SUCCESS;
1821 }
1822
1823 static int
1824 pp_avs_x_steps(void *private_context)
1825 {
1826     struct pp_avs_context *pp_avs_context = private_context;
1827
1828     return pp_avs_context->dest_w / 16;
1829 }
1830
/*
 * pp_y_steps callback installed by pp_nv12_avs_initialize().
 * The AVS path always reports a single vertical step; the private
 * context is not consulted.
 */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1836
1837 static int
1838 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1839 {
1840     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1841     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1842     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1843     float src_x_steping, src_y_steping, video_step_delta;
1844     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
1845
1846     if (pp_static_parameter->grf4.r4_2.avs.nlas == 0) {
1847         src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1848         pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_avs_context->src_normalized_x;
1849     } else if (tmp_w >= pp_avs_context->dest_w) {
1850         pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1851         pp_inline_parameter->grf6.video_step_delta = 0;
1852         
1853         if (x == 0) {
1854             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
1855                 pp_avs_context->src_normalized_x;
1856         } else {
1857             src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1858             video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1859             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1860                 16 * 15 * video_step_delta / 2;
1861         }
1862     } else {
1863         int n0, n1, n2, nls_left, nls_right;
1864         int factor_a = 5, factor_b = 4;
1865         float f;
1866
1867         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
1868         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
1869         n2 = tmp_w / (16 * factor_a);
1870         nls_left = n0 + n2;
1871         nls_right = n1 + n2;
1872         f = (float) n2 * 16 / tmp_w;
1873         
1874         if (n0 < 5) {
1875             pp_inline_parameter->grf6.video_step_delta = 0.0;
1876
1877             if (x == 0) {
1878                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
1879                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1880             } else {
1881                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1882                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1883                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1884                     16 * 15 * video_step_delta / 2;
1885             }
1886         } else {
1887             if (x < nls_left) {
1888                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
1889                 float a = f / (nls_left * 16 * factor_b);
1890                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
1891                 
1892                 pp_inline_parameter->grf6.video_step_delta = b;
1893
1894                 if (x == 0) {
1895                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1896                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a;
1897                 } else {
1898                     src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1899                     video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1900                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1901                         16 * 15 * video_step_delta / 2;
1902                     pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
1903                 }
1904             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
1905                 /* scale the center linearly */
1906                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1907                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1908                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1909                     16 * 15 * video_step_delta / 2;
1910                 pp_inline_parameter->grf6.video_step_delta = 0.0;
1911                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1912             } else {
1913                 float a = f / (nls_right * 16 * factor_b);
1914                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
1915
1916                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
1917                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
1918                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1919                     16 * 15 * video_step_delta / 2;
1920                 pp_inline_parameter->grf6.video_step_delta = -b;
1921
1922                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
1923                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
1924                 else
1925                     pp_inline_parameter->grf5.normalized_video_x_scaling_step -= b * 16;
1926             }
1927         }
1928     }
1929
1930     src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
1931     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
1932     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
1933     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
1934
1935     return 0;
1936 }
1937
1938 static VAStatus
1939 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1940                        const struct i965_surface *src_surface,
1941                        const VARectangle *src_rect,
1942                        struct i965_surface *dst_surface,
1943                        const VARectangle *dst_rect,
1944                        void *filter_param,
1945                        int nlas)
1946 {
1947     struct i965_driver_data *i965 = i965_driver_data(ctx);
1948     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1949     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1950     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1951     struct object_surface *obj_surface;
1952     struct i965_sampler_8x8 *sampler_8x8;
1953     struct i965_sampler_8x8_state *sampler_8x8_state;
1954     int index;
1955     int in_w, in_h, in_wpitch, in_hpitch;
1956     int out_w, out_h, out_wpitch, out_hpitch;
1957     int i;
1958
1959     /* surface */
1960     obj_surface = SURFACE(src_surface->id);
1961     in_w = obj_surface->orig_width;
1962     in_h = obj_surface->orig_height;
1963     in_wpitch = obj_surface->width;
1964     in_hpitch = obj_surface->height;
1965
1966     /* source Y surface index 1 */
1967     i965_pp_set_surface2_state(ctx, pp_context,
1968                                obj_surface->bo, 0,
1969                                in_w, in_h, in_wpitch,
1970                                0, 0,
1971                                SURFACE_FORMAT_Y8_UNORM, 0,
1972                                1);
1973
1974     /* source UV surface index 2 */
1975     i965_pp_set_surface2_state(ctx, pp_context,
1976                                obj_surface->bo, in_wpitch * in_hpitch,
1977                                in_w / 2, in_h / 2, in_wpitch,
1978                                0, 0,
1979                                SURFACE_FORMAT_R8B8_UNORM, 0,
1980                                2);
1981
1982     /* destination surface */
1983     obj_surface = SURFACE(dst_surface->id);
1984     out_w = obj_surface->orig_width;
1985     out_h = obj_surface->orig_height;
1986     out_wpitch = obj_surface->width;
1987     out_hpitch = obj_surface->height;
1988     assert(out_w <= out_wpitch && out_h <= out_hpitch);
1989
1990     /* destination Y surface index 7 */
1991     i965_pp_set_surface_state(ctx, pp_context,
1992                               obj_surface->bo, 0,
1993                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1994                               7, 1);
1995
1996     /* destination UV surface index 8 */
1997     i965_pp_set_surface_state(ctx, pp_context,
1998                               obj_surface->bo, out_wpitch * out_hpitch,
1999                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2000                               8, 1);
2001
2002     /* sampler 8x8 state */
2003     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2004     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2005     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2006     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2007     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2008
2009     for (i = 0; i < 17; i++) {
2010         /* for Y channel, currently ignore */
2011         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x00;
2012         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x00;
2013         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x08;
2014         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x18;
2015         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x18;
2016         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x08;
2017         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x00;
2018         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x00;
2019         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x00;
2020         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x00;
2021         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x10;
2022         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x10;
2023         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x10;
2024         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x10;
2025         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x00;
2026         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x00;
2027         /* for U/V channel, 0.25 */
2028         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2029         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2030         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
2031         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
2032         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
2033         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
2034         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2035         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2036         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2037         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2038         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
2039         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
2040         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
2041         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
2042         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2043         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2044     }
2045
2046     sampler_8x8_state->dw136.default_sharpness_level = 0;
2047     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
2048     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2049     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2050     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2051
2052     /* sampler 8x8 */
2053     dri_bo_map(pp_context->sampler_state_table.bo, True);
2054     assert(pp_context->sampler_state_table.bo->virtual);
2055     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
2056     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2057
2058     /* sample_8x8 Y index 1 */
2059     index = 1;
2060     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2061     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2062     sampler_8x8[index].dw0.ief_bypass = 1;
2063     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2064     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2065     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2066     sampler_8x8[index].dw2.global_noise_estimation = 22;
2067     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2068     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2069     sampler_8x8[index].dw3.strong_edge_weight = 7;
2070     sampler_8x8[index].dw3.regular_weight = 2;
2071     sampler_8x8[index].dw3.non_edge_weight = 0;
2072     sampler_8x8[index].dw3.gain_factor = 40;
2073     sampler_8x8[index].dw4.steepness_boost = 0;
2074     sampler_8x8[index].dw4.steepness_threshold = 0;
2075     sampler_8x8[index].dw4.mr_boost = 0;
2076     sampler_8x8[index].dw4.mr_threshold = 5;
2077     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2078     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2079     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2080     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2081     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2082     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2083     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2084     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2085     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2086     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2087     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2088     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2089     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2090     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2091     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2092     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2093     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2094     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2095     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2096     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2097     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2098     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2099     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2100     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2101     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2102     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2103     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2104     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2105     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2106     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2107     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2108     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2109     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2110     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2111     sampler_8x8[index].dw13.limiter_boost = 0;
2112     sampler_8x8[index].dw13.minimum_limiter = 10;
2113     sampler_8x8[index].dw13.maximum_limiter = 11;
2114     sampler_8x8[index].dw14.clip_limiter = 130;
2115     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2116                       I915_GEM_DOMAIN_RENDER, 
2117                       0,
2118                       0,
2119                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2120                       pp_context->sampler_state_table.bo_8x8);
2121
2122     /* sample_8x8 UV index 2 */
2123     index = 2;
2124     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2125     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2126     sampler_8x8[index].dw0.ief_bypass = 1;
2127     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2128     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2129     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2130     sampler_8x8[index].dw2.global_noise_estimation = 22;
2131     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2132     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2133     sampler_8x8[index].dw3.strong_edge_weight = 7;
2134     sampler_8x8[index].dw3.regular_weight = 2;
2135     sampler_8x8[index].dw3.non_edge_weight = 0;
2136     sampler_8x8[index].dw3.gain_factor = 40;
2137     sampler_8x8[index].dw4.steepness_boost = 0;
2138     sampler_8x8[index].dw4.steepness_threshold = 0;
2139     sampler_8x8[index].dw4.mr_boost = 0;
2140     sampler_8x8[index].dw4.mr_threshold = 5;
2141     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2142     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2143     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2144     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2145     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2146     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2147     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2148     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2149     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2150     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2151     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2152     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2153     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2154     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2155     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2156     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2157     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2158     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2159     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2160     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2161     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2162     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2163     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2164     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2165     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2166     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2167     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2168     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2169     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2170     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2171     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2172     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2173     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2174     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2175     sampler_8x8[index].dw13.limiter_boost = 0;
2176     sampler_8x8[index].dw13.minimum_limiter = 10;
2177     sampler_8x8[index].dw13.maximum_limiter = 11;
2178     sampler_8x8[index].dw14.clip_limiter = 130;
2179     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2180                       I915_GEM_DOMAIN_RENDER, 
2181                       0,
2182                       0,
2183                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2184                       pp_context->sampler_state_table.bo_8x8);
2185
2186     dri_bo_unmap(pp_context->sampler_state_table.bo);
2187
2188     /* private function & data */
2189     pp_context->pp_x_steps = pp_avs_x_steps;
2190     pp_context->pp_y_steps = pp_avs_y_steps;
2191     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
2192
2193     pp_avs_context->dest_x = dst_rect->x;
2194     pp_avs_context->dest_y = dst_rect->y;
2195     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
2196     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
2197     pp_avs_context->src_normalized_x = (float)src_rect->x / in_w;
2198     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
2199     pp_avs_context->src_w = src_rect->width;
2200     pp_avs_context->src_h = src_rect->height;
2201
2202     pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
2203     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2204
2205     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / dst_rect->width;
2206     pp_inline_parameter->grf5.block_count_x = 1;        /* M x 1 */
2207     pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
2208     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
2209     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
2210     pp_inline_parameter->grf6.video_step_delta = 0.0;
2211
2212     dst_surface->flags = src_surface->flags;
2213
2214     return VA_STATUS_SUCCESS;
2215 }
2216
2217 static VAStatus
2218 pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2219                             const struct i965_surface *src_surface,
2220                             const VARectangle *src_rect,
2221                             struct i965_surface *dst_surface,
2222                             const VARectangle *dst_rect,
2223                             void *filter_param)
2224 {
2225     return pp_nv12_avs_initialize(ctx, pp_context,
2226                                   src_surface,
2227                                   src_rect,
2228                                   dst_surface,
2229                                   dst_rect,
2230                                   filter_param,
2231                                   1);
2232 }
2233
2234 static VAStatus
2235 gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2236                              const struct i965_surface *src_surface,
2237                              const VARectangle *src_rect,
2238                              struct i965_surface *dst_surface,
2239                              const VARectangle *dst_rect,
2240                              void *filter_param)
2241 {
2242     return pp_nv12_avs_initialize(ctx, pp_context,
2243                                   src_surface,
2244                                   src_rect,
2245                                   dst_surface,
2246                                   dst_rect,
2247                                   filter_param,
2248                                   0);    
2249 }
2250
2251 static int
2252 gen7_pp_avs_x_steps(void *private_context)
2253 {
2254     struct pp_avs_context *pp_avs_context = private_context;
2255
2256     return pp_avs_context->dest_w / 16;
2257 }
2258
2259 static int
2260 gen7_pp_avs_y_steps(void *private_context)
2261 {
2262     struct pp_avs_context *pp_avs_context = private_context;
2263
2264     return pp_avs_context->dest_h / 16;
2265 }
2266
2267 static int
2268 gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2269 {
2270     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2271     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2272
2273     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2274     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
2275     pp_inline_parameter->grf7.constant_0 = 0xffffffff;
2276     pp_inline_parameter->grf7.sampler_load_main_video_x_scaling_step = 1.0 / pp_avs_context->src_w;
2277
2278     return 0;
2279 }
2280
2281 static void gen7_update_src_surface_uv_offset(VADriverContextP    ctx, 
2282                                               struct i965_post_processing_context *pp_context,
2283                                               const struct i965_surface *surface)
2284 {
2285     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2286     int fourcc = pp_get_surface_fourcc(ctx, surface);
2287     
2288     if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) {
2289         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
2290         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
2291         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
2292     } else if (fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
2293         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 1;
2294         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 0;
2295         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 2;
2296     }
2297 }
2298
2299 static VAStatus
2300 gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2301                            const struct i965_surface *src_surface,
2302                            const VARectangle *src_rect,
2303                            struct i965_surface *dst_surface,
2304                            const VARectangle *dst_rect,
2305                            void *filter_param)
2306 {
2307     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2308     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2309     struct gen7_sampler_8x8 *sampler_8x8;
2310     struct i965_sampler_8x8_state *sampler_8x8_state;
2311     int index, i;
2312     int width[3], height[3], pitch[3], offset[3];
2313
2314     /* source surface */
2315     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
2316                                          width, height, pitch, offset);
2317
2318     /* destination surface */
2319     gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
2320                                          width, height, pitch, offset);
2321
2322     /* sampler 8x8 state */
2323     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2324     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2325     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2326     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2327     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2328
2329     for (i = 0; i < 17; i++) {
2330         /* for Y channel, currently ignore */
2331         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0;
2332         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0;
2333         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0;
2334         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x0;
2335         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x0;
2336         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0;
2337         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0;
2338         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0;
2339         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0;
2340         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0;
2341         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0;
2342         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x0;
2343         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x0;
2344         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0;
2345         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0;
2346         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0;
2347         /* for U/V channel, 0.25 */
2348         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2349         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2350         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
2351         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
2352         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
2353         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
2354         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2355         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2356         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2357         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2358         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
2359         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
2360         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
2361         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
2362         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2363         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2364     }
2365
2366     sampler_8x8_state->dw136.default_sharpness_level = 0;
2367     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
2368     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2369     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2370     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2371
2372     /* sampler 8x8 */
2373     dri_bo_map(pp_context->sampler_state_table.bo, True);
2374     assert(pp_context->sampler_state_table.bo->virtual);
2375     assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
2376     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2377
2378     /* sample_8x8 Y index 4 */
2379     index = 4;
2380     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2381     sampler_8x8[index].dw0.global_noise_estimation = 255;
2382     sampler_8x8[index].dw0.ief_bypass = 1;
2383
2384     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2385
2386     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2387     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2388     sampler_8x8[index].dw2.r5x_coefficient = 9;
2389     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2390     sampler_8x8[index].dw2.r5c_coefficient = 3;
2391
2392     sampler_8x8[index].dw3.r3x_coefficient = 27;
2393     sampler_8x8[index].dw3.r3c_coefficient = 5;
2394     sampler_8x8[index].dw3.gain_factor = 40;
2395     sampler_8x8[index].dw3.non_edge_weight = 1;
2396     sampler_8x8[index].dw3.regular_weight = 2;
2397     sampler_8x8[index].dw3.strong_edge_weight = 7;
2398     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2399
2400     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2401                       I915_GEM_DOMAIN_RENDER, 
2402                       0,
2403                       0,
2404                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2405                       pp_context->sampler_state_table.bo_8x8);
2406
2407     /* sample_8x8 UV index 8 */
2408     index = 8;
2409     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2410     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2411     sampler_8x8[index].dw0.global_noise_estimation = 255;
2412     sampler_8x8[index].dw0.ief_bypass = 1;
2413     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2414     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2415     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2416     sampler_8x8[index].dw2.r5x_coefficient = 9;
2417     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2418     sampler_8x8[index].dw2.r5c_coefficient = 3;
2419     sampler_8x8[index].dw3.r3x_coefficient = 27;
2420     sampler_8x8[index].dw3.r3c_coefficient = 5;
2421     sampler_8x8[index].dw3.gain_factor = 40;
2422     sampler_8x8[index].dw3.non_edge_weight = 1;
2423     sampler_8x8[index].dw3.regular_weight = 2;
2424     sampler_8x8[index].dw3.strong_edge_weight = 7;
2425     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2426
2427     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2428                       I915_GEM_DOMAIN_RENDER, 
2429                       0,
2430                       0,
2431                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2432                       pp_context->sampler_state_table.bo_8x8);
2433
2434     /* sampler_8x8 V, index 12 */
2435     index = 12;
2436     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2437     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2438     sampler_8x8[index].dw0.global_noise_estimation = 255;
2439     sampler_8x8[index].dw0.ief_bypass = 1;
2440     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2441     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2442     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2443     sampler_8x8[index].dw2.r5x_coefficient = 9;
2444     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2445     sampler_8x8[index].dw2.r5c_coefficient = 3;
2446     sampler_8x8[index].dw3.r3x_coefficient = 27;
2447     sampler_8x8[index].dw3.r3c_coefficient = 5;
2448     sampler_8x8[index].dw3.gain_factor = 40;
2449     sampler_8x8[index].dw3.non_edge_weight = 1;
2450     sampler_8x8[index].dw3.regular_weight = 2;
2451     sampler_8x8[index].dw3.strong_edge_weight = 7;
2452     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2453
2454     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2455                       I915_GEM_DOMAIN_RENDER, 
2456                       0,
2457                       0,
2458                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2459                       pp_context->sampler_state_table.bo_8x8);
2460
2461     dri_bo_unmap(pp_context->sampler_state_table.bo);
2462
2463     /* private function & data */
2464     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
2465     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
2466     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
2467
2468     pp_avs_context->dest_x = dst_rect->x;
2469     pp_avs_context->dest_y = dst_rect->y;
2470     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
2471     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
2472     pp_avs_context->src_w = src_rect->width;
2473     pp_avs_context->src_h = src_rect->height;
2474
2475     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
2476     dw = MAX(dw, pp_avs_context->dest_w);
2477
2478     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
2479     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
2480     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) 1.0 / pp_avs_context->dest_h;
2481     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = -(float)pp_avs_context->dest_y / pp_avs_context->dest_h;
2482     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = -(float)pp_avs_context->dest_x / dw;
2483
2484     gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
2485
2486     dst_surface->flags = src_surface->flags;
2487
2488     return VA_STATUS_SUCCESS;
2489 }
2490
/*
 * Number of horizontal steps for the DNDI pass: always a single step.
 * private_context is not consulted.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    const int steps = 1;

    (void)private_context;

    return steps;
}
2496
2497 static int
2498 pp_dndi_y_steps(void *private_context)
2499 {
2500     struct pp_dndi_context *pp_dndi_context = private_context;
2501
2502     return pp_dndi_context->dest_h / 4;
2503 }
2504
2505 static int
2506 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2507 {
2508     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2509
2510     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2511     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
2512
2513     return 0;
2514 }
2515
2516 static VAStatus
2517 pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2518                         const struct i965_surface *src_surface,
2519                         const VARectangle *src_rect,
2520                         struct i965_surface *dst_surface,
2521                         const VARectangle *dst_rect,
2522                         void *filter_param)
2523 {
2524     struct i965_driver_data *i965 = i965_driver_data(ctx);
2525     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
2526     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2527     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2528     struct object_surface *obj_surface;
2529     struct i965_sampler_dndi *sampler_dndi;
2530     int index;
2531     int w, h;
2532     int orig_w, orig_h;
2533     int dndi_top_first = 1;
2534
2535     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
2536         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
2537
2538     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
2539         dndi_top_first = 1;
2540     else
2541         dndi_top_first = 0;
2542
2543     /* surface */
2544     obj_surface = SURFACE(src_surface->id);
2545     orig_w = obj_surface->orig_width;
2546     orig_h = obj_surface->orig_height;
2547     w = obj_surface->width;
2548     h = obj_surface->height;
2549
2550     if (pp_context->stmm.bo == NULL) {
2551         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2552                                            "STMM surface",
2553                                            w * h,
2554                                            4096);
2555         assert(pp_context->stmm.bo);
2556     }
2557
2558     /* source UV surface index 2 */
2559     i965_pp_set_surface_state(ctx, pp_context,
2560                               obj_surface->bo, w * h,
2561                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2562                               2, 0);
2563
2564     /* source YUV surface index 4 */
2565     i965_pp_set_surface2_state(ctx, pp_context,
2566                                obj_surface->bo, 0,
2567                                orig_w, orig_h, w,
2568                                0, h,
2569                                SURFACE_FORMAT_PLANAR_420_8, 1,
2570                                4);
2571
2572     /* source STMM surface index 20 */
2573     i965_pp_set_surface_state(ctx, pp_context,
2574                               pp_context->stmm.bo, 0,
2575                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2576                               20, 1);
2577
2578     /* destination surface */
2579     obj_surface = SURFACE(dst_surface->id);
2580     orig_w = obj_surface->orig_width;
2581     orig_h = obj_surface->orig_height;
2582     w = obj_surface->width;
2583     h = obj_surface->height;
2584
2585     /* destination Y surface index 7 */
2586     i965_pp_set_surface_state(ctx, pp_context,
2587                               obj_surface->bo, 0,
2588                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2589                               7, 1);
2590
2591     /* destination UV surface index 8 */
2592     i965_pp_set_surface_state(ctx, pp_context,
2593                               obj_surface->bo, w * h,
2594                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2595                               8, 1);
2596     /* sampler dndi */
2597     dri_bo_map(pp_context->sampler_state_table.bo, True);
2598     assert(pp_context->sampler_state_table.bo->virtual);
2599     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2600     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2601
2602     /* sample dndi index 1 */
2603     index = 0;
2604     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2605     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
2606     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2607     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2608
2609     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2610     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 4;
2611     sampler_dndi[index].dw1.stmm_c2 = 1;
2612     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2613     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2614
2615     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
2616     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
2617     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
2618     sampler_dndi[index].dw2.good_neighbor_threshold = 4;                // 0-63
2619
2620     sampler_dndi[index].dw3.maximum_stmm = 128;
2621     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2622     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2623     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2624     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2625
2626     sampler_dndi[index].dw4.sdi_delta = 8;
2627     sampler_dndi[index].dw4.sdi_threshold = 128;
2628     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2629     sampler_dndi[index].dw4.stmm_shift_up = 0;
2630     sampler_dndi[index].dw4.stmm_shift_down = 0;
2631     sampler_dndi[index].dw4.minimum_stmm = 0;
2632
2633     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 8;
2634     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 32;
2635     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 64;
2636     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 32;
2637
2638     sampler_dndi[index].dw6.dn_enable = 1;
2639     sampler_dndi[index].dw6.di_enable = 1;
2640     sampler_dndi[index].dw6.di_partial = 0;
2641     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2642     sampler_dndi[index].dw6.dndi_stream_id = 0;
2643     sampler_dndi[index].dw6.dndi_first_frame = 1;
2644     sampler_dndi[index].dw6.progressive_dn = 0;
2645     sampler_dndi[index].dw6.fmd_tear_threshold = 63;
2646     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2647     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2648
2649     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
2650     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
2651     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2652     sampler_dndi[index].dw7.column_width_minus1 = 0;
2653
2654     dri_bo_unmap(pp_context->sampler_state_table.bo);
2655
2656     /* private function & data */
2657     pp_context->pp_x_steps = pp_dndi_x_steps;
2658     pp_context->pp_y_steps = pp_dndi_y_steps;
2659     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
2660
2661     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
2662     pp_static_parameter->grf1.r1_6.di.top_field_first = dndi_top_first;
2663     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 0;
2664     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 0;
2665
2666     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
2667     pp_inline_parameter->grf5.number_blocks = w / 16;
2668     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
2669     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
2670
2671     pp_dndi_context->dest_w = w;
2672     pp_dndi_context->dest_h = h;
2673
2674     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
2675
2676     return VA_STATUS_SUCCESS;
2677 }
2678
/*
 * Number of horizontal steps for the denoise pass: always a single step.
 * private_context is not consulted.
 */
static int
pp_dn_x_steps(void *private_context)
{
    const int steps = 1;

    (void)private_context;

    return steps;
}
2684
2685 static int
2686 pp_dn_y_steps(void *private_context)
2687 {
2688     struct pp_dn_context *pp_dn_context = private_context;
2689
2690     return pp_dn_context->dest_h / 8;
2691 }
2692
2693 static int
2694 pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2695 {
2696     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2697
2698     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2699     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
2700
2701     return 0;
2702 }
2703
2704 static VAStatus
2705 pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2706                       const struct i965_surface *src_surface,
2707                       const VARectangle *src_rect,
2708                       struct i965_surface *dst_surface,
2709                       const VARectangle *dst_rect,
2710                       void *filter_param)
2711 {
2712     struct i965_driver_data *i965 = i965_driver_data(ctx);
2713     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
2714     struct object_surface *obj_surface;
2715     struct i965_sampler_dndi *sampler_dndi;
2716     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2717     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2718     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
2719     int index;
2720     int w, h;
2721     int orig_w, orig_h;
2722     int dn_strength = 15;
2723     int dndi_top_first = 1;
2724     int dn_progressive = 0;
2725
2726     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
2727         dndi_top_first = 1;
2728         dn_progressive = 1;
2729     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
2730         dndi_top_first = 1;
2731         dn_progressive = 0;
2732     } else {
2733         dndi_top_first = 0;
2734         dn_progressive = 0;
2735     }
2736
2737     if (dn_filter_param) {
2738         float value = dn_filter_param->value;
2739         
2740         if (value > 1.0)
2741             value = 1.0;
2742         
2743         if (value < 0.0)
2744             value = 0.0;
2745
2746         dn_strength = (int)(value * 31.0F);
2747     }
2748
2749     /* surface */
2750     obj_surface = SURFACE(src_surface->id);
2751     orig_w = obj_surface->orig_width;
2752     orig_h = obj_surface->orig_height;
2753     w = obj_surface->width;
2754     h = obj_surface->height;
2755
2756     if (pp_context->stmm.bo == NULL) {
2757         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2758                                            "STMM surface",
2759                                            w * h,
2760                                            4096);
2761         assert(pp_context->stmm.bo);
2762     }
2763
2764     /* source UV surface index 2 */
2765     i965_pp_set_surface_state(ctx, pp_context,
2766                               obj_surface->bo, w * h,
2767                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2768                               2, 0);
2769
2770     /* source YUV surface index 4 */
2771     i965_pp_set_surface2_state(ctx, pp_context,
2772                                obj_surface->bo, 0,
2773                                orig_w, orig_h, w,
2774                                0, h,
2775                                SURFACE_FORMAT_PLANAR_420_8, 1,
2776                                4);
2777
2778     /* source STMM surface index 20 */
2779     i965_pp_set_surface_state(ctx, pp_context,
2780                               pp_context->stmm.bo, 0,
2781                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2782                               20, 1);
2783
2784     /* destination surface */
2785     obj_surface = SURFACE(dst_surface->id);
2786     orig_w = obj_surface->orig_width;
2787     orig_h = obj_surface->orig_height;
2788     w = obj_surface->width;
2789     h = obj_surface->height;
2790
2791     /* destination Y surface index 7 */
2792     i965_pp_set_surface_state(ctx, pp_context,
2793                               obj_surface->bo, 0,
2794                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2795                               7, 1);
2796
2797     /* destination UV surface index 8 */
2798     i965_pp_set_surface_state(ctx, pp_context,
2799                               obj_surface->bo, w * h,
2800                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2801                               8, 1);
2802     /* sampler dn */
2803     dri_bo_map(pp_context->sampler_state_table.bo, True);
2804     assert(pp_context->sampler_state_table.bo->virtual);
2805     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
2806     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
2807
2808     /* sample dndi index 1 */
2809     index = 0;
2810     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
2811     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
2812     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
2813     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
2814
2815     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
2816     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
2817     sampler_dndi[index].dw1.stmm_c2 = 0;
2818     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
2819     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
2820
2821     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
2822     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
2823     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
2824     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
2825
2826     sampler_dndi[index].dw3.maximum_stmm = 128;
2827     sampler_dndi[index].dw3.multipler_for_vecm = 2;
2828     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
2829     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
2830     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
2831
2832     sampler_dndi[index].dw4.sdi_delta = 8;
2833     sampler_dndi[index].dw4.sdi_threshold = 128;
2834     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
2835     sampler_dndi[index].dw4.stmm_shift_up = 0;
2836     sampler_dndi[index].dw4.stmm_shift_down = 0;
2837     sampler_dndi[index].dw4.minimum_stmm = 0;
2838
2839     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
2840     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
2841     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
2842     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
2843
2844     sampler_dndi[index].dw6.dn_enable = 1;
2845     sampler_dndi[index].dw6.di_enable = 0;
2846     sampler_dndi[index].dw6.di_partial = 0;
2847     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
2848     sampler_dndi[index].dw6.dndi_stream_id = 1;
2849     sampler_dndi[index].dw6.dndi_first_frame = 1;
2850     sampler_dndi[index].dw6.progressive_dn = dn_progressive;
2851     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
2852     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
2853     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
2854
2855     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
2856     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
2857     sampler_dndi[index].dw7.vdi_walker_enable = 0;
2858     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
2859
2860     dri_bo_unmap(pp_context->sampler_state_table.bo);
2861
2862     /* private function & data */
2863     pp_context->pp_x_steps = pp_dn_x_steps;
2864     pp_context->pp_y_steps = pp_dn_y_steps;
2865     pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
2866
2867     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
2868     pp_static_parameter->grf1.r1_6.di.top_field_first = 0;
2869     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 64;
2870     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 192;
2871
2872     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
2873     pp_inline_parameter->grf5.number_blocks = w / 16;
2874     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
2875     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
2876
2877     pp_dn_context->dest_w = w;
2878     pp_dn_context->dest_h = h;
2879
2880     dst_surface->flags = src_surface->flags;
2881     
2882     return VA_STATUS_SUCCESS;
2883 }
2884
2885 static int
2886 gen7_pp_dndi_x_steps(void *private_context)
2887 {
2888     struct pp_dndi_context *pp_dndi_context = private_context;
2889
2890     return pp_dndi_context->dest_w / 16;
2891 }
2892
2893 static int
2894 gen7_pp_dndi_y_steps(void *private_context)
2895 {
2896     struct pp_dndi_context *pp_dndi_context = private_context;
2897
2898     return pp_dndi_context->dest_h / 4;
2899 }
2900
2901 static int
2902 gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2903 {
2904     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2905
2906     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16;
2907     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 4;
2908
2909     return 0;
2910 }
2911
2912 static VAStatus
2913 gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2914                              const struct i965_surface *src_surface,
2915                              const VARectangle *src_rect,
2916                              struct i965_surface *dst_surface,
2917                              const VARectangle *dst_rect,
2918                              void *filter_param)
2919 {
2920     struct i965_driver_data *i965 = i965_driver_data(ctx);
2921     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
2922     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2923     struct object_surface *obj_surface;
2924     struct gen7_sampler_dndi *sampler_dndi;
2925     int index;
2926     int w, h;
2927     int orig_w, orig_h;
2928     int dndi_top_first = 1;
2929
2930     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
2931         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
2932
2933     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
2934         dndi_top_first = 1;
2935     else
2936         dndi_top_first = 0;
2937
2938     /* surface */
2939     obj_surface = SURFACE(src_surface->id);
2940     orig_w = obj_surface->orig_width;
2941     orig_h = obj_surface->orig_height;
2942     w = obj_surface->width;
2943     h = obj_surface->height;
2944
2945     if (pp_context->stmm.bo == NULL) {
2946         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
2947                                            "STMM surface",
2948                                            w * h,
2949                                            4096);
2950         assert(pp_context->stmm.bo);
2951     }
2952
2953     /* source UV surface index 1 */
2954     gen7_pp_set_surface_state(ctx, pp_context,
2955                               obj_surface->bo, w * h,
2956                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2957                               1, 0);
2958
2959     /* source YUV surface index 3 */
2960     gen7_pp_set_surface2_state(ctx, pp_context,
2961                                obj_surface->bo, 0,
2962                                orig_w, orig_h, w,
2963                                0, h,
2964                                SURFACE_FORMAT_PLANAR_420_8, 1,
2965                                3);
2966
2967     /* source (temporal reference) YUV surface index 4 */
2968     gen7_pp_set_surface2_state(ctx, pp_context,
2969                                obj_surface->bo, 0,
2970                                orig_w, orig_h, w,
2971                                0, h,
2972                                SURFACE_FORMAT_PLANAR_420_8, 1,
2973                                4);
2974
2975     /* STMM / History Statistics input surface, index 5 */
2976     gen7_pp_set_surface_state(ctx, pp_context,
2977                               pp_context->stmm.bo, 0,
2978                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2979                               5, 1);
2980
2981     /* destination surface */
2982     obj_surface = SURFACE(dst_surface->id);
2983     orig_w = obj_surface->orig_width;
2984     orig_h = obj_surface->orig_height;
2985     w = obj_surface->width;
2986     h = obj_surface->height;
2987
2988     /* destination(Previous frame) Y surface index 27 */
2989     gen7_pp_set_surface_state(ctx, pp_context,
2990                               obj_surface->bo, 0,
2991                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
2992                               27, 1);
2993
2994     /* destination(Previous frame) UV surface index 28 */
2995     gen7_pp_set_surface_state(ctx, pp_context,
2996                               obj_surface->bo, w * h,
2997                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
2998                               28, 1);
2999
3000     /* destination(Current frame) Y surface index 30 */
3001     gen7_pp_set_surface_state(ctx, pp_context,
3002                               obj_surface->bo, 0,
3003                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3004                               30, 1);
3005
3006     /* destination(Current frame) UV surface index 31 */
3007     gen7_pp_set_surface_state(ctx, pp_context,
3008                               obj_surface->bo, w * h,
3009                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3010                               31, 1);
3011
3012     /* STMM output surface, index 33 */
3013     gen7_pp_set_surface_state(ctx, pp_context,
3014                               pp_context->stmm.bo, 0,
3015                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3016                               33, 1);
3017
3018
3019     /* sampler dndi */
3020     dri_bo_map(pp_context->sampler_state_table.bo, True);
3021     assert(pp_context->sampler_state_table.bo->virtual);
3022     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3023     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3024
3025     /* sample dndi index 0 */
3026     index = 0;
3027     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
3028     sampler_dndi[index].dw0.dnmh_delt = 8;
3029     sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
3030     sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
3031     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
3032     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
3033
3034     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3035     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
3036     sampler_dndi[index].dw1.stmm_c2 = 0;
3037     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
3038     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
3039
3040     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
3041     sampler_dndi[index].dw2.bne_edge_th = 1;
3042     sampler_dndi[index].dw2.smooth_mv_th = 0;
3043     sampler_dndi[index].dw2.sad_tight_th = 5;
3044     sampler_dndi[index].dw2.cat_slope_minus1 = 9;
3045     sampler_dndi[index].dw2.good_neighbor_th = 4;
3046
3047     sampler_dndi[index].dw3.maximum_stmm = 128;
3048     sampler_dndi[index].dw3.multipler_for_vecm = 2;
3049     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3050     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3051     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3052
3053     sampler_dndi[index].dw4.sdi_delta = 8;
3054     sampler_dndi[index].dw4.sdi_threshold = 128;
3055     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3056     sampler_dndi[index].dw4.stmm_shift_up = 0;
3057     sampler_dndi[index].dw4.stmm_shift_down = 0;
3058     sampler_dndi[index].dw4.minimum_stmm = 0;
3059
3060     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
3061     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
3062     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3063     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3064
3065     sampler_dndi[index].dw6.dn_enable = 0;
3066     sampler_dndi[index].dw6.di_enable = 1;
3067     sampler_dndi[index].dw6.di_partial = 0;
3068     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3069     sampler_dndi[index].dw6.dndi_stream_id = 1;
3070     sampler_dndi[index].dw6.dndi_first_frame = 1;
3071     sampler_dndi[index].dw6.progressive_dn = 0;
3072     sampler_dndi[index].dw6.mcdi_enable = 0;
3073     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
3074     sampler_dndi[index].dw6.cat_th1 = 0;
3075     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
3076     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
3077
3078     sampler_dndi[index].dw7.sad_tha = 5;
3079     sampler_dndi[index].dw7.sad_thb = 10;
3080     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3081     sampler_dndi[index].dw7.mc_pixel_consistency_th = 25;
3082     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3083     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3084     sampler_dndi[index].dw7.neighborpixel_th = 10;
3085     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3086
3087     dri_bo_unmap(pp_context->sampler_state_table.bo);
3088
3089     /* private function & data */
3090     pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
3091     pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
3092     pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
3093
3094     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3095     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3096     pp_static_parameter->grf1.di_top_field_first = 0;
3097     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3098
3099     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3100     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3101     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3102
3103     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3104     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3105
3106     pp_dndi_context->dest_w = w;
3107     pp_dndi_context->dest_h = h;
3108
3109     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3110
3111     return VA_STATUS_SUCCESS;
3112 }
3113
/*
 * Horizontal step count callback for the gen7 denoise (DN) pass.
 *
 * The private context is not consulted; the result is always 1.
 */
static int
gen7_pp_dn_x_steps(void *private_context)
{
    const int x_steps = 1;

    (void)private_context;

    return x_steps;
}
3119
3120 static int
3121 gen7_pp_dn_y_steps(void *private_context)
3122 {
3123     struct pp_dn_context *pp_dn_context = private_context;
3124
3125     return pp_dn_context->dest_h / 4;
3126 }
3127
3128 static int
3129 gen7_pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3130 {
3131     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3132
3133     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3134     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
3135
3136     return 0;
3137 }
3138
3139 static VAStatus
3140 gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3141                            const struct i965_surface *src_surface,
3142                            const VARectangle *src_rect,
3143                            struct i965_surface *dst_surface,
3144                            const VARectangle *dst_rect,
3145                            void *filter_param)
3146 {
3147     struct i965_driver_data *i965 = i965_driver_data(ctx);
3148     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
3149     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3150     struct object_surface *obj_surface;
3151     struct gen7_sampler_dndi *sampler_dn;
3152     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3153     int index;
3154     int w, h;
3155     int orig_w, orig_h;
3156     int dn_strength = 15;
3157     int dndi_top_first = 1;
3158     int dn_progressive = 0;
3159
3160     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3161         dndi_top_first = 1;
3162         dn_progressive = 1;
3163     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3164         dndi_top_first = 1;
3165         dn_progressive = 0;
3166     } else {
3167         dndi_top_first = 0;
3168         dn_progressive = 0;
3169     }
3170
3171     if (dn_filter_param) {
3172         float value = dn_filter_param->value;
3173         
3174         if (value > 1.0)
3175             value = 1.0;
3176         
3177         if (value < 0.0)
3178             value = 0.0;
3179
3180         dn_strength = (int)(value * 31.0F);
3181     }
3182
3183     /* surface */
3184     obj_surface = SURFACE(src_surface->id);
3185     orig_w = obj_surface->orig_width;
3186     orig_h = obj_surface->orig_height;
3187     w = obj_surface->width;
3188     h = obj_surface->height;
3189
3190     if (pp_context->stmm.bo == NULL) {
3191         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
3192                                            "STMM surface",
3193                                            w * h,
3194                                            4096);
3195         assert(pp_context->stmm.bo);
3196     }
3197
3198     /* source UV surface index 1 */
3199     gen7_pp_set_surface_state(ctx, pp_context,
3200                               obj_surface->bo, w * h,
3201                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3202                               1, 0);
3203
3204     /* source YUV surface index 3 */
3205     gen7_pp_set_surface2_state(ctx, pp_context,
3206                                obj_surface->bo, 0,
3207                                orig_w, orig_h, w,
3208                                0, h,
3209                                SURFACE_FORMAT_PLANAR_420_8, 1,
3210                                3);
3211
3212     /* source STMM surface index 5 */
3213     gen7_pp_set_surface_state(ctx, pp_context,
3214                               pp_context->stmm.bo, 0,
3215                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3216                               5, 1);
3217
3218     /* destination surface */
3219     obj_surface = SURFACE(dst_surface->id);
3220     orig_w = obj_surface->orig_width;
3221     orig_h = obj_surface->orig_height;
3222     w = obj_surface->width;
3223     h = obj_surface->height;
3224
3225     /* destination Y surface index 7 */
3226     gen7_pp_set_surface_state(ctx, pp_context,
3227                               obj_surface->bo, 0,
3228                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3229                               7, 1);
3230
3231     /* destination UV surface index 8 */
3232     gen7_pp_set_surface_state(ctx, pp_context,
3233                               obj_surface->bo, w * h,
3234                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3235                               8, 1);
3236     /* sampler dn */
3237     dri_bo_map(pp_context->sampler_state_table.bo, True);
3238     assert(pp_context->sampler_state_table.bo->virtual);
3239     assert(sizeof(*sampler_dn) == sizeof(int) * 8);
3240     sampler_dn = pp_context->sampler_state_table.bo->virtual;
3241
3242     /* sample dn index 1 */
3243     index = 0;
3244     sampler_dn[index].dw0.denoise_asd_threshold = 0;
3245     sampler_dn[index].dw0.dnmh_delt = 8;
3246     sampler_dn[index].dw0.vdi_walker_y_stride = 0;
3247     sampler_dn[index].dw0.vdi_walker_frame_sharing_enable = 0;
3248     sampler_dn[index].dw0.denoise_maximum_history = 128;      // 128-240
3249     sampler_dn[index].dw0.denoise_stad_threshold = 0;
3250
3251     sampler_dn[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3252     sampler_dn[index].dw1.denoise_moving_pixel_threshold = 0;
3253     sampler_dn[index].dw1.stmm_c2 = 0;
3254     sampler_dn[index].dw1.low_temporal_difference_threshold = 8;
3255     sampler_dn[index].dw1.temporal_difference_threshold = 16;
3256
3257     sampler_dn[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
3258     sampler_dn[index].dw2.bne_edge_th = 1;
3259     sampler_dn[index].dw2.smooth_mv_th = 0;
3260     sampler_dn[index].dw2.sad_tight_th = 5;
3261     sampler_dn[index].dw2.cat_slope_minus1 = 9;
3262     sampler_dn[index].dw2.good_neighbor_th = 4;
3263
3264     sampler_dn[index].dw3.maximum_stmm = 128;
3265     sampler_dn[index].dw3.multipler_for_vecm = 2;
3266     sampler_dn[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3267     sampler_dn[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3268     sampler_dn[index].dw3.stmm_blending_constant_select = 0;
3269
3270     sampler_dn[index].dw4.sdi_delta = 8;
3271     sampler_dn[index].dw4.sdi_threshold = 128;
3272     sampler_dn[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3273     sampler_dn[index].dw4.stmm_shift_up = 0;
3274     sampler_dn[index].dw4.stmm_shift_down = 0;
3275     sampler_dn[index].dw4.minimum_stmm = 0;
3276
3277     sampler_dn[index].dw5.fmd_temporal_difference_threshold = 0;
3278     sampler_dn[index].dw5.sdi_fallback_mode_2_constant = 0;
3279     sampler_dn[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3280     sampler_dn[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3281
3282     sampler_dn[index].dw6.dn_enable = 1;
3283     sampler_dn[index].dw6.di_enable = 0;
3284     sampler_dn[index].dw6.di_partial = 0;
3285     sampler_dn[index].dw6.dndi_top_first = dndi_top_first;
3286     sampler_dn[index].dw6.dndi_stream_id = 1;
3287     sampler_dn[index].dw6.dndi_first_frame = 1;
3288     sampler_dn[index].dw6.progressive_dn = dn_progressive;
3289     sampler_dn[index].dw6.mcdi_enable = 0;
3290     sampler_dn[index].dw6.fmd_tear_threshold = 32;
3291     sampler_dn[index].dw6.cat_th1 = 0;
3292     sampler_dn[index].dw6.fmd2_vertical_difference_threshold = 32;
3293     sampler_dn[index].dw6.fmd1_vertical_difference_threshold = 32;
3294
3295     sampler_dn[index].dw7.sad_tha = 5;
3296     sampler_dn[index].dw7.sad_thb = 10;
3297     sampler_dn[index].dw7.fmd_for_1st_field_of_current_frame = 2;
3298     sampler_dn[index].dw7.mc_pixel_consistency_th = 25;
3299     sampler_dn[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
3300     sampler_dn[index].dw7.vdi_walker_enable = 0;
3301     sampler_dn[index].dw7.neighborpixel_th = 10;
3302     sampler_dn[index].dw7.column_width_minus1 = w / 16;
3303
3304     dri_bo_unmap(pp_context->sampler_state_table.bo);
3305
3306     /* private function & data */
3307     pp_context->pp_x_steps = gen7_pp_dn_x_steps;
3308     pp_context->pp_y_steps = gen7_pp_dn_y_steps;
3309     pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;
3310
3311     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3312     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3313     pp_static_parameter->grf1.di_top_field_first = 0;
3314     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3315
3316     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3317     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3318     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3319
3320     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3321     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3322
3323     pp_dn_context->dest_w = w;
3324     pp_dn_context->dest_h = h;
3325
3326     dst_surface->flags = src_surface->flags;
3327
3328     return VA_STATUS_SUCCESS;
3329 }
3330
3331 static VAStatus
3332 ironlake_pp_initialize(
3333     VADriverContextP   ctx,
3334     struct i965_post_processing_context *pp_context,
3335     const struct i965_surface *src_surface,
3336     const VARectangle *src_rect,
3337     struct i965_surface *dst_surface,
3338     const VARectangle *dst_rect,
3339     int                pp_index,
3340     void *filter_param
3341 )
3342 {
3343     VAStatus va_status;
3344     struct i965_driver_data *i965 = i965_driver_data(ctx);
3345     struct pp_module *pp_module;
3346     dri_bo *bo;
3347     int static_param_size, inline_param_size;
3348
3349     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3350     bo = dri_bo_alloc(i965->intel.bufmgr,
3351                       "surface state & binding table",
3352                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
3353                       4096);
3354     assert(bo);
3355     pp_context->surface_state_binding_table.bo = bo;
3356
3357     dri_bo_unreference(pp_context->curbe.bo);
3358     bo = dri_bo_alloc(i965->intel.bufmgr,
3359                       "constant buffer",
3360                       4096, 
3361                       4096);
3362     assert(bo);
3363     pp_context->curbe.bo = bo;
3364
3365     dri_bo_unreference(pp_context->idrt.bo);
3366     bo = dri_bo_alloc(i965->intel.bufmgr, 
3367                       "interface discriptor", 
3368                       sizeof(struct i965_interface_descriptor), 
3369                       4096);
3370     assert(bo);
3371     pp_context->idrt.bo = bo;
3372     pp_context->idrt.num_interface_descriptors = 0;
3373
3374     dri_bo_unreference(pp_context->sampler_state_table.bo);
3375     bo = dri_bo_alloc(i965->intel.bufmgr, 
3376                       "sampler state table", 
3377                       4096,
3378                       4096);
3379     assert(bo);
3380     dri_bo_map(bo, True);
3381     memset(bo->virtual, 0, bo->size);
3382     dri_bo_unmap(bo);
3383     pp_context->sampler_state_table.bo = bo;
3384
3385     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3386     bo = dri_bo_alloc(i965->intel.bufmgr, 
3387                       "sampler 8x8 state ",
3388                       4096,
3389                       4096);
3390     assert(bo);
3391     pp_context->sampler_state_table.bo_8x8 = bo;
3392
3393     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3394     bo = dri_bo_alloc(i965->intel.bufmgr, 
3395                       "sampler 8x8 state ",
3396                       4096,
3397                       4096);
3398     assert(bo);
3399     pp_context->sampler_state_table.bo_8x8_uv = bo;
3400
3401     dri_bo_unreference(pp_context->vfe_state.bo);
3402     bo = dri_bo_alloc(i965->intel.bufmgr, 
3403                       "vfe state", 
3404                       sizeof(struct i965_vfe_state), 
3405                       4096);
3406     assert(bo);
3407     pp_context->vfe_state.bo = bo;
3408
3409     static_param_size = sizeof(struct pp_static_parameter);
3410     inline_param_size = sizeof(struct pp_inline_parameter);
3411
3412     memset(pp_context->pp_static_parameter, 0, static_param_size);
3413     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
3414     
3415     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
3416     pp_context->current_pp = pp_index;
3417     pp_module = &pp_context->pp_modules[pp_index];
3418     
3419     if (pp_module->initialize)
3420         va_status = pp_module->initialize(ctx, pp_context,
3421                                           src_surface,
3422                                           src_rect,
3423                                           dst_surface,
3424                                           dst_rect,
3425                                           filter_param);
3426     else
3427         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
3428
3429     return va_status;
3430 }
3431
3432 static VAStatus
3433 ironlake_post_processing(
3434     VADriverContextP   ctx,
3435     struct i965_post_processing_context *pp_context,
3436     const struct i965_surface *src_surface,
3437     const VARectangle *src_rect,
3438     struct i965_surface *dst_surface,
3439     const VARectangle *dst_rect,
3440     int                pp_index,
3441     void *filter_param
3442 )
3443 {
3444     VAStatus va_status;
3445
3446     va_status = ironlake_pp_initialize(ctx, pp_context,
3447                                        src_surface,
3448                                        src_rect,
3449                                        dst_surface,
3450                                        dst_rect,
3451                                        pp_index,
3452                                        filter_param);
3453
3454     if (va_status == VA_STATUS_SUCCESS) {
3455         ironlake_pp_states_setup(ctx, pp_context);
3456         ironlake_pp_pipeline_setup(ctx, pp_context);
3457     }
3458
3459     return va_status;
3460 }
3461
3462 static VAStatus
3463 gen6_pp_initialize(
3464     VADriverContextP   ctx,
3465     struct i965_post_processing_context *pp_context,
3466     const struct i965_surface *src_surface,
3467     const VARectangle *src_rect,
3468     struct i965_surface *dst_surface,
3469     const VARectangle *dst_rect,
3470     int                pp_index,
3471     void *filter_param
3472 )
3473 {
3474     VAStatus va_status;
3475     struct i965_driver_data *i965 = i965_driver_data(ctx);
3476     struct pp_module *pp_module;
3477     dri_bo *bo;
3478     int static_param_size, inline_param_size;
3479
3480     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3481     bo = dri_bo_alloc(i965->intel.bufmgr,
3482                       "surface state & binding table",
3483                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
3484                       4096);
3485     assert(bo);
3486     pp_context->surface_state_binding_table.bo = bo;
3487
3488     dri_bo_unreference(pp_context->curbe.bo);
3489     bo = dri_bo_alloc(i965->intel.bufmgr,
3490                       "constant buffer",
3491                       4096, 
3492                       4096);
3493     assert(bo);
3494     pp_context->curbe.bo = bo;
3495
3496     dri_bo_unreference(pp_context->idrt.bo);
3497     bo = dri_bo_alloc(i965->intel.bufmgr, 
3498                       "interface discriptor", 
3499                       sizeof(struct gen6_interface_descriptor_data), 
3500                       4096);
3501     assert(bo);
3502     pp_context->idrt.bo = bo;
3503     pp_context->idrt.num_interface_descriptors = 0;
3504
3505     dri_bo_unreference(pp_context->sampler_state_table.bo);
3506     bo = dri_bo_alloc(i965->intel.bufmgr, 
3507                       "sampler state table", 
3508                       4096,
3509                       4096);
3510     assert(bo);
3511     dri_bo_map(bo, True);
3512     memset(bo->virtual, 0, bo->size);
3513     dri_bo_unmap(bo);
3514     pp_context->sampler_state_table.bo = bo;
3515
3516     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3517     bo = dri_bo_alloc(i965->intel.bufmgr, 
3518                       "sampler 8x8 state ",
3519                       4096,
3520                       4096);
3521     assert(bo);
3522     pp_context->sampler_state_table.bo_8x8 = bo;
3523
3524     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3525     bo = dri_bo_alloc(i965->intel.bufmgr, 
3526                       "sampler 8x8 state ",
3527                       4096,
3528                       4096);
3529     assert(bo);
3530     pp_context->sampler_state_table.bo_8x8_uv = bo;
3531
3532     dri_bo_unreference(pp_context->vfe_state.bo);
3533     bo = dri_bo_alloc(i965->intel.bufmgr, 
3534                       "vfe state", 
3535                       sizeof(struct i965_vfe_state), 
3536                       4096);
3537     assert(bo);
3538     pp_context->vfe_state.bo = bo;
3539     
3540     if (IS_GEN7(i965->intel.device_id)) {
3541         static_param_size = sizeof(struct gen7_pp_static_parameter);
3542         inline_param_size = sizeof(struct gen7_pp_inline_parameter);
3543     } else {
3544         static_param_size = sizeof(struct pp_static_parameter);
3545         inline_param_size = sizeof(struct pp_inline_parameter);
3546     }
3547
3548     memset(pp_context->pp_static_parameter, 0, static_param_size);
3549     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
3550
3551     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
3552     pp_context->current_pp = pp_index;
3553     pp_module = &pp_context->pp_modules[pp_index];
3554     
3555     if (pp_module->initialize)
3556         va_status = pp_module->initialize(ctx, pp_context,
3557                                           src_surface,
3558                                           src_rect,
3559                                           dst_surface,
3560                                           dst_rect,
3561                                           filter_param);
3562     else
3563         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
3564
3565     return va_status;
3566 }
3567
3568 static void
3569 gen6_pp_interface_descriptor_table(VADriverContextP   ctx,
3570                                    struct i965_post_processing_context *pp_context)
3571 {
3572     struct i965_driver_data *i965 = i965_driver_data(ctx);
3573     struct gen6_interface_descriptor_data *desc;
3574     dri_bo *bo;
3575     int pp_index = pp_context->current_pp;
3576
3577     bo = pp_context->idrt.bo;
3578     dri_bo_map(bo, True);
3579     assert(bo->virtual);
3580     desc = bo->virtual;
3581     memset(desc, 0, sizeof(*desc));
3582     desc->desc0.kernel_start_pointer = 
3583         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
3584     desc->desc1.single_program_flow = 1;
3585     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
3586     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
3587     desc->desc2.sampler_state_pointer = 
3588         pp_context->sampler_state_table.bo->offset >> 5;
3589     desc->desc3.binding_table_entry_count = 0;
3590     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
3591     desc->desc4.constant_urb_entry_read_offset = 0;
3592
3593     if (IS_GEN7(i965->intel.device_id))
3594         desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */
3595     else
3596         desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
3597
3598     dri_bo_emit_reloc(bo,
3599                       I915_GEM_DOMAIN_INSTRUCTION, 0,
3600                       0,
3601                       offsetof(struct gen6_interface_descriptor_data, desc0),
3602                       pp_context->pp_modules[pp_index].kernel.bo);
3603
3604     dri_bo_emit_reloc(bo,
3605                       I915_GEM_DOMAIN_INSTRUCTION, 0,
3606                       desc->desc2.sampler_count << 2,
3607                       offsetof(struct gen6_interface_descriptor_data, desc2),
3608                       pp_context->sampler_state_table.bo);
3609
3610     dri_bo_unmap(bo);
3611     pp_context->idrt.num_interface_descriptors++;
3612 }
3613
3614 static void
3615 gen6_pp_upload_constants(VADriverContextP ctx,
3616                          struct i965_post_processing_context *pp_context)
3617 {
3618     struct i965_driver_data *i965 = i965_driver_data(ctx);
3619     unsigned char *constant_buffer;
3620     int param_size;
3621
3622     assert(sizeof(struct pp_static_parameter) == 128);
3623     assert(sizeof(struct gen7_pp_static_parameter) == 192);
3624
3625     if (IS_GEN7(i965->intel.device_id))
3626         param_size = sizeof(struct gen7_pp_static_parameter);
3627     else
3628         param_size = sizeof(struct pp_static_parameter);
3629
3630     dri_bo_map(pp_context->curbe.bo, 1);
3631     assert(pp_context->curbe.bo->virtual);
3632     constant_buffer = pp_context->curbe.bo->virtual;
3633     memcpy(constant_buffer, pp_context->pp_static_parameter, param_size);
3634     dri_bo_unmap(pp_context->curbe.bo);
3635 }
3636
3637 static void
3638 gen6_pp_states_setup(VADriverContextP ctx,
3639                      struct i965_post_processing_context *pp_context)
3640 {
3641     gen6_pp_interface_descriptor_table(ctx, pp_context);
3642     gen6_pp_upload_constants(ctx, pp_context);
3643 }
3644
3645 static void
3646 gen6_pp_pipeline_select(VADriverContextP ctx,
3647                         struct i965_post_processing_context *pp_context)
3648 {
3649     struct intel_batchbuffer *batch = pp_context->batch;
3650
3651     BEGIN_BATCH(batch, 1);
3652     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
3653     ADVANCE_BATCH(batch);
3654 }
3655
3656 static void
3657 gen6_pp_state_base_address(VADriverContextP ctx,
3658                            struct i965_post_processing_context *pp_context)
3659 {
3660     struct intel_batchbuffer *batch = pp_context->batch;
3661
3662     BEGIN_BATCH(batch, 10);
3663     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
3664     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3665     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
3666     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3667     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3668     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3669     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3670     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3671     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3672     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
3673     ADVANCE_BATCH(batch);
3674 }
3675
3676 static void
3677 gen6_pp_vfe_state(VADriverContextP ctx,
3678                   struct i965_post_processing_context *pp_context)
3679 {
3680     struct intel_batchbuffer *batch = pp_context->batch;
3681
3682     BEGIN_BATCH(batch, 8);
3683     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
3684     OUT_BATCH(batch, 0);
3685     OUT_BATCH(batch,
3686               (pp_context->urb.num_vfe_entries - 1) << 16 |
3687               pp_context->urb.num_vfe_entries << 8);
3688     OUT_BATCH(batch, 0);
3689     OUT_BATCH(batch,
3690               (pp_context->urb.size_vfe_entry * 2) << 16 |  /* URB Entry Allocation Size, in 256 bits unit */
3691               (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2)); /* CURBE Allocation Size, in 256 bits unit */
3692     OUT_BATCH(batch, 0);
3693     OUT_BATCH(batch, 0);
3694     OUT_BATCH(batch, 0);
3695     ADVANCE_BATCH(batch);
3696 }
3697
3698 static void
3699 gen6_pp_curbe_load(VADriverContextP ctx,
3700                    struct i965_post_processing_context *pp_context)
3701 {
3702     struct intel_batchbuffer *batch = pp_context->batch;
3703
3704     assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32 <= pp_context->curbe.bo->size);
3705
3706     BEGIN_BATCH(batch, 4);
3707     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
3708     OUT_BATCH(batch, 0);
3709     OUT_BATCH(batch,
3710               pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32);
3711     OUT_RELOC(batch, 
3712               pp_context->curbe.bo,
3713               I915_GEM_DOMAIN_INSTRUCTION, 0,
3714               0);
3715     ADVANCE_BATCH(batch);
3716 }
3717
3718 static void
3719 gen6_interface_descriptor_load(VADriverContextP ctx,
3720                                struct i965_post_processing_context *pp_context)
3721 {
3722     struct intel_batchbuffer *batch = pp_context->batch;
3723
3724     BEGIN_BATCH(batch, 4);
3725     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
3726     OUT_BATCH(batch, 0);
3727     OUT_BATCH(batch,
3728               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
3729     OUT_RELOC(batch, 
3730               pp_context->idrt.bo,
3731               I915_GEM_DOMAIN_INSTRUCTION, 0,
3732               0);
3733     ADVANCE_BATCH(batch);
3734 }
3735
3736 static void
3737 gen6_pp_object_walker(VADriverContextP ctx,
3738                       struct i965_post_processing_context *pp_context)
3739 {
3740     struct i965_driver_data *i965 = i965_driver_data(ctx);
3741     struct intel_batchbuffer *batch = pp_context->batch;
3742     int x, x_steps, y, y_steps;
3743     int param_size, command_length_in_dws;
3744     dri_bo *command_buffer;
3745     unsigned int *command_ptr;
3746
3747     if (IS_GEN7(i965->intel.device_id))
3748         param_size = sizeof(struct gen7_pp_inline_parameter);
3749     else
3750         param_size = sizeof(struct pp_inline_parameter);
3751
3752     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
3753     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
3754     command_length_in_dws = 6 + (param_size >> 2);
3755     command_buffer = dri_bo_alloc(i965->intel.bufmgr,
3756                                   "command objects buffer",
3757                                   command_length_in_dws * 4 * x_steps * y_steps + 8,
3758                                   4096);
3759
3760     dri_bo_map(command_buffer, 1);
3761     command_ptr = command_buffer->virtual;
3762
3763     for (y = 0; y < y_steps; y++) {
3764         for (x = 0; x < x_steps; x++) {
3765             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
3766                 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
3767                 *command_ptr++ = 0;
3768                 *command_ptr++ = 0;
3769                 *command_ptr++ = 0;
3770                 *command_ptr++ = 0;
3771                 *command_ptr++ = 0;
3772                 memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
3773                 command_ptr += (param_size >> 2);
3774             }
3775         }
3776     }
3777
3778     if (command_length_in_dws * x_steps * y_steps % 2 == 0)
3779         *command_ptr++ = 0;
3780
3781     *command_ptr = MI_BATCH_BUFFER_END;
3782
3783     dri_bo_unmap(command_buffer);
3784
3785     BEGIN_BATCH(batch, 2);
3786     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
3787     OUT_RELOC(batch, command_buffer, 
3788               I915_GEM_DOMAIN_COMMAND, 0, 
3789               0);
3790     ADVANCE_BATCH(batch);
3791     
3792     dri_bo_unreference(command_buffer);
3793
3794     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
3795      * will cause control to pass back to ring buffer 
3796      */
3797     intel_batchbuffer_end_atomic(batch);
3798     intel_batchbuffer_flush(batch);
3799     intel_batchbuffer_start_atomic(batch, 0x1000);
3800 }
3801
3802 static void
3803 gen6_pp_pipeline_setup(VADriverContextP ctx,
3804                        struct i965_post_processing_context *pp_context)
3805 {
3806     struct intel_batchbuffer *batch = pp_context->batch;
3807
3808     intel_batchbuffer_start_atomic(batch, 0x1000);
3809     intel_batchbuffer_emit_mi_flush(batch);
3810     gen6_pp_pipeline_select(ctx, pp_context);
3811     gen6_pp_state_base_address(ctx, pp_context);
3812     gen6_pp_vfe_state(ctx, pp_context);
3813     gen6_pp_curbe_load(ctx, pp_context);
3814     gen6_interface_descriptor_load(ctx, pp_context);
3815     gen6_pp_object_walker(ctx, pp_context);
3816     intel_batchbuffer_end_atomic(batch);
3817 }
3818
3819 static VAStatus
3820 gen6_post_processing(
3821     VADriverContextP   ctx,
3822     struct i965_post_processing_context *pp_context,
3823     const struct i965_surface *src_surface,
3824     const VARectangle *src_rect,
3825     struct i965_surface *dst_surface,
3826     const VARectangle *dst_rect,
3827     int                pp_index,
3828     void * filter_param
3829 )
3830 {
3831     VAStatus va_status;
3832     
3833     va_status = gen6_pp_initialize(ctx, pp_context,
3834                                    src_surface,
3835                                    src_rect,
3836                                    dst_surface,
3837                                    dst_rect,
3838                                    pp_index,
3839                                    filter_param);
3840
3841     if (va_status == VA_STATUS_SUCCESS) {
3842         gen6_pp_states_setup(ctx, pp_context);
3843         gen6_pp_pipeline_setup(ctx, pp_context);
3844     }
3845
3846     return va_status;
3847 }
3848
3849 static VAStatus
3850 i965_post_processing_internal(
3851     VADriverContextP   ctx,
3852     struct i965_post_processing_context *pp_context,
3853     const struct i965_surface *src_surface,
3854     const VARectangle *src_rect,
3855     struct i965_surface *dst_surface,
3856     const VARectangle *dst_rect,
3857     int                pp_index,
3858     void *filter_param
3859 )
3860 {
3861     struct i965_driver_data *i965 = i965_driver_data(ctx);
3862     VAStatus va_status;
3863
3864     if (IS_GEN6(i965->intel.device_id) ||
3865         IS_GEN7(i965->intel.device_id))
3866         va_status = gen6_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
3867     else
3868         va_status = ironlake_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
3869     
3870     return va_status;
3871 }
3872
3873 VAStatus 
3874 i965_DestroySurfaces(VADriverContextP ctx,
3875                      VASurfaceID *surface_list,
3876                      int num_surfaces);
3877 VAStatus 
3878 i965_CreateSurfaces(VADriverContextP ctx,
3879                     int width,
3880                     int height,
3881                     int format,
3882                     int num_surfaces,
3883                     VASurfaceID *surfaces);
3884
/*
 * Split a packed 32-bit ARGB value (alpha in bits 31..24, red in 23..16,
 * green in 15..8, blue in 7..0) and convert it to Y/U/V using integer
 * arithmetic with coefficients scaled by 1000.
 *
 * y, u, v receive the converted components; a receives the alpha byte
 * unchanged.
 */
static void
rgb_to_yuv(unsigned int argb,
           unsigned char *y,
           unsigned char *u,
           unsigned char *v,
           unsigned char *a)
{
    const int red   = (int)((argb >> 16) & 0xff);
    const int green = (int)((argb >> 8) & 0xff);
    const int blue  = (int)(argb & 0xff);

    /* Signed intermediates: the chroma terms can be negative before the
     * +128 offset is applied. */
    const int luma = (257 * red + 504 * green + 98 * blue) / 1000 + 16;
    const int cr   = (439 * red - 368 * green - 71 * blue) / 1000 + 128;
    const int cb   = (-148 * red - 291 * green + 439 * blue) / 1000 + 128;

    *y = luma;
    *v = cr;
    *u = cb;
    *a = ((argb >> 24) & 0xff);
}
3901
3902 static void 
3903 i965_vpp_clear_surface(VADriverContextP ctx,
3904                        struct i965_post_processing_context *pp_context,
3905                        VASurfaceID surface,
3906                        unsigned int color)
3907 {
3908     struct i965_driver_data *i965 = i965_driver_data(ctx);
3909     struct intel_batchbuffer *batch = pp_context->batch;
3910     struct object_surface *obj_surface = SURFACE(surface);
3911     unsigned int blt_cmd, br13;
3912     unsigned int tiling = 0, swizzle = 0;
3913     int pitch;
3914     unsigned char y, u, v, a = 0;
3915
3916     /* Currently only support NV12 surface */
3917     if (!obj_surface || obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
3918         return;
3919
3920     rgb_to_yuv(color, &y, &u, &v, &a);
3921
3922     if (a == 0)
3923         return;
3924
3925     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
3926     blt_cmd = XY_COLOR_BLT_CMD;
3927     pitch = obj_surface->width;
3928
3929     if (tiling != I915_TILING_NONE) {
3930         blt_cmd |= XY_COLOR_BLT_DST_TILED;
3931         pitch >>= 2;
3932     }
3933
3934     br13 = 0xf0 << 16;
3935     br13 |= BR13_8;
3936     br13 |= pitch;
3937
3938     if (IS_GEN6(i965->intel.device_id) ||
3939         IS_GEN7(i965->intel.device_id)) {
3940         intel_batchbuffer_start_atomic_blt(batch, 48);
3941         BEGIN_BLT_BATCH(batch, 12);
3942     } else {
3943         intel_batchbuffer_start_atomic(batch, 48);
3944         BEGIN_BATCH(batch, 12);
3945     }
3946
3947     OUT_BATCH(batch, blt_cmd);
3948     OUT_BATCH(batch, br13);
3949     OUT_BATCH(batch,
3950               0 << 16 |
3951               0);
3952     OUT_BATCH(batch,
3953               obj_surface->height << 16 |
3954               obj_surface->width);
3955     OUT_RELOC(batch, obj_surface->bo, 
3956               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
3957               0);
3958     OUT_BATCH(batch, y);
3959
3960     br13 = 0xf0 << 16;
3961     br13 |= BR13_565;
3962     br13 |= pitch;
3963
3964     OUT_BATCH(batch, blt_cmd);
3965     OUT_BATCH(batch, br13);
3966     OUT_BATCH(batch,
3967               0 << 16 |
3968               0);
3969     OUT_BATCH(batch,
3970               obj_surface->height / 2 << 16 |
3971               obj_surface->width / 2);
3972     OUT_RELOC(batch, obj_surface->bo, 
3973               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
3974               obj_surface->width * obj_surface->y_cb_offset);
3975     OUT_BATCH(batch, v << 8 | u);
3976
3977     ADVANCE_BATCH(batch);
3978     intel_batchbuffer_end_atomic(batch);
3979 }
3980
3981 VASurfaceID
3982 i965_post_processing(
3983     VADriverContextP   ctx,
3984     VASurfaceID        surface,
3985     const VARectangle *src_rect,
3986     const VARectangle *dst_rect,
3987     unsigned int       flags,
3988     int               *has_done_scaling  
3989 )
3990 {
3991     struct i965_driver_data *i965 = i965_driver_data(ctx);
3992     VASurfaceID in_surface_id = surface;
3993     VASurfaceID out_surface_id = VA_INVALID_ID;
3994     
3995     *has_done_scaling = 0;
3996
3997     if (HAS_PP(i965)) {
3998         struct object_surface *obj_surface;
3999         VAStatus status;
4000         struct i965_surface src_surface;
4001         struct i965_surface dst_surface;
4002
4003         obj_surface = SURFACE(in_surface_id);
4004
4005         /* Currently only support post processing for NV12 surface */
4006         if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
4007             return out_surface_id;
4008
4009         _i965LockMutex(&i965->pp_mutex);
4010
4011         if (flags & I965_PP_FLAG_MCDI) {
4012             status = i965_CreateSurfaces(ctx,
4013                                          obj_surface->orig_width,
4014                                          obj_surface->orig_height,
4015                                          VA_RT_FORMAT_YUV420,
4016                                          1,
4017                                          &out_surface_id);
4018             assert(status == VA_STATUS_SUCCESS);
4019             obj_surface = SURFACE(out_surface_id);
4020             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4021             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
4022             src_surface.id = in_surface_id;
4023             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4024             src_surface.flags = (flags & I965_PP_FLAG_TOP_FIELD) ? 
4025                 I965_SURFACE_FLAG_TOP_FIELD_FIRST : I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST;
4026             dst_surface.id = out_surface_id;
4027             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4028             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4029
4030             i965_post_processing_internal(ctx, i965->pp_context,
4031                                           &src_surface,
4032                                           src_rect,
4033                                           &dst_surface,
4034                                           dst_rect,
4035                                           PP_NV12_DNDI,
4036                                           NULL);
4037         }
4038
4039         if (flags & I965_PP_FLAG_AVS) {
4040             struct i965_render_state *render_state = &i965->render_state;
4041             struct intel_region *dest_region = render_state->draw_region;
4042
4043             if (out_surface_id != VA_INVALID_ID)
4044                 in_surface_id = out_surface_id;
4045
4046             status = i965_CreateSurfaces(ctx,
4047                                          dest_region->width,
4048                                          dest_region->height,
4049                                          VA_RT_FORMAT_YUV420,
4050                                          1,
4051                                          &out_surface_id);
4052             assert(status == VA_STATUS_SUCCESS);
4053             obj_surface = SURFACE(out_surface_id);
4054             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4055             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
4056             src_surface.id = in_surface_id;
4057             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4058             src_surface.flags = I965_SURFACE_FLAG_FRAME;
4059             dst_surface.id = out_surface_id;
4060             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4061             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4062
4063             i965_post_processing_internal(ctx, i965->pp_context,
4064                                           &src_surface,
4065                                           src_rect,
4066                                           &dst_surface,
4067                                           dst_rect,
4068                                           PP_NV12_AVS,
4069                                           NULL);
4070
4071             if (in_surface_id != surface)
4072                 i965_DestroySurfaces(ctx, &in_surface_id, 1);
4073                 
4074             *has_done_scaling = 1;
4075         }
4076
4077         _i965UnlockMutex(&i965->pp_mutex);
4078     }
4079
4080     return out_surface_id;
4081 }       
4082
4083 static VAStatus
4084 i965_image_pl1_rgbx_processing(VADriverContextP ctx,
4085                           const struct i965_surface *src_surface,
4086                           const VARectangle *src_rect,
4087                           struct i965_surface *dst_surface,
4088                           const VARectangle *dst_rect)
4089 {
4090     struct i965_driver_data *i965 = i965_driver_data(ctx);
4091     struct i965_post_processing_context *pp_context = i965->pp_context;
4092     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4093
4094     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4095         i965_post_processing_internal(ctx, i965->pp_context,
4096                                       src_surface,
4097                                       src_rect,
4098                                       dst_surface,
4099                                       dst_rect,
4100                                       PP_RGBX_LOAD_SAVE_NV12,
4101                                       NULL);
4102     } else {
4103         assert(0);
4104         return VA_STATUS_ERROR_UNKNOWN;
4105     }
4106
4107     intel_batchbuffer_flush(pp_context->batch);
4108
4109     return VA_STATUS_SUCCESS;
4110 }
4111
4112 static VAStatus
4113 i965_image_pl3_processing(VADriverContextP ctx,
4114                           const struct i965_surface *src_surface,
4115                           const VARectangle *src_rect,
4116                           struct i965_surface *dst_surface,
4117                           const VARectangle *dst_rect)
4118 {
4119     struct i965_driver_data *i965 = i965_driver_data(ctx);
4120     struct i965_post_processing_context *pp_context = i965->pp_context;
4121     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4122     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
4123
4124     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4125         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4126                                                  src_surface,
4127                                                  src_rect,
4128                                                  dst_surface,
4129                                                  dst_rect,
4130                                                  PP_PL3_LOAD_SAVE_N12,
4131                                                  NULL);
4132     } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || 
4133                fourcc == VA_FOURCC('I', 'M', 'C', '3') || 
4134                fourcc == VA_FOURCC('Y', 'V', '1', '2') || 
4135                fourcc == VA_FOURCC('I', '4', '2', '0')) {
4136         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4137                                                  src_surface,
4138                                                  src_rect,
4139                                                  dst_surface,
4140                                                  dst_rect,
4141                                                  PP_PL3_LOAD_SAVE_PL3,
4142                                                  NULL);
4143     } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') ||
4144                fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
4145         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4146                                                  src_surface,
4147                                                  src_rect,
4148                                                  dst_surface,
4149                                                  dst_rect,
4150                                                  PP_PL3_LOAD_SAVE_PA,
4151                                                  NULL);
4152     }
4153     else {
4154         assert(0);
4155     }
4156
4157     intel_batchbuffer_flush(pp_context->batch);
4158
4159     return vaStatus;
4160 }
4161
4162 static VAStatus
4163 i965_image_pl2_processing(VADriverContextP ctx,
4164                           const struct i965_surface *src_surface,
4165                           const VARectangle *src_rect,
4166                           struct i965_surface *dst_surface,
4167                           const VARectangle *dst_rect)
4168 {
4169     struct i965_driver_data *i965 = i965_driver_data(ctx);
4170     struct i965_post_processing_context *pp_context = i965->pp_context;
4171     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4172     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
4173
4174     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4175         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4176                                                  src_surface,
4177                                                  src_rect,
4178                                                  dst_surface,
4179                                                  dst_rect,
4180                                                  PP_NV12_LOAD_SAVE_N12,
4181                                                  NULL);
4182     } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || 
4183                fourcc == VA_FOURCC('I', 'M', 'C', '3') || 
4184                fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
4185                fourcc == VA_FOURCC('I', '4', '2', '0') ) {
4186         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4187                                                  src_surface,
4188                                                  src_rect,
4189                                                  dst_surface,
4190                                                  dst_rect,
4191                                                  PP_NV12_LOAD_SAVE_PL3,
4192                                                  NULL);
4193     } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') ||
4194                fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
4195         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4196                                                  src_surface,
4197                                                  src_rect,
4198                                                  dst_surface,
4199                                                  dst_rect,
4200                                                  PP_NV12_LOAD_SAVE_PA,
4201                                                      NULL);
4202     }
4203
4204     intel_batchbuffer_flush(pp_context->batch);
4205
4206     return vaStatus;
4207 }
4208
4209 static VAStatus
4210 i965_image_pl1_processing(VADriverContextP ctx,
4211                           const struct i965_surface *src_surface,
4212                           const VARectangle *src_rect,
4213                           struct i965_surface *dst_surface,
4214                           const VARectangle *dst_rect)
4215 {
4216     struct i965_driver_data *i965 = i965_driver_data(ctx);
4217     struct i965_post_processing_context *pp_context = i965->pp_context;
4218     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4219
4220     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4221         i965_post_processing_internal(ctx, i965->pp_context,
4222                                       src_surface,
4223                                       src_rect,
4224                                       dst_surface,
4225                                       dst_rect,
4226                                       PP_PA_LOAD_SAVE_NV12,
4227                                       NULL);
4228     }
4229     else if (fourcc == VA_FOURCC_YV12) {
4230         i965_post_processing_internal(ctx, i965->pp_context,
4231                                       src_surface,
4232                                       src_rect,
4233                                       dst_surface,
4234                                       dst_rect,
4235                                       PP_PA_LOAD_SAVE_PL3,
4236                                       NULL);
4237
4238     }
4239     else {
4240         return VA_STATUS_ERROR_UNKNOWN;
4241     }
4242
4243     intel_batchbuffer_flush(pp_context->batch);
4244
4245     return VA_STATUS_SUCCESS;
4246 }
4247
4248 VAStatus
4249 i965_image_processing(VADriverContextP ctx,
4250                       const struct i965_surface *src_surface,
4251                       const VARectangle *src_rect,
4252                       struct i965_surface *dst_surface,
4253                       const VARectangle *dst_rect)
4254 {
4255     struct i965_driver_data *i965 = i965_driver_data(ctx);
4256     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
4257
4258     if (HAS_PP(i965)) {
4259         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
4260
4261         _i965LockMutex(&i965->pp_mutex);
4262
4263         switch (fourcc) {
4264         case VA_FOURCC('Y', 'V', '1', '2'):
4265         case VA_FOURCC('I', '4', '2', '0'):
4266         case VA_FOURCC('I', 'M', 'C', '1'):
4267         case VA_FOURCC('I', 'M', 'C', '3'):
4268             status = i965_image_pl3_processing(ctx,
4269                                                src_surface,
4270                                                src_rect,
4271                                                dst_surface,
4272                                                dst_rect);
4273             break;
4274
4275         case  VA_FOURCC('N', 'V', '1', '2'):
4276             status = i965_image_pl2_processing(ctx,
4277                                                src_surface,
4278                                                src_rect,
4279                                                dst_surface,
4280                                                dst_rect);
4281             break;
4282         case  VA_FOURCC('Y', 'U', 'Y', '2'):
4283         case VA_FOURCC('U', 'Y', 'V', 'Y'):
4284             status = i965_image_pl1_processing(ctx,
4285                                                src_surface,
4286                                                src_rect,
4287                                                dst_surface,
4288                                                dst_rect);
4289             break;
4290         case VA_FOURCC('B', 'G', 'R', 'A'):
4291         case VA_FOURCC('B', 'G', 'R', 'X'):
4292         case VA_FOURCC('R', 'G', 'B', 'A'):
4293         case VA_FOURCC('R', 'G', 'B', 'X'):
4294             status = i965_image_pl1_rgbx_processing(ctx,
4295                                                src_surface,
4296                                                src_rect,
4297                                                dst_surface,
4298                                                dst_rect);
4299             break;
4300         default:
4301             status = VA_STATUS_ERROR_UNIMPLEMENTED;
4302             break;
4303         }
4304         
4305         _i965UnlockMutex(&i965->pp_mutex);
4306     }
4307
4308     return status;
4309 }       
4310
4311 static void
4312 i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
4313 {
4314     int i;
4315
4316     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4317     pp_context->surface_state_binding_table.bo = NULL;
4318
4319     dri_bo_unreference(pp_context->curbe.bo);
4320     pp_context->curbe.bo = NULL;
4321
4322     dri_bo_unreference(pp_context->sampler_state_table.bo);
4323     pp_context->sampler_state_table.bo = NULL;
4324
4325     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4326     pp_context->sampler_state_table.bo_8x8 = NULL;
4327
4328     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4329     pp_context->sampler_state_table.bo_8x8_uv = NULL;
4330
4331     dri_bo_unreference(pp_context->idrt.bo);
4332     pp_context->idrt.bo = NULL;
4333     pp_context->idrt.num_interface_descriptors = 0;
4334
4335     dri_bo_unreference(pp_context->vfe_state.bo);
4336     pp_context->vfe_state.bo = NULL;
4337
4338     dri_bo_unreference(pp_context->stmm.bo);
4339     pp_context->stmm.bo = NULL;
4340
4341     for (i = 0; i < NUM_PP_MODULES; i++) {
4342         struct pp_module *pp_module = &pp_context->pp_modules[i];
4343
4344         dri_bo_unreference(pp_module->kernel.bo);
4345         pp_module->kernel.bo = NULL;
4346     }
4347
4348     free(pp_context->pp_static_parameter);
4349     free(pp_context->pp_inline_parameter);
4350     pp_context->pp_static_parameter = NULL;
4351     pp_context->pp_inline_parameter = NULL;
4352 }
4353
4354 Bool
4355 i965_post_processing_terminate(VADriverContextP ctx)
4356 {
4357     struct i965_driver_data *i965 = i965_driver_data(ctx);
4358     struct i965_post_processing_context *pp_context = i965->pp_context;
4359
4360     if (pp_context) {
4361         i965_post_processing_context_finalize(pp_context);
4362         free(pp_context);
4363     }
4364
4365     i965->pp_context = NULL;
4366
4367     return True;
4368 }
4369
4370 static void
4371 i965_post_processing_context_init(VADriverContextP ctx,
4372                                   struct i965_post_processing_context *pp_context,
4373                                   struct intel_batchbuffer *batch)
4374 {
4375     struct i965_driver_data *i965 = i965_driver_data(ctx);
4376     int i;
4377
4378     pp_context->urb.size = URB_SIZE((&i965->intel));
4379     pp_context->urb.num_vfe_entries = 32;
4380     pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
4381     pp_context->urb.num_cs_entries = 1;
4382     
4383     if (IS_GEN7(i965->intel.device_id))
4384         pp_context->urb.size_cs_entry = 4;      /* in 512 bits unit */
4385     else
4386         pp_context->urb.size_cs_entry = 2;
4387
4388     pp_context->urb.vfe_start = 0;
4389     pp_context->urb.cs_start = pp_context->urb.vfe_start + 
4390         pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
4391     assert(pp_context->urb.cs_start + 
4392            pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
4393
4394     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
4395     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
4396     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
4397
4398     if (IS_GEN7(i965->intel.device_id))
4399         memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
4400     else if (IS_GEN6(i965->intel.device_id))
4401         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
4402     else if (IS_IRONLAKE(i965->intel.device_id))
4403         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
4404
4405     for (i = 0; i < NUM_PP_MODULES; i++) {
4406         struct pp_module *pp_module = &pp_context->pp_modules[i];
4407         dri_bo_unreference(pp_module->kernel.bo);
4408         if (pp_module->kernel.bin && pp_module->kernel.size) {
4409             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
4410                                                 pp_module->kernel.name,
4411                                                 pp_module->kernel.size,
4412                                                 4096);
4413             assert(pp_module->kernel.bo);
4414             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
4415         } else {
4416             pp_module->kernel.bo = NULL;
4417         }
4418     }
4419
4420     /* static & inline parameters */
4421     if (IS_GEN7(i965->intel.device_id)) {
4422         pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
4423         pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
4424     } else {
4425         pp_context->pp_static_parameter = calloc(sizeof(struct pp_static_parameter), 1);
4426         pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
4427     }
4428
4429     pp_context->batch = batch;
4430 }
4431
4432 Bool
4433 i965_post_processing_init(VADriverContextP ctx)
4434 {
4435     struct i965_driver_data *i965 = i965_driver_data(ctx);
4436     struct i965_post_processing_context *pp_context = i965->pp_context;
4437
4438     if (HAS_PP(i965)) {
4439         if (pp_context == NULL) {
4440             pp_context = calloc(1, sizeof(*pp_context));
4441             i965_post_processing_context_init(ctx, pp_context, i965->batch);
4442             i965->pp_context = pp_context;
4443         }
4444     }
4445
4446     return True;
4447 }
4448
4449 static const int procfilter_to_pp_flag[VAProcFilterCount] = {
4450     PP_NULL,    /* VAProcFilterNone */
4451     PP_NV12_DN, /* VAProcFilterNoiseReduction */
4452     PP_NULL,    /* VAProcFilterDeblocking */
4453     PP_NV12_DNDI, /* VAProcFilterDeinterlacing */
4454     PP_NULL,    /* VAProcFilterSharpening */
4455     PP_NULL,    /* VAProcFilterColorBalance */
4456     PP_NULL,    /* VAProcFilterColorStandard */
4457     PP_NULL,    /* VAProcFilterFrameRateConversion */
4458 };
4459
4460 static const int proc_frame_to_pp_frame[3] = {
4461     I965_SURFACE_FLAG_FRAME,
4462     I965_SURFACE_FLAG_TOP_FIELD_FIRST,
4463     I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST
4464 };
4465
4466 static void 
4467 i965_proc_picture(VADriverContextP ctx, 
4468                   VAProfile profile, 
4469                   union codec_state *codec_state,
4470                   struct hw_context *hw_context)
4471 {
4472     struct i965_driver_data *i965 = i965_driver_data(ctx);
4473     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
4474     struct proc_state *proc_state = &codec_state->proc;
4475     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
4476     struct object_surface *obj_surface;
4477     struct i965_surface src_surface, dst_surface;
4478     VARectangle src_rect, dst_rect;
4479     VAStatus status;
4480     int i;
4481     VASurfaceID tmp_surfaces[VAProcFilterCount + 4];
4482     int num_tmp_surfaces = 0;
4483     unsigned int tiling = 0, swizzle = 0;
4484     int in_width, in_height;
4485
4486     assert(pipeline_param->surface != VA_INVALID_ID);
4487     assert(proc_state->current_render_target != VA_INVALID_ID);
4488
4489     obj_surface = SURFACE(pipeline_param->surface);
4490     in_width = obj_surface->orig_width;
4491     in_height = obj_surface->orig_height;
4492     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
4493
4494     src_surface.id = pipeline_param->surface;
4495     src_surface.type = I965_SURFACE_TYPE_SURFACE;
4496     src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
4497
4498     if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2')) {
4499         VASurfaceID out_surface_id = VA_INVALID_ID;
4500
4501         src_surface.id = pipeline_param->surface;
4502         src_surface.type = I965_SURFACE_TYPE_SURFACE;
4503         src_surface.flags = I965_SURFACE_FLAG_FRAME;
4504         src_rect.x = 0;
4505         src_rect.y = 0;
4506         src_rect.width = in_width;
4507         src_rect.height = in_height;
4508
4509         status = i965_CreateSurfaces(ctx,
4510                                      in_width,
4511                                      in_height,
4512                                      VA_RT_FORMAT_YUV420,
4513                                      1,
4514                                      &out_surface_id);
4515         assert(status == VA_STATUS_SUCCESS);
4516         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
4517         obj_surface = SURFACE(out_surface_id);
4518         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
4519
4520         dst_surface.id = out_surface_id;
4521         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4522         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4523         dst_rect.x = 0;
4524         dst_rect.y = 0;
4525         dst_rect.width = in_width;
4526         dst_rect.height = in_height;
4527
4528         status = i965_image_processing(ctx,
4529                                        &src_surface,
4530                                        &src_rect,
4531                                        &dst_surface,
4532                                        &dst_rect);
4533         assert(status == VA_STATUS_SUCCESS);
4534
4535         src_surface.id = out_surface_id;
4536         src_surface.type = I965_SURFACE_TYPE_SURFACE;
4537         src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
4538     }
4539
4540     if (pipeline_param->surface_region) {
4541         src_rect.x = pipeline_param->surface_region->x;
4542         src_rect.y = pipeline_param->surface_region->y;
4543         src_rect.width = pipeline_param->surface_region->width;
4544         src_rect.height = pipeline_param->surface_region->height;
4545     } else {
4546         src_rect.x = 0;
4547         src_rect.y = 0;
4548         src_rect.width = in_width;
4549         src_rect.height = in_height;
4550     }
4551
4552     if (pipeline_param->output_region) {
4553         dst_rect.x = pipeline_param->output_region->x;
4554         dst_rect.y = pipeline_param->output_region->y;
4555         dst_rect.width = pipeline_param->output_region->width;
4556         dst_rect.height = pipeline_param->output_region->height;
4557     } else {
4558         dst_rect.x = 0;
4559         dst_rect.y = 0;
4560         dst_rect.width = in_width;
4561         dst_rect.height = in_height;
4562     }
4563
4564     obj_surface = SURFACE(proc_state->current_render_target);
4565     i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4566     i965_vpp_clear_surface(ctx, &proc_context->pp_context, proc_state->current_render_target, pipeline_param->output_background_color); 
4567     
4568     for (i = 0; i < pipeline_param->num_filters; i++) {
4569         struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]);
4570         VAProcFilterParameterBufferBase *filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer;
4571         VAProcFilterType filter_type = filter_param->type;
4572         VASurfaceID out_surface_id = VA_INVALID_ID;
4573         int kernel_index = procfilter_to_pp_flag[filter_type];
4574
4575         if (kernel_index != PP_NULL &&
4576             proc_context->pp_context.pp_modules[kernel_index].kernel.bo != NULL) {
4577             status = i965_CreateSurfaces(ctx,
4578                                          in_width,
4579                                          in_height,
4580                                          VA_RT_FORMAT_YUV420,
4581                                          1,
4582                                          &out_surface_id);
4583             assert(status == VA_STATUS_SUCCESS);
4584             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
4585             obj_surface = SURFACE(out_surface_id);
4586             i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4587             dst_surface.id = out_surface_id;
4588             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4589             status = i965_post_processing_internal(ctx, &proc_context->pp_context,
4590                                                    &src_surface,
4591                                                    &src_rect,
4592                                                    &dst_surface,
4593                                                    &src_rect,
4594                                                    kernel_index,
4595                                                    filter_param);
4596
4597             if (status == VA_STATUS_SUCCESS) {
4598                 src_surface.id = dst_surface.id;
4599                 src_surface.type = dst_surface.type;
4600                 src_surface.flags = dst_surface.flags;
4601             }
4602         }
4603     }
4604
4605     dst_surface.id = proc_state->current_render_target;
4606     dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4607
4608     if (src_rect.width == dst_rect.width &&
4609         src_rect.height == dst_rect.height) {
4610         i965_post_processing_internal(ctx, &proc_context->pp_context,
4611                                       &src_surface,
4612                                       &src_rect,
4613                                       &dst_surface,
4614                                       &dst_rect,
4615                                       PP_NV12_LOAD_SAVE_N12,
4616                                       NULL);
4617     } else {
4618
4619         i965_post_processing_internal(ctx, &proc_context->pp_context,
4620                                       &src_surface,
4621                                       &src_rect,
4622                                       &dst_surface,
4623                                       &dst_rect,
4624                                       (pipeline_param->filter_flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC ?
4625                                       PP_NV12_AVS : PP_NV12_SCALING,
4626                                       NULL);
4627     }
4628
4629     if (num_tmp_surfaces)
4630         i965_DestroySurfaces(ctx,
4631                              tmp_surfaces,
4632                              num_tmp_surfaces);
4633
4634     intel_batchbuffer_flush(hw_context->batch);
4635 }
4636
4637 static void
4638 i965_proc_context_destroy(void *hw_context)
4639 {
4640     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
4641
4642     i965_post_processing_context_finalize(&proc_context->pp_context);
4643     intel_batchbuffer_free(proc_context->base.batch);
4644     free(proc_context);
4645 }
4646
4647 struct hw_context *
4648 i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config)
4649 {
4650     struct intel_driver_data *intel = intel_driver_data(ctx);
4651     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
4652
4653     proc_context->base.destroy = i965_proc_context_destroy;
4654     proc_context->base.run = i965_proc_picture;
4655     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
4656     i965_post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
4657
4658     return (struct hw_context *)proc_context;
4659 }