VPP: Set the alpha channel when doing the conversion from NV12 to RGBA on Ivy/Haswell/BDW
[platform/upstream/libva-intel-driver.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_post_processing.h"
40 #include "i965_render.h"
41 #include "intel_media.h"
42
/*
 * Non-zero when the device id stored in ctx->intel.device_id matches
 * Ironlake, Gen6, Gen7 or Gen8.  `ctx` is evaluated once per test, so
 * pass an expression without side effects.
 */
#define HAS_PP(ctx)                                 \
    (IS_IRONLAKE((ctx)->intel.device_id) ||         \
     IS_GEN6((ctx)->intel.device_id)     ||         \
     IS_GEN7((ctx)->intel.device_id)     ||         \
     IS_GEN8((ctx)->intel.device_id))
47
48
/*
 * Size reserved for one surface-state slot: the largest of the Gen6,
 * Gen7 and Gen8 padded sizes, so a single layout fits all three.
 */
#define SURFACE_STATE_PADDED_SIZE                               \
    MAX(SURFACE_STATE_PADDED_SIZE_GEN8,                         \
        MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7))
51
/*
 * Offset of the index-th surface-state slot: `index` multiplied by
 * SURFACE_STATE_PADDED_SIZE.  The argument is parenthesized so that an
 * expression argument (e.g. `base + 1`) is scaled as a whole instead of
 * being split by operator precedence.
 */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
/* Offset just past MAX_PP_SURFACES surface-state slots. */
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)
54
/*
 * Constants shared with the GPU kernels.
 * NOTE(review): presumably the block width/height and the x-offset
 * alignment are expressed in pixels -- confirm against the users of
 * these macros.
 */
#define GPU_ASM_BLOCK_WIDTH             16
#define GPU_ASM_BLOCK_HEIGHT            8
#define GPU_ASM_X_OFFSET_ALIGNMENT      4
58
/*
 * Driver-private status value.
 * NOTE(review): presumably a second "success" code kept distinct from
 * VA_STATUS_SUCCESS -- confirm against the functions that return it.
 */
#define VA_STATUS_SUCCESS_1             0xFFFFFFFE
60
61 extern VAStatus
62 i965_CreateSurfaces(VADriverContextP ctx,
63                     int width,
64                     int height,
65                     int format,
66                     int num_surfaces,
67                     VASurfaceID *surfaces);
68
69 static const uint32_t pp_null_gen5[][4] = {
70 #include "shaders/post_processing/gen5_6/null.g4b.gen5"
71 };
72
73 static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
74 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
75 };
76
77 static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
78 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
79 };
80
81 static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
82 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
83 };
84
85 static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
86 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
87 };
88
89 static const uint32_t pp_nv12_scaling_gen5[][4] = {
90 #include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
91 };
92
93 static const uint32_t pp_nv12_avs_gen5[][4] = {
94 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
95 };
96
97 static const uint32_t pp_nv12_dndi_gen5[][4] = {
98 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
99 };
100
101 static const uint32_t pp_nv12_dn_gen5[][4] = {
102 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
103 };
104
105 static const uint32_t pp_nv12_load_save_pa_gen5[][4] = {
106 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5"
107 };
108
109 static const uint32_t pp_pl3_load_save_pa_gen5[][4] = {
110 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5"
111 };
112
113 static const uint32_t pp_pa_load_save_nv12_gen5[][4] = {
114 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5"
115 };
116
117 static const uint32_t pp_pa_load_save_pl3_gen5[][4] = {
118 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5"
119 };
120
121 static const uint32_t pp_pa_load_save_pa_gen5[][4] = {
122 #include "shaders/post_processing/gen5_6/pa_load_save_pa.g4b.gen5"
123 };
124
125 static const uint32_t pp_rgbx_load_save_nv12_gen5[][4] = {
126 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g4b.gen5"
127 };
128
129 static const uint32_t pp_nv12_load_save_rgbx_gen5[][4] = {
130 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g4b.gen5"
131 };
132
133 static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
134                                    const struct i965_surface *src_surface,
135                                    const VARectangle *src_rect,
136                                    struct i965_surface *dst_surface,
137                                    const VARectangle *dst_rect,
138                                    void *filter_param);
139 static VAStatus pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
140                                             const struct i965_surface *src_surface,
141                                             const VARectangle *src_rect,
142                                             struct i965_surface *dst_surface,
143                                             const VARectangle *dst_rect,
144                                             void *filter_param);
145 static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
146                                            const struct i965_surface *src_surface,
147                                            const VARectangle *src_rect,
148                                            struct i965_surface *dst_surface,
149                                            const VARectangle *dst_rect,
150                                            void *filter_param);
151 static VAStatus gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
152                                              const struct i965_surface *src_surface,
153                                              const VARectangle *src_rect,
154                                              struct i965_surface *dst_surface,
155                                              const VARectangle *dst_rect,
156                                              void *filter_param);
157 static VAStatus pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
158                                                 const struct i965_surface *src_surface,
159                                                 const VARectangle *src_rect,
160                                                 struct i965_surface *dst_surface,
161                                                 const VARectangle *dst_rect,
162                                                 void *filter_param);
163 static VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
164                                         const struct i965_surface *src_surface,
165                                         const VARectangle *src_rect,
166                                         struct i965_surface *dst_surface,
167                                         const VARectangle *dst_rect,
168                                         void *filter_param);
169 static VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
170                                       const struct i965_surface *src_surface,
171                                       const VARectangle *src_rect,
172                                       struct i965_surface *dst_surface,
173                                       const VARectangle *dst_rect,
174                                       void *filter_param);
175
176 static struct pp_module pp_modules_gen5[] = {
177     {
178         {
179             "NULL module (for testing)",
180             PP_NULL,
181             pp_null_gen5,
182             sizeof(pp_null_gen5),
183             NULL,
184         },
185
186         pp_null_initialize,
187     },
188
189     {
190         {
191             "NV12_NV12",
192             PP_NV12_LOAD_SAVE_N12,
193             pp_nv12_load_save_nv12_gen5,
194             sizeof(pp_nv12_load_save_nv12_gen5),
195             NULL,
196         },
197
198         pp_plx_load_save_plx_initialize,
199     },
200
201     {
202         {
203             "NV12_PL3",
204             PP_NV12_LOAD_SAVE_PL3,
205             pp_nv12_load_save_pl3_gen5,
206             sizeof(pp_nv12_load_save_pl3_gen5),
207             NULL,
208         },
209
210         pp_plx_load_save_plx_initialize,
211     },
212
213     {
214         {
215             "PL3_NV12",
216             PP_PL3_LOAD_SAVE_N12,
217             pp_pl3_load_save_nv12_gen5,
218             sizeof(pp_pl3_load_save_nv12_gen5),
219             NULL,
220         },
221
222         pp_plx_load_save_plx_initialize,
223     },
224
225     {
226         {
227             "PL3_PL3",
228             PP_PL3_LOAD_SAVE_PL3,
229             pp_pl3_load_save_pl3_gen5,
230             sizeof(pp_pl3_load_save_pl3_gen5),
231             NULL,
232         },
233
234         pp_plx_load_save_plx_initialize
235     },
236
237     {
238         {
239             "NV12 Scaling module",
240             PP_NV12_SCALING,
241             pp_nv12_scaling_gen5,
242             sizeof(pp_nv12_scaling_gen5),
243             NULL,
244         },
245
246         pp_nv12_scaling_initialize,
247     },
248
249     {
250         {
251             "NV12 AVS module",
252             PP_NV12_AVS,
253             pp_nv12_avs_gen5,
254             sizeof(pp_nv12_avs_gen5),
255             NULL,
256         },
257
258         pp_nv12_avs_initialize_nlas,
259     },
260
261     {
262         {
263             "NV12 DNDI module",
264             PP_NV12_DNDI,
265             pp_nv12_dndi_gen5,
266             sizeof(pp_nv12_dndi_gen5),
267             NULL,
268         },
269
270         pp_nv12_dndi_initialize,
271     },
272
273     {
274         {
275             "NV12 DN module",
276             PP_NV12_DN,
277             pp_nv12_dn_gen5,
278             sizeof(pp_nv12_dn_gen5),
279             NULL,
280         },
281
282         pp_nv12_dn_initialize,
283     },
284
285     {
286         {
287             "NV12_PA module",
288             PP_NV12_LOAD_SAVE_PA,
289             pp_nv12_load_save_pa_gen5,
290             sizeof(pp_nv12_load_save_pa_gen5),
291             NULL,
292         },
293     
294         pp_plx_load_save_plx_initialize,
295     },
296
297     {
298         {
299             "PL3_PA module",
300             PP_PL3_LOAD_SAVE_PA,
301             pp_pl3_load_save_pa_gen5,
302             sizeof(pp_pl3_load_save_pa_gen5),
303             NULL,
304         },
305     
306         pp_plx_load_save_plx_initialize,
307     },
308
309     {
310         {
311             "PA_NV12 module",
312             PP_PA_LOAD_SAVE_NV12,
313             pp_pa_load_save_nv12_gen5,
314             sizeof(pp_pa_load_save_nv12_gen5),
315             NULL,
316         },
317     
318         pp_plx_load_save_plx_initialize,
319     },
320
321     {
322         {
323             "PA_PL3 module",
324             PP_PA_LOAD_SAVE_PL3,
325             pp_pa_load_save_pl3_gen5,
326             sizeof(pp_pa_load_save_pl3_gen5),
327             NULL,
328         },
329     
330         pp_plx_load_save_plx_initialize,
331     },
332
333     {
334         {
335             "PA_PA module",
336             PP_PA_LOAD_SAVE_PA,
337             pp_pa_load_save_pa_gen5,
338             sizeof(pp_pa_load_save_pa_gen5),
339             NULL,
340         },
341
342         pp_plx_load_save_plx_initialize,
343     },
344
345     {
346         {
347             "RGBX_NV12 module",
348             PP_RGBX_LOAD_SAVE_NV12,
349             pp_rgbx_load_save_nv12_gen5,
350             sizeof(pp_rgbx_load_save_nv12_gen5),
351             NULL,
352         },
353     
354         pp_plx_load_save_plx_initialize,
355     },
356             
357     {
358         {
359             "NV12_RGBX module",
360             PP_NV12_LOAD_SAVE_RGBX,
361             pp_nv12_load_save_rgbx_gen5,
362             sizeof(pp_nv12_load_save_rgbx_gen5),
363             NULL,
364         },
365     
366         pp_plx_load_save_plx_initialize,
367     },
368 };
369
370 static const uint32_t pp_null_gen6[][4] = {
371 #include "shaders/post_processing/gen5_6/null.g6b"
372 };
373
374 static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
375 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
376 };
377
378 static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
379 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
380 };
381
382 static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
383 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
384 };
385
386 static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
387 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
388 };
389
390 static const uint32_t pp_nv12_scaling_gen6[][4] = {
391 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
392 };
393
394 static const uint32_t pp_nv12_avs_gen6[][4] = {
395 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
396 };
397
398 static const uint32_t pp_nv12_dndi_gen6[][4] = {
399 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
400 };
401
402 static const uint32_t pp_nv12_dn_gen6[][4] = {
403 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
404 };
405
406 static const uint32_t pp_nv12_load_save_pa_gen6[][4] = {
407 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g6b"
408 };
409
410 static const uint32_t pp_pl3_load_save_pa_gen6[][4] = {
411 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g6b"
412 };
413
414 static const uint32_t pp_pa_load_save_nv12_gen6[][4] = {
415 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g6b"
416 };
417
418 static const uint32_t pp_pa_load_save_pl3_gen6[][4] = {
419 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g6b"
420 };
421
422 static const uint32_t pp_pa_load_save_pa_gen6[][4] = {
423 #include "shaders/post_processing/gen5_6/pa_load_save_pa.g6b"
424 };
425
426 static const uint32_t pp_rgbx_load_save_nv12_gen6[][4] = {
427 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g6b"
428 };
429
430 static const uint32_t pp_nv12_load_save_rgbx_gen6[][4] = {
431 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g6b"
432 };
433
434 static struct pp_module pp_modules_gen6[] = {
435     {
436         {
437             "NULL module (for testing)",
438             PP_NULL,
439             pp_null_gen6,
440             sizeof(pp_null_gen6),
441             NULL,
442         },
443
444         pp_null_initialize,
445     },
446
447     {
448         {
449             "NV12_NV12",
450             PP_NV12_LOAD_SAVE_N12,
451             pp_nv12_load_save_nv12_gen6,
452             sizeof(pp_nv12_load_save_nv12_gen6),
453             NULL,
454         },
455
456         pp_plx_load_save_plx_initialize,
457     },
458
459     {
460         {
461             "NV12_PL3",
462             PP_NV12_LOAD_SAVE_PL3,
463             pp_nv12_load_save_pl3_gen6,
464             sizeof(pp_nv12_load_save_pl3_gen6),
465             NULL,
466         },
467         
468         pp_plx_load_save_plx_initialize,
469     },
470
471     {
472         {
473             "PL3_NV12",
474             PP_PL3_LOAD_SAVE_N12,
475             pp_pl3_load_save_nv12_gen6,
476             sizeof(pp_pl3_load_save_nv12_gen6),
477             NULL,
478         },
479
480         pp_plx_load_save_plx_initialize,
481     },
482
483     {
484         {
485             "PL3_PL3",
486             PP_PL3_LOAD_SAVE_PL3,
487             pp_pl3_load_save_pl3_gen6,
488             sizeof(pp_pl3_load_save_pl3_gen6),
489             NULL,
490         },
491
492         pp_plx_load_save_plx_initialize,
493     },
494
495     {
496         {
497             "NV12 Scaling module",
498             PP_NV12_SCALING,
499             pp_nv12_scaling_gen6,
500             sizeof(pp_nv12_scaling_gen6),
501             NULL,
502         },
503
504         gen6_nv12_scaling_initialize,
505     },
506
507     {
508         {
509             "NV12 AVS module",
510             PP_NV12_AVS,
511             pp_nv12_avs_gen6,
512             sizeof(pp_nv12_avs_gen6),
513             NULL,
514         },
515
516         pp_nv12_avs_initialize_nlas,
517     },
518
519     {
520         {
521             "NV12 DNDI module",
522             PP_NV12_DNDI,
523             pp_nv12_dndi_gen6,
524             sizeof(pp_nv12_dndi_gen6),
525             NULL,
526         },
527
528         pp_nv12_dndi_initialize,
529     },
530
531     {
532         {
533             "NV12 DN module",
534             PP_NV12_DN,
535             pp_nv12_dn_gen6,
536             sizeof(pp_nv12_dn_gen6),
537             NULL,
538         },
539
540         pp_nv12_dn_initialize,
541     },
542     {
543         {
544             "NV12_PA module",
545             PP_NV12_LOAD_SAVE_PA,
546             pp_nv12_load_save_pa_gen6,
547             sizeof(pp_nv12_load_save_pa_gen6),
548             NULL,
549         },
550     
551         pp_plx_load_save_plx_initialize,
552     },
553
554     {
555         {
556             "PL3_PA module",
557             PP_PL3_LOAD_SAVE_PA,
558             pp_pl3_load_save_pa_gen6,
559             sizeof(pp_pl3_load_save_pa_gen6),
560             NULL,
561         },
562     
563         pp_plx_load_save_plx_initialize,
564     },
565
566     {
567         {
568             "PA_NV12 module",
569             PP_PA_LOAD_SAVE_NV12,
570             pp_pa_load_save_nv12_gen6,
571             sizeof(pp_pa_load_save_nv12_gen6),
572             NULL,
573         },
574     
575         pp_plx_load_save_plx_initialize,
576     },
577
578     {
579         {
580             "PA_PL3 module",
581             PP_PA_LOAD_SAVE_PL3,
582             pp_pa_load_save_pl3_gen6,
583             sizeof(pp_pa_load_save_pl3_gen6),
584             NULL,
585         },
586     
587         pp_plx_load_save_plx_initialize,
588     },
589
590     {
591         {
592             "PA_PA module",
593             PP_PA_LOAD_SAVE_PA,
594             pp_pa_load_save_pa_gen6,
595             sizeof(pp_pa_load_save_pa_gen6),
596             NULL,
597         },
598
599         pp_plx_load_save_plx_initialize,
600     },
601
602     {
603         {
604             "RGBX_NV12 module",
605             PP_RGBX_LOAD_SAVE_NV12,
606             pp_rgbx_load_save_nv12_gen6,
607             sizeof(pp_rgbx_load_save_nv12_gen6),
608             NULL,
609         },
610     
611         pp_plx_load_save_plx_initialize,
612     },
613
614     {
615         {
616             "NV12_RGBX module",
617             PP_NV12_LOAD_SAVE_RGBX,
618             pp_nv12_load_save_rgbx_gen6,
619             sizeof(pp_nv12_load_save_rgbx_gen6),
620             NULL,
621         },
622     
623         pp_plx_load_save_plx_initialize,
624     },
625 };
626
627 static const uint32_t pp_null_gen7[][4] = {
628 };
629
630 static const uint32_t pp_nv12_load_save_nv12_gen7[][4] = {
631 #include "shaders/post_processing/gen7/pl2_to_pl2.g7b"
632 };
633
634 static const uint32_t pp_nv12_load_save_pl3_gen7[][4] = {
635 #include "shaders/post_processing/gen7/pl2_to_pl3.g7b"
636 };
637
638 static const uint32_t pp_pl3_load_save_nv12_gen7[][4] = {
639 #include "shaders/post_processing/gen7/pl3_to_pl2.g7b"
640 };
641
642 static const uint32_t pp_pl3_load_save_pl3_gen7[][4] = {
643 #include "shaders/post_processing/gen7/pl3_to_pl3.g7b"
644 };
645
646 static const uint32_t pp_nv12_scaling_gen7[][4] = {
647 #include "shaders/post_processing/gen7/avs.g7b"
648 };
649
650 static const uint32_t pp_nv12_avs_gen7[][4] = {
651 #include "shaders/post_processing/gen7/avs.g7b"
652 };
653
654 static const uint32_t pp_nv12_dndi_gen7[][4] = {
655 #include "shaders/post_processing/gen7/dndi.g7b"
656 };
657
658 static const uint32_t pp_nv12_dn_gen7[][4] = {
659 #include "shaders/post_processing/gen7/nv12_dn_nv12.g7b"
660 };
661 static const uint32_t pp_nv12_load_save_pa_gen7[][4] = {
662 #include "shaders/post_processing/gen7/pl2_to_pa.g7b"
663 };
664 static const uint32_t pp_pl3_load_save_pa_gen7[][4] = {
665 #include "shaders/post_processing/gen7/pl3_to_pa.g7b"
666 };
667 static const uint32_t pp_pa_load_save_nv12_gen7[][4] = {
668 #include "shaders/post_processing/gen7/pa_to_pl2.g7b"
669 };
670 static const uint32_t pp_pa_load_save_pl3_gen7[][4] = {
671 #include "shaders/post_processing/gen7/pa_to_pl3.g7b"
672 };
673 static const uint32_t pp_pa_load_save_pa_gen7[][4] = {
674 #include "shaders/post_processing/gen7/pa_to_pa.g7b"
675 };
676 static const uint32_t pp_rgbx_load_save_nv12_gen7[][4] = {
677 #include "shaders/post_processing/gen7/rgbx_to_nv12.g7b"
678 };
679 static const uint32_t pp_nv12_load_save_rgbx_gen7[][4] = {
680 #include "shaders/post_processing/gen7/pl2_to_rgbx.g7b"
681 };
682
683 static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
684                                            const struct i965_surface *src_surface,
685                                            const VARectangle *src_rect,
686                                            struct i965_surface *dst_surface,
687                                            const VARectangle *dst_rect,
688                                            void *filter_param);
689 static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
690                                              const struct i965_surface *src_surface,
691                                              const VARectangle *src_rect,
692                                              struct i965_surface *dst_surface,
693                                              const VARectangle *dst_rect,
694                                              void *filter_param);
695 static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
696                                            const struct i965_surface *src_surface,
697                                            const VARectangle *src_rect,
698                                            struct i965_surface *dst_surface,
699                                            const VARectangle *dst_rect,
700                                            void *filter_param);
701
702 static struct pp_module pp_modules_gen7[] = {
703     {
704         {
705             "NULL module (for testing)",
706             PP_NULL,
707             pp_null_gen7,
708             sizeof(pp_null_gen7),
709             NULL,
710         },
711
712         pp_null_initialize,
713     },
714
715     {
716         {
717             "NV12_NV12",
718             PP_NV12_LOAD_SAVE_N12,
719             pp_nv12_load_save_nv12_gen7,
720             sizeof(pp_nv12_load_save_nv12_gen7),
721             NULL,
722         },
723
724         gen7_pp_plx_avs_initialize,
725     },
726
727     {
728         {
729             "NV12_PL3",
730             PP_NV12_LOAD_SAVE_PL3,
731             pp_nv12_load_save_pl3_gen7,
732             sizeof(pp_nv12_load_save_pl3_gen7),
733             NULL,
734         },
735         
736         gen7_pp_plx_avs_initialize,
737     },
738
739     {
740         {
741             "PL3_NV12",
742             PP_PL3_LOAD_SAVE_N12,
743             pp_pl3_load_save_nv12_gen7,
744             sizeof(pp_pl3_load_save_nv12_gen7),
745             NULL,
746         },
747
748         gen7_pp_plx_avs_initialize,
749     },
750
751     {
752         {
753             "PL3_PL3",
754             PP_PL3_LOAD_SAVE_PL3,
755             pp_pl3_load_save_pl3_gen7,
756             sizeof(pp_pl3_load_save_pl3_gen7),
757             NULL,
758         },
759
760         gen7_pp_plx_avs_initialize,
761     },
762
763     {
764         {
765             "NV12 Scaling module",
766             PP_NV12_SCALING,
767             pp_nv12_scaling_gen7,
768             sizeof(pp_nv12_scaling_gen7),
769             NULL,
770         },
771
772         gen7_pp_plx_avs_initialize,
773     },
774
775     {
776         {
777             "NV12 AVS module",
778             PP_NV12_AVS,
779             pp_nv12_avs_gen7,
780             sizeof(pp_nv12_avs_gen7),
781             NULL,
782         },
783
784         gen7_pp_plx_avs_initialize,
785     },
786
787     {
788         {
789             "NV12 DNDI module",
790             PP_NV12_DNDI,
791             pp_nv12_dndi_gen7,
792             sizeof(pp_nv12_dndi_gen7),
793             NULL,
794         },
795
796         gen7_pp_nv12_dndi_initialize,
797     },
798
799     {
800         {
801             "NV12 DN module",
802             PP_NV12_DN,
803             pp_nv12_dn_gen7,
804             sizeof(pp_nv12_dn_gen7),
805             NULL,
806         },
807
808         gen7_pp_nv12_dn_initialize,
809     },
810     {
811         {
812             "NV12_PA module",
813             PP_NV12_LOAD_SAVE_PA,
814             pp_nv12_load_save_pa_gen7,
815             sizeof(pp_nv12_load_save_pa_gen7),
816             NULL,
817         },
818     
819         gen7_pp_plx_avs_initialize,
820     },
821
822     {
823         {
824             "PL3_PA module",
825             PP_PL3_LOAD_SAVE_PA,
826             pp_pl3_load_save_pa_gen7,
827             sizeof(pp_pl3_load_save_pa_gen7),
828             NULL,
829         },
830     
831         gen7_pp_plx_avs_initialize,
832     },
833
834     {
835         {
836             "PA_NV12 module",
837             PP_PA_LOAD_SAVE_NV12,
838             pp_pa_load_save_nv12_gen7,
839             sizeof(pp_pa_load_save_nv12_gen7),
840             NULL,
841         },
842     
843         gen7_pp_plx_avs_initialize,
844     },
845
846     {
847         {
848             "PA_PL3 module",
849             PP_PA_LOAD_SAVE_PL3,
850             pp_pa_load_save_pl3_gen7,
851             sizeof(pp_pa_load_save_pl3_gen7),
852             NULL,
853         },
854     
855         gen7_pp_plx_avs_initialize,
856     },
857
858     {
859         {
860             "PA_PA module",
861             PP_PA_LOAD_SAVE_PA,
862             pp_pa_load_save_pa_gen7,
863             sizeof(pp_pa_load_save_pa_gen7),
864             NULL,
865         },
866
867         gen7_pp_plx_avs_initialize,
868     },
869
870     {
871         {
872             "RGBX_NV12 module",
873             PP_RGBX_LOAD_SAVE_NV12,
874             pp_rgbx_load_save_nv12_gen7,
875             sizeof(pp_rgbx_load_save_nv12_gen7),
876             NULL,
877         },
878     
879         gen7_pp_plx_avs_initialize,
880     },
881
882     {
883         {
884             "NV12_RGBX module",
885             PP_NV12_LOAD_SAVE_RGBX,
886             pp_nv12_load_save_rgbx_gen7,
887             sizeof(pp_nv12_load_save_rgbx_gen7),
888             NULL,
889         },
890     
891         gen7_pp_plx_avs_initialize,
892     },
893             
894 };
895
896 static const uint32_t pp_null_gen75[][4] = {
897 };
898
899 static const uint32_t pp_nv12_load_save_nv12_gen75[][4] = {
900 #include "shaders/post_processing/gen7/pl2_to_pl2.g75b"
901 };
902
903 static const uint32_t pp_nv12_load_save_pl3_gen75[][4] = {
904 #include "shaders/post_processing/gen7/pl2_to_pl3.g75b"
905 };
906
907 static const uint32_t pp_pl3_load_save_nv12_gen75[][4] = {
908 #include "shaders/post_processing/gen7/pl3_to_pl2.g75b"
909 };
910
911 static const uint32_t pp_pl3_load_save_pl3_gen75[][4] = {
912 #include "shaders/post_processing/gen7/pl3_to_pl3.g75b"
913 };
914
915 static const uint32_t pp_nv12_scaling_gen75[][4] = {
916 #include "shaders/post_processing/gen7/avs.g75b"
917 };
918
919 static const uint32_t pp_nv12_avs_gen75[][4] = {
920 #include "shaders/post_processing/gen7/avs.g75b"
921 };
922
923 static const uint32_t pp_nv12_dndi_gen75[][4] = {
924 // #include "shaders/post_processing/gen7/dndi.g75b"
925 };
926
927 static const uint32_t pp_nv12_dn_gen75[][4] = {
928 // #include "shaders/post_processing/gen7/nv12_dn_nv12.g75b"
929 };
930 static const uint32_t pp_nv12_load_save_pa_gen75[][4] = {
931 #include "shaders/post_processing/gen7/pl2_to_pa.g75b"
932 };
933 static const uint32_t pp_pl3_load_save_pa_gen75[][4] = {
934 #include "shaders/post_processing/gen7/pl3_to_pa.g75b"
935 };
936 static const uint32_t pp_pa_load_save_nv12_gen75[][4] = {
937 #include "shaders/post_processing/gen7/pa_to_pl2.g75b"
938 };
939 static const uint32_t pp_pa_load_save_pl3_gen75[][4] = {
940 #include "shaders/post_processing/gen7/pa_to_pl3.g75b"
941 };
942 static const uint32_t pp_pa_load_save_pa_gen75[][4] = {
943 #include "shaders/post_processing/gen7/pa_to_pa.g75b"
944 };
945 static const uint32_t pp_rgbx_load_save_nv12_gen75[][4] = {
946 #include "shaders/post_processing/gen7/rgbx_to_nv12.g75b"
947 };
948 static const uint32_t pp_nv12_load_save_rgbx_gen75[][4] = {
949 #include "shaders/post_processing/gen7/pl2_to_rgbx.g75b"
950 };
951
952 static struct pp_module pp_modules_gen75[] = {
953     {
954         {
955             "NULL module (for testing)",
956             PP_NULL,
957             pp_null_gen75,
958             sizeof(pp_null_gen75),
959             NULL,
960         },
961
962         pp_null_initialize,
963     },
964
965     {
966         {
967             "NV12_NV12",
968             PP_NV12_LOAD_SAVE_N12,
969             pp_nv12_load_save_nv12_gen75,
970             sizeof(pp_nv12_load_save_nv12_gen75),
971             NULL,
972         },
973
974         gen7_pp_plx_avs_initialize,
975     },
976
977     {
978         {
979             "NV12_PL3",
980             PP_NV12_LOAD_SAVE_PL3,
981             pp_nv12_load_save_pl3_gen75,
982             sizeof(pp_nv12_load_save_pl3_gen75),
983             NULL,
984         },
985         
986         gen7_pp_plx_avs_initialize,
987     },
988
989     {
990         {
991             "PL3_NV12",
992             PP_PL3_LOAD_SAVE_N12,
993             pp_pl3_load_save_nv12_gen75,
994             sizeof(pp_pl3_load_save_nv12_gen75),
995             NULL,
996         },
997
998         gen7_pp_plx_avs_initialize,
999     },
1000
1001     {
1002         {
1003             "PL3_PL3",
1004             PP_PL3_LOAD_SAVE_PL3,
1005             pp_pl3_load_save_pl3_gen75,
1006             sizeof(pp_pl3_load_save_pl3_gen75),
1007             NULL,
1008         },
1009
1010         gen7_pp_plx_avs_initialize,
1011     },
1012
1013     {
1014         {
1015             "NV12 Scaling module",
1016             PP_NV12_SCALING,
1017             pp_nv12_scaling_gen75,
1018             sizeof(pp_nv12_scaling_gen75),
1019             NULL,
1020         },
1021
1022         gen7_pp_plx_avs_initialize,
1023     },
1024
1025     {
1026         {
1027             "NV12 AVS module",
1028             PP_NV12_AVS,
1029             pp_nv12_avs_gen75,
1030             sizeof(pp_nv12_avs_gen75),
1031             NULL,
1032         },
1033
1034         gen7_pp_plx_avs_initialize,
1035     },
1036
1037     {
1038         {
1039             "NV12 DNDI module",
1040             PP_NV12_DNDI,
1041             pp_nv12_dndi_gen75,
1042             sizeof(pp_nv12_dndi_gen75),
1043             NULL,
1044         },
1045
1046         gen7_pp_nv12_dn_initialize,
1047     },
1048
1049     {
1050         {
1051             "NV12 DN module",
1052             PP_NV12_DN,
1053             pp_nv12_dn_gen75,
1054             sizeof(pp_nv12_dn_gen75),
1055             NULL,
1056         },
1057
1058         gen7_pp_nv12_dn_initialize,
1059     },
1060
1061     {
1062         {
1063             "NV12_PA module",
1064             PP_NV12_LOAD_SAVE_PA,
1065             pp_nv12_load_save_pa_gen75,
1066             sizeof(pp_nv12_load_save_pa_gen75),
1067             NULL,
1068         },
1069     
1070         gen7_pp_plx_avs_initialize,
1071     },
1072
1073     {
1074         {
1075             "PL3_PA module",
1076             PP_PL3_LOAD_SAVE_PA,
1077             pp_pl3_load_save_pa_gen75,
1078             sizeof(pp_pl3_load_save_pa_gen75),
1079             NULL,
1080         },
1081     
1082         gen7_pp_plx_avs_initialize,
1083     },
1084
1085     {
1086         {
1087             "PA_NV12 module",
1088             PP_PA_LOAD_SAVE_NV12,
1089             pp_pa_load_save_nv12_gen75,
1090             sizeof(pp_pa_load_save_nv12_gen75),
1091             NULL,
1092         },
1093     
1094         gen7_pp_plx_avs_initialize,
1095     },
1096
1097     {
1098         {
1099             "PA_PL3 module",
1100             PP_PA_LOAD_SAVE_PL3,
1101             pp_pa_load_save_pl3_gen75,
1102             sizeof(pp_pa_load_save_pl3_gen75),
1103             NULL,
1104         },
1105     
1106         gen7_pp_plx_avs_initialize,
1107     },
1108
1109     {
1110         {
1111             "PA_PA module",
1112             PP_PA_LOAD_SAVE_PA,
1113             pp_pa_load_save_pa_gen75,
1114             sizeof(pp_pa_load_save_pa_gen75),
1115             NULL,
1116         },
1117
1118         gen7_pp_plx_avs_initialize,
1119     },
1120
1121     {
1122         {
1123             "RGBX_NV12 module",
1124             PP_RGBX_LOAD_SAVE_NV12,
1125             pp_rgbx_load_save_nv12_gen75,
1126             sizeof(pp_rgbx_load_save_nv12_gen75),
1127             NULL,
1128         },
1129     
1130         gen7_pp_plx_avs_initialize,
1131     },
1132
1133     {
1134         {
1135             "NV12_RGBX module",
1136             PP_NV12_LOAD_SAVE_RGBX,
1137             pp_nv12_load_save_rgbx_gen75,
1138             sizeof(pp_nv12_load_save_rgbx_gen75),
1139             NULL,
1140         },
1141     
1142         gen7_pp_plx_avs_initialize,
1143     },
1144             
1145 };
1146
1147 static int
1148 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
1149 {
1150     int fourcc;
1151
1152     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
1153         struct object_image *obj_image = (struct object_image *)surface->base;
1154         fourcc = obj_image->image.format.fourcc;
1155     } else {
1156         struct object_surface *obj_surface = (struct object_surface *)surface->base;
1157         fourcc = obj_surface->fourcc;
1158     }
1159
1160     return fourcc;
1161 }
1162
1163 static void
1164 pp_get_surface_size(VADriverContextP ctx, const struct i965_surface *surface, int *width, int *height)
1165 {
1166     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
1167         struct object_image *obj_image = (struct object_image *)surface->base;
1168
1169         *width = obj_image->image.width;
1170         *height = obj_image->image.height;
1171     } else {
1172         struct object_surface *obj_surface = (struct object_surface *)surface->base;
1173
1174         *width = obj_surface->orig_width;
1175         *height = obj_surface->orig_height;
1176     }
1177 }
1178
1179 static void
1180 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
1181 {
1182     switch (tiling) {
1183     case I915_TILING_NONE:
1184         ss->ss3.tiled_surface = 0;
1185         ss->ss3.tile_walk = 0;
1186         break;
1187     case I915_TILING_X:
1188         ss->ss3.tiled_surface = 1;
1189         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
1190         break;
1191     case I915_TILING_Y:
1192         ss->ss3.tiled_surface = 1;
1193         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
1194         break;
1195     }
1196 }
1197
1198 static void
1199 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
1200 {
1201     switch (tiling) {
1202     case I915_TILING_NONE:
1203         ss->ss2.tiled_surface = 0;
1204         ss->ss2.tile_walk = 0;
1205         break;
1206     case I915_TILING_X:
1207         ss->ss2.tiled_surface = 1;
1208         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1209         break;
1210     case I915_TILING_Y:
1211         ss->ss2.tiled_surface = 1;
1212         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1213         break;
1214     }
1215 }
1216
1217 static void
1218 gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
1219 {
1220     switch (tiling) {
1221     case I915_TILING_NONE:
1222         ss->ss0.tiled_surface = 0;
1223         ss->ss0.tile_walk = 0;
1224         break;
1225     case I915_TILING_X:
1226         ss->ss0.tiled_surface = 1;
1227         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
1228         break;
1229     case I915_TILING_Y:
1230         ss->ss0.tiled_surface = 1;
1231         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
1232         break;
1233     }
1234 }
1235
1236 static void
1237 gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
1238 {
1239     switch (tiling) {
1240     case I915_TILING_NONE:
1241         ss->ss2.tiled_surface = 0;
1242         ss->ss2.tile_walk = 0;
1243         break;
1244     case I915_TILING_X:
1245         ss->ss2.tiled_surface = 1;
1246         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1247         break;
1248     case I915_TILING_Y:
1249         ss->ss2.tiled_surface = 1;
1250         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1251         break;
1252     }
1253 }
1254
1255 static void
1256 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
1257 {
1258     struct i965_interface_descriptor *desc;
1259     dri_bo *bo;
1260     int pp_index = pp_context->current_pp;
1261
1262     bo = pp_context->idrt.bo;
1263     dri_bo_map(bo, 1);
1264     assert(bo->virtual);
1265     desc = bo->virtual;
1266     memset(desc, 0, sizeof(*desc));
1267     desc->desc0.grf_reg_blocks = 10;
1268     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
1269     desc->desc1.const_urb_entry_read_offset = 0;
1270     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
1271     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
1272     desc->desc2.sampler_count = 0;
1273     desc->desc3.binding_table_entry_count = 0;
1274     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
1275
1276     dri_bo_emit_reloc(bo,
1277                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1278                       desc->desc0.grf_reg_blocks,
1279                       offsetof(struct i965_interface_descriptor, desc0),
1280                       pp_context->pp_modules[pp_index].kernel.bo);
1281
1282     dri_bo_emit_reloc(bo,
1283                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1284                       desc->desc2.sampler_count << 2,
1285                       offsetof(struct i965_interface_descriptor, desc2),
1286                       pp_context->sampler_state_table.bo);
1287
1288     dri_bo_unmap(bo);
1289     pp_context->idrt.num_interface_descriptors++;
1290 }
1291
1292 static void
1293 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
1294 {
1295     struct i965_vfe_state *vfe_state;
1296     dri_bo *bo;
1297
1298     bo = pp_context->vfe_state.bo;
1299     dri_bo_map(bo, 1);
1300     assert(bo->virtual);
1301     vfe_state = bo->virtual;
1302     memset(vfe_state, 0, sizeof(*vfe_state));
1303     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
1304     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
1305     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
1306     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
1307     vfe_state->vfe1.children_present = 0;
1308     vfe_state->vfe2.interface_descriptor_base = 
1309         pp_context->idrt.bo->offset >> 4; /* reloc */
1310     dri_bo_emit_reloc(bo,
1311                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1312                       0,
1313                       offsetof(struct i965_vfe_state, vfe2),
1314                       pp_context->idrt.bo);
1315     dri_bo_unmap(bo);
1316 }
1317
1318 static void
1319 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
1320 {
1321     unsigned char *constant_buffer;
1322     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1323
1324     assert(sizeof(*pp_static_parameter) == 128);
1325     dri_bo_map(pp_context->curbe.bo, 1);
1326     assert(pp_context->curbe.bo->virtual);
1327     constant_buffer = pp_context->curbe.bo->virtual;
1328     memcpy(constant_buffer, pp_static_parameter, sizeof(*pp_static_parameter));
1329     dri_bo_unmap(pp_context->curbe.bo);
1330 }
1331
1332 static void
1333 ironlake_pp_states_setup(VADriverContextP ctx,
1334                          struct i965_post_processing_context *pp_context)
1335 {
1336     ironlake_pp_interface_descriptor_table(pp_context);
1337     ironlake_pp_vfe_state(pp_context);
1338     ironlake_pp_upload_constants(pp_context);
1339 }
1340
1341 static void
1342 ironlake_pp_pipeline_select(VADriverContextP ctx,
1343                             struct i965_post_processing_context *pp_context)
1344 {
1345     struct intel_batchbuffer *batch = pp_context->batch;
1346
1347     BEGIN_BATCH(batch, 1);
1348     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
1349     ADVANCE_BATCH(batch);
1350 }
1351
1352 static void
1353 ironlake_pp_urb_layout(VADriverContextP ctx,
1354                        struct i965_post_processing_context *pp_context)
1355 {
1356     struct intel_batchbuffer *batch = pp_context->batch;
1357     unsigned int vfe_fence, cs_fence;
1358
1359     vfe_fence = pp_context->urb.cs_start;
1360     cs_fence = pp_context->urb.size;
1361
1362     BEGIN_BATCH(batch, 3);
1363     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
1364     OUT_BATCH(batch, 0);
1365     OUT_BATCH(batch, 
1366               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
1367               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
1368     ADVANCE_BATCH(batch);
1369 }
1370
1371 static void
1372 ironlake_pp_state_base_address(VADriverContextP ctx,
1373                                struct i965_post_processing_context *pp_context)
1374 {
1375     struct intel_batchbuffer *batch = pp_context->batch;
1376
1377     BEGIN_BATCH(batch, 8);
1378     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
1379     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1380     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
1381     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1382     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1383     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1384     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1385     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1386     ADVANCE_BATCH(batch);
1387 }
1388
1389 static void
1390 ironlake_pp_state_pointers(VADriverContextP ctx,
1391                            struct i965_post_processing_context *pp_context)
1392 {
1393     struct intel_batchbuffer *batch = pp_context->batch;
1394
1395     BEGIN_BATCH(batch, 3);
1396     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
1397     OUT_BATCH(batch, 0);
1398     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1399     ADVANCE_BATCH(batch);
1400 }
1401
1402 static void 
1403 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
1404                           struct i965_post_processing_context *pp_context)
1405 {
1406     struct intel_batchbuffer *batch = pp_context->batch;
1407
1408     BEGIN_BATCH(batch, 2);
1409     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
1410     OUT_BATCH(batch,
1411               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
1412               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
1413     ADVANCE_BATCH(batch);
1414 }
1415
1416 static void
1417 ironlake_pp_constant_buffer(VADriverContextP ctx,
1418                             struct i965_post_processing_context *pp_context)
1419 {
1420     struct intel_batchbuffer *batch = pp_context->batch;
1421
1422     BEGIN_BATCH(batch, 2);
1423     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1424     OUT_RELOC(batch, pp_context->curbe.bo,
1425               I915_GEM_DOMAIN_INSTRUCTION, 0,
1426               pp_context->urb.size_cs_entry - 1);
1427     ADVANCE_BATCH(batch);    
1428 }
1429
1430 static void
1431 ironlake_pp_object_walker(VADriverContextP ctx,
1432                           struct i965_post_processing_context *pp_context)
1433 {
1434     struct intel_batchbuffer *batch = pp_context->batch;
1435     int x, x_steps, y, y_steps;
1436     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1437
1438     x_steps = pp_context->pp_x_steps(pp_context->private_context);
1439     y_steps = pp_context->pp_y_steps(pp_context->private_context);
1440
1441     for (y = 0; y < y_steps; y++) {
1442         for (x = 0; x < x_steps; x++) {
1443             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
1444                 BEGIN_BATCH(batch, 20);
1445                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
1446                 OUT_BATCH(batch, 0);
1447                 OUT_BATCH(batch, 0); /* no indirect data */
1448                 OUT_BATCH(batch, 0);
1449
1450                 /* inline data grf 5-6 */
1451                 assert(sizeof(*pp_inline_parameter) == 64);
1452                 intel_batchbuffer_data(batch, pp_inline_parameter, sizeof(*pp_inline_parameter));
1453
1454                 ADVANCE_BATCH(batch);
1455             }
1456         }
1457     }
1458 }
1459
1460 static void
1461 ironlake_pp_pipeline_setup(VADriverContextP ctx,
1462                            struct i965_post_processing_context *pp_context)
1463 {
1464     struct intel_batchbuffer *batch = pp_context->batch;
1465
1466     intel_batchbuffer_start_atomic(batch, 0x1000);
1467     intel_batchbuffer_emit_mi_flush(batch);
1468     ironlake_pp_pipeline_select(ctx, pp_context);
1469     ironlake_pp_state_base_address(ctx, pp_context);
1470     ironlake_pp_state_pointers(ctx, pp_context);
1471     ironlake_pp_urb_layout(ctx, pp_context);
1472     ironlake_pp_cs_urb_layout(ctx, pp_context);
1473     ironlake_pp_constant_buffer(ctx, pp_context);
1474     ironlake_pp_object_walker(ctx, pp_context);
1475     intel_batchbuffer_end_atomic(batch);
1476 }
1477
1478 // update u/v offset when the surface format are packed yuv
1479 static void i965_update_src_surface_static_parameter(
1480     VADriverContextP    ctx, 
1481     struct i965_post_processing_context *pp_context,
1482     const struct i965_surface *surface)
1483 {
1484     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1485     int fourcc = pp_get_surface_fourcc(ctx, surface);
1486
1487     switch (fourcc) {
1488     case VA_FOURCC_YUY2:
1489         pp_static_parameter->grf1.source_packed_u_offset = 1;
1490         pp_static_parameter->grf1.source_packed_v_offset = 3;
1491         break;
1492     case VA_FOURCC_UYVY:
1493         pp_static_parameter->grf1.source_packed_y_offset = 1;
1494         pp_static_parameter->grf1.source_packed_v_offset = 2;
1495         break;
1496     case VA_FOURCC_BGRX:
1497     case VA_FOURCC_BGRA:
1498         pp_static_parameter->grf1.source_rgb_layout = 0;
1499         break;
1500     case VA_FOURCC_RGBX:
1501     case VA_FOURCC_RGBA:
1502         pp_static_parameter->grf1.source_rgb_layout = 1;
1503         break;
1504     default:
1505         break;
1506     }
1507     
1508 }
1509
1510 static void i965_update_dst_surface_static_parameter(
1511     VADriverContextP    ctx, 
1512     struct i965_post_processing_context *pp_context,
1513     const struct i965_surface *surface)
1514 {
1515     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1516     int fourcc = pp_get_surface_fourcc(ctx, surface);
1517
1518     switch (fourcc) {
1519     case VA_FOURCC_YUY2:
1520         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_u_offset = 1;
1521         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 3;
1522         break;
1523     case VA_FOURCC_UYVY:
1524         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_y_offset = 1;
1525         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 2;
1526         break;
1527     case VA_FOURCC_BGRX:
1528     case VA_FOURCC_BGRA:
1529         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 0;
1530         break;
1531     case VA_FOURCC_RGBX:
1532     case VA_FOURCC_RGBA:
1533         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 1;
1534         break;
1535     default:
1536         break;
1537     }
1538     
1539 }
1540
1541 static void
1542 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1543                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1544                           int width, int height, int pitch, int format, 
1545                           int index, int is_target)
1546 {
1547     struct i965_surface_state *ss;
1548     dri_bo *ss_bo;
1549     unsigned int tiling;
1550     unsigned int swizzle;
1551
1552     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1553     ss_bo = pp_context->surface_state_binding_table.bo;
1554     assert(ss_bo);
1555
1556     dri_bo_map(ss_bo, True);
1557     assert(ss_bo->virtual);
1558     ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1559     memset(ss, 0, sizeof(*ss));
1560     ss->ss0.surface_type = I965_SURFACE_2D;
1561     ss->ss0.surface_format = format;
1562     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1563     ss->ss2.width = width - 1;
1564     ss->ss2.height = height - 1;
1565     ss->ss3.pitch = pitch - 1;
1566     pp_set_surface_tiling(ss, tiling);
1567     dri_bo_emit_reloc(ss_bo,
1568                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1569                       surf_bo_offset,
1570                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
1571                       surf_bo);
1572     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1573     dri_bo_unmap(ss_bo);
1574 }
1575
1576 static void
1577 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1578                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1579                            int width, int height, int wpitch,
1580                            int xoffset, int yoffset,
1581                            int format, int interleave_chroma,
1582                            int index)
1583 {
1584     struct i965_surface_state2 *ss2;
1585     dri_bo *ss2_bo;
1586     unsigned int tiling;
1587     unsigned int swizzle;
1588
1589     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1590     ss2_bo = pp_context->surface_state_binding_table.bo;
1591     assert(ss2_bo);
1592
1593     dri_bo_map(ss2_bo, True);
1594     assert(ss2_bo->virtual);
1595     ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1596     memset(ss2, 0, sizeof(*ss2));
1597     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1598     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1599     ss2->ss1.width = width - 1;
1600     ss2->ss1.height = height - 1;
1601     ss2->ss2.pitch = wpitch - 1;
1602     ss2->ss2.interleave_chroma = interleave_chroma;
1603     ss2->ss2.surface_format = format;
1604     ss2->ss3.x_offset_for_cb = xoffset;
1605     ss2->ss3.y_offset_for_cb = yoffset;
1606     pp_set_surface2_tiling(ss2, tiling);
1607     dri_bo_emit_reloc(ss2_bo,
1608                       I915_GEM_DOMAIN_RENDER, 0,
1609                       surf_bo_offset,
1610                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
1611                       surf_bo);
1612     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1613     dri_bo_unmap(ss2_bo);
1614 }
1615
1616 static void
1617 gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1618                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1619                           int width, int height, int pitch, int format, 
1620                           int index, int is_target)
1621 {
1622     struct i965_driver_data * const i965 = i965_driver_data(ctx);  
1623     struct gen7_surface_state *ss;
1624     dri_bo *ss_bo;
1625     unsigned int tiling;
1626     unsigned int swizzle;
1627
1628     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1629     ss_bo = pp_context->surface_state_binding_table.bo;
1630     assert(ss_bo);
1631
1632     dri_bo_map(ss_bo, True);
1633     assert(ss_bo->virtual);
1634     ss = (struct gen7_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1635     memset(ss, 0, sizeof(*ss));
1636     ss->ss0.surface_type = I965_SURFACE_2D;
1637     ss->ss0.surface_format = format;
1638     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1639     ss->ss2.width = width - 1;
1640     ss->ss2.height = height - 1;
1641     ss->ss3.pitch = pitch - 1;
1642     gen7_pp_set_surface_tiling(ss, tiling);
1643     if (IS_HASWELL(i965->intel.device_id))
1644         gen7_render_set_surface_scs(ss);
1645     dri_bo_emit_reloc(ss_bo,
1646                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1647                       surf_bo_offset,
1648                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
1649                       surf_bo);
1650     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1651     dri_bo_unmap(ss_bo);
1652 }
1653
1654 static void
1655 gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1656                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1657                            int width, int height, int wpitch,
1658                            int xoffset, int yoffset,
1659                            int format, int interleave_chroma,
1660                            int index)
1661 {
1662     struct gen7_surface_state2 *ss2;
1663     dri_bo *ss2_bo;
1664     unsigned int tiling;
1665     unsigned int swizzle;
1666
1667     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1668     ss2_bo = pp_context->surface_state_binding_table.bo;
1669     assert(ss2_bo);
1670
1671     dri_bo_map(ss2_bo, True);
1672     assert(ss2_bo->virtual);
1673     ss2 = (struct gen7_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1674     memset(ss2, 0, sizeof(*ss2));
1675     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1676     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1677     ss2->ss1.width = width - 1;
1678     ss2->ss1.height = height - 1;
1679     ss2->ss2.pitch = wpitch - 1;
1680     ss2->ss2.interleave_chroma = interleave_chroma;
1681     ss2->ss2.surface_format = format;
1682     ss2->ss3.x_offset_for_cb = xoffset;
1683     ss2->ss3.y_offset_for_cb = yoffset;
1684     gen7_pp_set_surface2_tiling(ss2, tiling);
1685     dri_bo_emit_reloc(ss2_bo,
1686                       I915_GEM_DOMAIN_RENDER, 0,
1687                       surf_bo_offset,
1688                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
1689                       surf_bo);
1690     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1691     dri_bo_unmap(ss2_bo);
1692 }
1693
1694 static void 
1695 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1696                                 const struct i965_surface *surface, 
1697                                 int base_index, int is_target,
1698                                 int *width, int *height, int *pitch, int *offset)
1699 {
1700     struct object_surface *obj_surface;
1701     struct object_image *obj_image;
1702     dri_bo *bo;
1703     int fourcc = pp_get_surface_fourcc(ctx, surface);
1704     const int Y = 0;
1705     const int U = ((fourcc == VA_FOURCC_YV12) ||
1706                    (fourcc == VA_FOURCC_YV16))
1707                    ? 2 : 1;
1708     const int V = ((fourcc == VA_FOURCC_YV12) ||
1709                    (fourcc == VA_FOURCC_YV16))
1710                    ? 1 : 2;
1711     const int UV = 1;
1712     int interleaved_uv = fourcc == VA_FOURCC_NV12;
1713     int packed_yuv = (fourcc == VA_FOURCC_YUY2 || fourcc == VA_FOURCC_UYVY);
1714     int full_packed_format = (fourcc == VA_FOURCC_RGBA ||
1715                               fourcc == VA_FOURCC_RGBX ||
1716                               fourcc == VA_FOURCC_BGRA ||
1717                               fourcc == VA_FOURCC_BGRX);
1718     int scale_factor_of_1st_plane_width_in_byte = 1;
1719                               
1720     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1721         obj_surface = (struct object_surface *)surface->base;
1722         bo = obj_surface->bo;
1723         width[0] = obj_surface->orig_width;
1724         height[0] = obj_surface->orig_height;
1725         pitch[0] = obj_surface->width;
1726         offset[0] = 0;
1727
1728         if (full_packed_format) {
1729             scale_factor_of_1st_plane_width_in_byte = 4; 
1730         }
1731         else if (packed_yuv ) {
1732             scale_factor_of_1st_plane_width_in_byte =  2; 
1733         }
1734         else if (interleaved_uv) {
1735             width[1] = obj_surface->orig_width;
1736             height[1] = obj_surface->orig_height / 2;
1737             pitch[1] = obj_surface->width;
1738             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1739         } else {
1740             width[1] = obj_surface->orig_width / 2;
1741             height[1] = obj_surface->orig_height / 2;
1742             pitch[1] = obj_surface->width / 2;
1743             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1744             width[2] = obj_surface->orig_width / 2;
1745             height[2] = obj_surface->orig_height / 2;
1746             pitch[2] = obj_surface->width / 2;
1747             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
1748         }
1749     } else {
1750         obj_image = (struct object_image *)surface->base;
1751         bo = obj_image->bo;
1752         width[0] = obj_image->image.width;
1753         height[0] = obj_image->image.height;
1754         pitch[0] = obj_image->image.pitches[0];
1755         offset[0] = obj_image->image.offsets[0];
1756
1757         if (full_packed_format) {
1758             scale_factor_of_1st_plane_width_in_byte = 4;
1759         }
1760         else if (packed_yuv ) {
1761             scale_factor_of_1st_plane_width_in_byte = 2;
1762         }
1763         else if (interleaved_uv) {
1764             width[1] = obj_image->image.width;
1765             height[1] = obj_image->image.height / 2;
1766             pitch[1] = obj_image->image.pitches[1];
1767             offset[1] = obj_image->image.offsets[1];
1768         } else {
1769             width[1] = obj_image->image.width / 2;
1770             height[1] = obj_image->image.height / 2;
1771             pitch[1] = obj_image->image.pitches[1];
1772             offset[1] = obj_image->image.offsets[1];
1773             width[2] = obj_image->image.width / 2;
1774             height[2] = obj_image->image.height / 2;
1775             pitch[2] = obj_image->image.pitches[2];
1776             offset[2] = obj_image->image.offsets[2];
1777             if (fourcc == VA_FOURCC_YV16) {
1778                 width[1] = obj_image->image.width / 2;
1779                 height[1] = obj_image->image.height;
1780                 width[2] = obj_image->image.width / 2;
1781                 height[2] = obj_image->image.height;
1782             }
1783         }
1784     }
1785
1786     /* Y surface */
1787     i965_pp_set_surface_state(ctx, pp_context,
1788                               bo, offset[Y],
1789                               width[Y] *scale_factor_of_1st_plane_width_in_byte / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
1790                               base_index, is_target);
1791
1792     if (!packed_yuv && !full_packed_format) {
1793         if (interleaved_uv) {
1794             i965_pp_set_surface_state(ctx, pp_context,
1795                                       bo, offset[UV],
1796                                       width[UV] / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
1797                                       base_index + 1, is_target);
1798         } else {
1799             /* U surface */
1800             i965_pp_set_surface_state(ctx, pp_context,
1801                                       bo, offset[U],
1802                                       width[U] / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
1803                                       base_index + 1, is_target);
1804
1805             /* V surface */
1806             i965_pp_set_surface_state(ctx, pp_context,
1807                                       bo, offset[V],
1808                                       width[V] / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
1809                                       base_index + 2, is_target);
1810         }
1811     }
1812
1813 }
1814
1815 static void 
1816 gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1817                                      const struct i965_surface *surface, 
1818                                      int base_index, int is_target,
1819                                      int *width, int *height, int *pitch, int *offset)
1820 {
1821     struct object_surface *obj_surface;
1822     struct object_image *obj_image;
1823     dri_bo *bo;
1824     int fourcc = pp_get_surface_fourcc(ctx, surface);
1825     const int U = (fourcc == VA_FOURCC_YV12 ||
1826                    fourcc == VA_FOURCC_YV16 ||
1827                    fourcc == VA_FOURCC_IMC1) ? 2 : 1;
1828     const int V = (fourcc == VA_FOURCC_YV12 ||
1829                    fourcc == VA_FOURCC_YV16 ||
1830                    fourcc == VA_FOURCC_IMC1) ? 1 : 2;
1831     int interleaved_uv = fourcc == VA_FOURCC_NV12;
1832     int packed_yuv = (fourcc == VA_FOURCC_YUY2 || fourcc == VA_FOURCC_UYVY);
1833     int rgbx_format = (fourcc == VA_FOURCC_RGBA ||
1834                               fourcc == VA_FOURCC_RGBX ||
1835                               fourcc == VA_FOURCC_BGRA ||
1836                               fourcc == VA_FOURCC_BGRX);
1837
1838     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1839         obj_surface = (struct object_surface *)surface->base;
1840         bo = obj_surface->bo;
1841         width[0] = obj_surface->orig_width;
1842         height[0] = obj_surface->orig_height;
1843         pitch[0] = obj_surface->width;
1844         offset[0] = 0;
1845
1846         if (packed_yuv) {
1847             if (is_target)
1848                 width[0] = obj_surface->orig_width * 2; /* surface format is R8, so double the width */
1849             else
1850                 width[0] = obj_surface->orig_width;     /* surface foramt is YCBCR, width is specified in units of pixels */
1851         } else if (rgbx_format) {
1852             if (is_target)
1853                 width[0] = obj_surface->orig_width * 4; /* surface format is R8, so quad the width */
1854         }
1855
1856         width[1] = obj_surface->cb_cr_width;
1857         height[1] = obj_surface->cb_cr_height;
1858         pitch[1] = obj_surface->cb_cr_pitch;
1859         offset[1] = obj_surface->y_cb_offset * obj_surface->width;
1860
1861         width[2] = obj_surface->cb_cr_width;
1862         height[2] = obj_surface->cb_cr_height;
1863         pitch[2] = obj_surface->cb_cr_pitch;
1864         offset[2] = obj_surface->y_cr_offset * obj_surface->width;
1865     } else {
1866         obj_image = (struct object_image *)surface->base;
1867         bo = obj_image->bo;
1868         width[0] = obj_image->image.width;
1869         height[0] = obj_image->image.height;
1870         pitch[0] = obj_image->image.pitches[0];
1871         offset[0] = obj_image->image.offsets[0];
1872
1873         if (rgbx_format) {
1874             if (is_target)
1875                 width[0] = obj_image->image.width * 4; /* surface format is R8, so quad the width */
1876         } else if (packed_yuv) {
1877             if (is_target)
1878                 width[0] = obj_image->image.width * 2;  /* surface format is R8, so double the width */
1879             else
1880                 width[0] = obj_image->image.width;      /* surface foramt is YCBCR, width is specified in units of pixels */
1881         } else if (interleaved_uv) {
1882             width[1] = obj_image->image.width / 2;
1883             height[1] = obj_image->image.height / 2;
1884             pitch[1] = obj_image->image.pitches[1];
1885             offset[1] = obj_image->image.offsets[1];
1886         } else {
1887             width[1] = obj_image->image.width / 2;
1888             height[1] = obj_image->image.height / 2;
1889             pitch[1] = obj_image->image.pitches[U];
1890             offset[1] = obj_image->image.offsets[U];
1891             width[2] = obj_image->image.width / 2;
1892             height[2] = obj_image->image.height / 2;
1893             pitch[2] = obj_image->image.pitches[V];
1894             offset[2] = obj_image->image.offsets[V];
1895             if (fourcc == VA_FOURCC_YV16 || fourcc == VA_FOURCC_422H) {
1896                 width[1] = obj_image->image.width / 2;
1897                 height[1] = obj_image->image.height;
1898                 width[2] = obj_image->image.width / 2;
1899                 height[2] = obj_image->image.height;
1900             }
1901         }
1902     }
1903
1904     if (is_target) {
1905         gen7_pp_set_surface_state(ctx, pp_context,
1906                                   bo, 0,
1907                                   width[0] / 4, height[0], pitch[0],
1908                                   I965_SURFACEFORMAT_R8_UINT,
1909                                   base_index, 1);
1910         if (rgbx_format) {
1911                 struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1912                 /* the format is MSB: X-B-G-R */
1913                 pp_static_parameter->grf2.save_avs_rgb_swap = 0;
1914                 if ((fourcc == VA_FOURCC_BGRA) ||
1915                         (fourcc == VA_FOURCC_BGRX)) {
1916                         /* It is stored as MSB: X-R-G-B */
1917                         pp_static_parameter->grf2.save_avs_rgb_swap = 1;
1918                 }
1919         }
1920         if (!packed_yuv && !rgbx_format) {
1921             if (interleaved_uv) {
1922                 gen7_pp_set_surface_state(ctx, pp_context,
1923                                           bo, offset[1],
1924                                           width[1] / 2, height[1], pitch[1],
1925                                           I965_SURFACEFORMAT_R8G8_SINT,
1926                                           base_index + 1, 1);
1927             } else {
1928                 gen7_pp_set_surface_state(ctx, pp_context,
1929                                           bo, offset[1],
1930                                           width[1] / 4, height[1], pitch[1],
1931                                           I965_SURFACEFORMAT_R8_SINT,
1932                                           base_index + 1, 1);
1933                 gen7_pp_set_surface_state(ctx, pp_context,
1934                                           bo, offset[2],
1935                                           width[2] / 4, height[2], pitch[2],
1936                                           I965_SURFACEFORMAT_R8_SINT,
1937                                           base_index + 2, 1);
1938             }
1939         }
1940     } else {
1941         int format0 = SURFACE_FORMAT_Y8_UNORM;
1942
1943         switch (fourcc) {
1944         case VA_FOURCC_YUY2:
1945             format0 = SURFACE_FORMAT_YCRCB_NORMAL;
1946             break;
1947
1948         case VA_FOURCC_UYVY:
1949             format0 = SURFACE_FORMAT_YCRCB_SWAPY;
1950             break;
1951
1952         default:
1953             break;
1954         }
1955         if (rgbx_format) {
1956             struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1957             /* Only R8G8B8A8_UNORM is supported for BGRX or RGBX */
1958             format0 = SURFACE_FORMAT_R8G8B8A8_UNORM;
1959             pp_static_parameter->grf2.src_avs_rgb_swap = 0;
1960             if ((fourcc == VA_FOURCC_BGRA) ||
1961                 (fourcc == VA_FOURCC_BGRX)) {
1962                 pp_static_parameter->grf2.src_avs_rgb_swap = 1;
1963             }
1964         }
1965         gen7_pp_set_surface2_state(ctx, pp_context,
1966                                    bo, offset[0],
1967                                    width[0], height[0], pitch[0],
1968                                    0, 0,
1969                                    format0, 0,
1970                                    base_index);
1971
1972         if (!packed_yuv && !rgbx_format) {
1973             if (interleaved_uv) {
1974                 gen7_pp_set_surface2_state(ctx, pp_context,
1975                                            bo, offset[1],
1976                                            width[1], height[1], pitch[1],
1977                                            0, 0,
1978                                            SURFACE_FORMAT_R8B8_UNORM, 0,
1979                                            base_index + 1);
1980             } else {
1981                 gen7_pp_set_surface2_state(ctx, pp_context,
1982                                            bo, offset[1],
1983                                            width[1], height[1], pitch[1],
1984                                            0, 0,
1985                                            SURFACE_FORMAT_R8_UNORM, 0,
1986                                            base_index + 1);
1987                 gen7_pp_set_surface2_state(ctx, pp_context,
1988                                            bo, offset[2],
1989                                            width[2], height[2], pitch[2],
1990                                            0, 0,
1991                                            SURFACE_FORMAT_R8_UNORM, 0,
1992                                            base_index + 2);
1993             }
1994         }
1995     }
1996 }
1997
/* Horizontal step count of the null module: always 1, the context is ignored. */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2003
/* Vertical step count of the null module: always 1, the context is ignored. */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2009
/* Per-block setup of the null module: nothing to program, always returns 0. */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;
    (void)x;
    (void)y;

    return 0;
}
2015
2016 static VAStatus
2017 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2018                    const struct i965_surface *src_surface,
2019                    const VARectangle *src_rect,
2020                    struct i965_surface *dst_surface,
2021                    const VARectangle *dst_rect,
2022                    void *filter_param)
2023 {
2024     /* private function & data */
2025     pp_context->pp_x_steps = pp_null_x_steps;
2026     pp_context->pp_y_steps = pp_null_y_steps;
2027     pp_context->private_context = NULL;
2028     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
2029
2030     dst_surface->flags = src_surface->flags;
2031
2032     return VA_STATUS_SUCCESS;
2033 }
2034
/* Horizontal step count of the load/save module: always 1, the context is ignored. */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2040
2041 static int
2042 pp_load_save_y_steps(void *private_context)
2043 {
2044     struct pp_load_save_context *pp_load_save_context = private_context;
2045
2046     return pp_load_save_context->dest_h / 8;
2047 }
2048
2049 static int
2050 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2051 {
2052     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2053     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)pp_context->private_context;
2054
2055     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_load_save_context->dest_x;
2056     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_load_save_context->dest_y;
2057
2058     return 0;
2059 }
2060
2061 static void calculate_boundary_block_mask(struct i965_post_processing_context *pp_context, const VARectangle *dst_rect)
2062 {
2063     int i;
2064     /* x offset of dest surface must be dword aligned.
2065      * so we have to extend dst surface on left edge, and mask out pixels not interested
2066      */
2067     if (dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT) {
2068         pp_context->block_horizontal_mask_left = 0;
2069         for (i=dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; i<GPU_ASM_BLOCK_WIDTH; i++)
2070         {
2071             pp_context->block_horizontal_mask_left |= 1<<i;
2072         }
2073     }
2074     else {
2075         pp_context->block_horizontal_mask_left = 0xffff;
2076     }
2077     
2078     int dst_width_adjust = dst_rect->width + dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; 
2079     if (dst_width_adjust%GPU_ASM_BLOCK_WIDTH){
2080         pp_context->block_horizontal_mask_right = (1 << (dst_width_adjust%GPU_ASM_BLOCK_WIDTH)) - 1;
2081     }
2082     else {
2083         pp_context->block_horizontal_mask_right = 0xffff;
2084     }
2085     
2086     if (dst_rect->height%GPU_ASM_BLOCK_HEIGHT){
2087         pp_context->block_vertical_mask_bottom = (1 << (dst_rect->height%GPU_ASM_BLOCK_HEIGHT)) - 1;
2088     }
2089     else {
2090         pp_context->block_vertical_mask_bottom = 0xff;
2091     }
2092
2093 }
2094 static VAStatus
2095 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2096                                 const struct i965_surface *src_surface,
2097                                 const VARectangle *src_rect,
2098                                 struct i965_surface *dst_surface,
2099                                 const VARectangle *dst_rect,
2100                                 void *filter_param)
2101 {
2102     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->pp_load_save_context;
2103     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2104     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2105     int width[3], height[3], pitch[3], offset[3];
2106
2107     /* source surface */
2108     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
2109                                     width, height, pitch, offset);
2110
2111     /* destination surface */
2112     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
2113                                     width, height, pitch, offset);
2114
2115     /* private function & data */
2116     pp_context->pp_x_steps = pp_load_save_x_steps;
2117     pp_context->pp_y_steps = pp_load_save_y_steps;
2118     pp_context->private_context = &pp_context->pp_load_save_context;
2119     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
2120
2121     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;;
2122     pp_load_save_context->dest_x = dst_rect->x - dst_left_edge_extend;
2123     pp_load_save_context->dest_y = dst_rect->y;
2124     pp_load_save_context->dest_h = ALIGN(dst_rect->height, 8);
2125     pp_load_save_context->dest_w = ALIGN(dst_rect->width+dst_left_edge_extend, 16);
2126
2127     pp_inline_parameter->grf5.block_count_x = pp_load_save_context->dest_w / 16;   /* 1 x N */
2128     pp_inline_parameter->grf5.number_blocks = pp_load_save_context->dest_w / 16;
2129
2130     pp_static_parameter->grf3.horizontal_origin_offset = src_rect->x;
2131     pp_static_parameter->grf3.vertical_origin_offset = src_rect->y;
2132
2133     // update u/v offset for packed yuv
2134     i965_update_src_surface_static_parameter (ctx, pp_context, src_surface);
2135     i965_update_dst_surface_static_parameter (ctx, pp_context, dst_surface);
2136
2137     dst_surface->flags = src_surface->flags;
2138
2139     return VA_STATUS_SUCCESS;
2140 }
2141
/* Horizontal step count of the scaling module: always 1, the context is ignored. */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2147
2148 static int
2149 pp_scaling_y_steps(void *private_context)
2150 {
2151     struct pp_scaling_context *pp_scaling_context = private_context;
2152
2153     return pp_scaling_context->dest_h / 8;
2154 }
2155
2156 static int
2157 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2158 {
2159     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)pp_context->private_context;
2160     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2161     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2162     float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2163     float src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2164
2165     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
2166     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
2167     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
2168     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
2169     
2170     return 0;
2171 }
2172
2173 static VAStatus
2174 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2175                            const struct i965_surface *src_surface,
2176                            const VARectangle *src_rect,
2177                            struct i965_surface *dst_surface,
2178                            const VARectangle *dst_rect,
2179                            void *filter_param)
2180 {
2181     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->pp_scaling_context;
2182     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2183     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2184     struct object_surface *obj_surface;
2185     struct i965_sampler_state *sampler_state;
2186     int in_w, in_h, in_wpitch, in_hpitch;
2187     int out_w, out_h, out_wpitch, out_hpitch;
2188
2189     /* source surface */
2190     obj_surface = (struct object_surface *)src_surface->base;
2191     in_w = obj_surface->orig_width;
2192     in_h = obj_surface->orig_height;
2193     in_wpitch = obj_surface->width;
2194     in_hpitch = obj_surface->height;
2195
2196     /* source Y surface index 1 */
2197     i965_pp_set_surface_state(ctx, pp_context,
2198                               obj_surface->bo, 0,
2199                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2200                               1, 0);
2201
2202     /* source UV surface index 2 */
2203     i965_pp_set_surface_state(ctx, pp_context,
2204                               obj_surface->bo, in_wpitch * in_hpitch,
2205                               in_w / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2206                               2, 0);
2207
2208     /* destination surface */
2209     obj_surface = (struct object_surface *)dst_surface->base;
2210     out_w = obj_surface->orig_width;
2211     out_h = obj_surface->orig_height;
2212     out_wpitch = obj_surface->width;
2213     out_hpitch = obj_surface->height;
2214
2215     /* destination Y surface index 7 */
2216     i965_pp_set_surface_state(ctx, pp_context,
2217                               obj_surface->bo, 0,
2218                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2219                               7, 1);
2220
2221     /* destination UV surface index 8 */
2222     i965_pp_set_surface_state(ctx, pp_context,
2223                               obj_surface->bo, out_wpitch * out_hpitch,
2224                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2225                               8, 1);
2226
2227     /* sampler state */
2228     dri_bo_map(pp_context->sampler_state_table.bo, True);
2229     assert(pp_context->sampler_state_table.bo->virtual);
2230     sampler_state = pp_context->sampler_state_table.bo->virtual;
2231
2232     /* SIMD16 Y index 1 */
2233     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
2234     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2235     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2236     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2237     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2238
2239     /* SIMD16 UV index 2 */
2240     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
2241     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2242     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2243     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2244     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2245
2246     dri_bo_unmap(pp_context->sampler_state_table.bo);
2247
2248     /* private function & data */
2249     pp_context->pp_x_steps = pp_scaling_x_steps;
2250     pp_context->pp_y_steps = pp_scaling_y_steps;
2251     pp_context->private_context = &pp_context->pp_scaling_context;
2252     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
2253
2254     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
2255     float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
2256     pp_scaling_context->dest_x = dst_rect->x - dst_left_edge_extend;
2257     pp_scaling_context->dest_y = dst_rect->y;
2258     pp_scaling_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2259     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 8);
2260     pp_scaling_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
2261     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
2262
2263     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2264
2265     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
2266     pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
2267     pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
2268
2269     dst_surface->flags = src_surface->flags;
2270
2271     return VA_STATUS_SUCCESS;
2272 }
2273
2274 static int
2275 pp_avs_x_steps(void *private_context)
2276 {
2277     struct pp_avs_context *pp_avs_context = private_context;
2278
2279     return pp_avs_context->dest_w / 16;
2280 }
2281
/* Vertical step count of the AVS module: always 1, the context is ignored. */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2287
2288 static int
2289 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2290 {
2291     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context;
2292     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2293     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2294     float src_x_steping, src_y_steping, video_step_delta;
2295     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
2296
2297     if (pp_static_parameter->grf4.r4_2.avs.nlas == 0) {
2298         src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2299         pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_avs_context->src_normalized_x;
2300     } else if (tmp_w >= pp_avs_context->dest_w) {
2301         pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
2302         pp_inline_parameter->grf6.video_step_delta = 0;
2303         
2304         if (x == 0) {
2305             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
2306                 pp_avs_context->src_normalized_x;
2307         } else {
2308             src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2309             video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2310             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2311                 16 * 15 * video_step_delta / 2;
2312         }
2313     } else {
2314         int n0, n1, n2, nls_left, nls_right;
2315         int factor_a = 5, factor_b = 4;
2316         float f;
2317
2318         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
2319         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
2320         n2 = tmp_w / (16 * factor_a);
2321         nls_left = n0 + n2;
2322         nls_right = n1 + n2;
2323         f = (float) n2 * 16 / tmp_w;
2324         
2325         if (n0 < 5) {
2326             pp_inline_parameter->grf6.video_step_delta = 0.0;
2327
2328             if (x == 0) {
2329                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
2330                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
2331             } else {
2332                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2333                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2334                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2335                     16 * 15 * video_step_delta / 2;
2336             }
2337         } else {
2338             if (x < nls_left) {
2339                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
2340                 float a = f / (nls_left * 16 * factor_b);
2341                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
2342                 
2343                 pp_inline_parameter->grf6.video_step_delta = b;
2344
2345                 if (x == 0) {
2346                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
2347                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a;
2348                 } else {
2349                     src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2350                     video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2351                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2352                         16 * 15 * video_step_delta / 2;
2353                     pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
2354                 }
2355             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
2356                 /* scale the center linearly */
2357                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2358                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2359                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2360                     16 * 15 * video_step_delta / 2;
2361                 pp_inline_parameter->grf6.video_step_delta = 0.0;
2362                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
2363             } else {
2364                 float a = f / (nls_right * 16 * factor_b);
2365                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
2366
2367                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2368                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2369                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2370                     16 * 15 * video_step_delta / 2;
2371                 pp_inline_parameter->grf6.video_step_delta = -b;
2372
2373                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
2374                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
2375                 else
2376                     pp_inline_parameter->grf5.normalized_video_x_scaling_step -= b * 16;
2377             }
2378         }
2379     }
2380
2381     src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2382     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
2383     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2384     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
2385
2386     return 0;
2387 }
2388
2389 static VAStatus
2390 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2391                        const struct i965_surface *src_surface,
2392                        const VARectangle *src_rect,
2393                        struct i965_surface *dst_surface,
2394                        const VARectangle *dst_rect,
2395                        void *filter_param,
2396                        int nlas)
2397 {
2398     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context;
2399     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2400     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2401     struct object_surface *obj_surface;
2402     struct i965_sampler_8x8 *sampler_8x8;
2403     struct i965_sampler_8x8_state *sampler_8x8_state;
2404     int index;
2405     int in_w, in_h, in_wpitch, in_hpitch;
2406     int out_w, out_h, out_wpitch, out_hpitch;
2407     int i;
2408
2409     /* surface */
2410     obj_surface = (struct object_surface *)src_surface->base;
2411     in_w = obj_surface->orig_width;
2412     in_h = obj_surface->orig_height;
2413     in_wpitch = obj_surface->width;
2414     in_hpitch = obj_surface->height;
2415
2416     /* source Y surface index 1 */
2417     i965_pp_set_surface2_state(ctx, pp_context,
2418                                obj_surface->bo, 0,
2419                                in_w, in_h, in_wpitch,
2420                                0, 0,
2421                                SURFACE_FORMAT_Y8_UNORM, 0,
2422                                1);
2423
2424     /* source UV surface index 2 */
2425     i965_pp_set_surface2_state(ctx, pp_context,
2426                                obj_surface->bo, in_wpitch * in_hpitch,
2427                                in_w / 2, in_h / 2, in_wpitch,
2428                                0, 0,
2429                                SURFACE_FORMAT_R8B8_UNORM, 0,
2430                                2);
2431
2432     /* destination surface */
2433     obj_surface = (struct object_surface *)dst_surface->base;
2434     out_w = obj_surface->orig_width;
2435     out_h = obj_surface->orig_height;
2436     out_wpitch = obj_surface->width;
2437     out_hpitch = obj_surface->height;
2438     assert(out_w <= out_wpitch && out_h <= out_hpitch);
2439
2440     /* destination Y surface index 7 */
2441     i965_pp_set_surface_state(ctx, pp_context,
2442                               obj_surface->bo, 0,
2443                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2444                               7, 1);
2445
2446     /* destination UV surface index 8 */
2447     i965_pp_set_surface_state(ctx, pp_context,
2448                               obj_surface->bo, out_wpitch * out_hpitch,
2449                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2450                               8, 1);
2451
2452     /* sampler 8x8 state */
2453     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2454     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2455     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2456     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2457     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2458
2459     for (i = 0; i < 17; i++) {
2460         /* for Y channel, currently ignore */
2461         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x00;
2462         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x00;
2463         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x08;
2464         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x18;
2465         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x18;
2466         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x08;
2467         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x00;
2468         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x00;
2469         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x00;
2470         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x00;
2471         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x10;
2472         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x10;
2473         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x10;
2474         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x10;
2475         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x00;
2476         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x00;
2477         /* for U/V channel, 0.25 */
2478         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2479         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2480         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
2481         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
2482         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
2483         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
2484         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2485         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2486         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2487         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2488         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
2489         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
2490         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
2491         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
2492         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2493         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2494     }
2495
2496     sampler_8x8_state->dw136.default_sharpness_level = 0;
2497     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
2498     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2499     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2500     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2501
2502     /* sampler 8x8 */
2503     dri_bo_map(pp_context->sampler_state_table.bo, True);
2504     assert(pp_context->sampler_state_table.bo->virtual);
2505     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
2506     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2507
2508     /* sample_8x8 Y index 1 */
2509     index = 1;
2510     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2511     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2512     sampler_8x8[index].dw0.ief_bypass = 1;
2513     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2514     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2515     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2516     sampler_8x8[index].dw2.global_noise_estimation = 22;
2517     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2518     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2519     sampler_8x8[index].dw3.strong_edge_weight = 7;
2520     sampler_8x8[index].dw3.regular_weight = 2;
2521     sampler_8x8[index].dw3.non_edge_weight = 0;
2522     sampler_8x8[index].dw3.gain_factor = 40;
2523     sampler_8x8[index].dw4.steepness_boost = 0;
2524     sampler_8x8[index].dw4.steepness_threshold = 0;
2525     sampler_8x8[index].dw4.mr_boost = 0;
2526     sampler_8x8[index].dw4.mr_threshold = 5;
2527     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2528     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2529     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2530     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2531     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2532     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2533     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2534     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2535     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2536     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2537     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2538     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2539     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2540     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2541     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2542     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2543     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2544     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2545     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2546     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2547     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2548     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2549     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2550     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2551     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2552     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2553     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2554     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2555     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2556     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2557     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2558     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2559     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2560     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2561     sampler_8x8[index].dw13.limiter_boost = 0;
2562     sampler_8x8[index].dw13.minimum_limiter = 10;
2563     sampler_8x8[index].dw13.maximum_limiter = 11;
2564     sampler_8x8[index].dw14.clip_limiter = 130;
2565     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2566                       I915_GEM_DOMAIN_RENDER, 
2567                       0,
2568                       0,
2569                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2570                       pp_context->sampler_state_table.bo_8x8);
2571
2572     /* sample_8x8 UV index 2 */
2573     index = 2;
2574     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2575     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2576     sampler_8x8[index].dw0.ief_bypass = 1;
2577     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2578     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2579     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2580     sampler_8x8[index].dw2.global_noise_estimation = 22;
2581     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2582     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2583     sampler_8x8[index].dw3.strong_edge_weight = 7;
2584     sampler_8x8[index].dw3.regular_weight = 2;
2585     sampler_8x8[index].dw3.non_edge_weight = 0;
2586     sampler_8x8[index].dw3.gain_factor = 40;
2587     sampler_8x8[index].dw4.steepness_boost = 0;
2588     sampler_8x8[index].dw4.steepness_threshold = 0;
2589     sampler_8x8[index].dw4.mr_boost = 0;
2590     sampler_8x8[index].dw4.mr_threshold = 5;
2591     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2592     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2593     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2594     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2595     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2596     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2597     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2598     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2599     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2600     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2601     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2602     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2603     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2604     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2605     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2606     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2607     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2608     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2609     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2610     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2611     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2612     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2613     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2614     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2615     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2616     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2617     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2618     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2619     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2620     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2621     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2622     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2623     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2624     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2625     sampler_8x8[index].dw13.limiter_boost = 0;
2626     sampler_8x8[index].dw13.minimum_limiter = 10;
2627     sampler_8x8[index].dw13.maximum_limiter = 11;
2628     sampler_8x8[index].dw14.clip_limiter = 130;
2629     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2630                       I915_GEM_DOMAIN_RENDER, 
2631                       0,
2632                       0,
2633                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2634                       pp_context->sampler_state_table.bo_8x8);
2635
2636     dri_bo_unmap(pp_context->sampler_state_table.bo);
2637
2638     /* private function & data */
2639     pp_context->pp_x_steps = pp_avs_x_steps;
2640     pp_context->pp_y_steps = pp_avs_y_steps;
2641     pp_context->private_context = &pp_context->pp_avs_context;
2642     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
2643
2644     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
2645     float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
2646     pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
2647     pp_avs_context->dest_y = dst_rect->y;
2648     pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2649     pp_avs_context->dest_h = ALIGN(dst_rect->height, 8);
2650     pp_avs_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
2651     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
2652     pp_avs_context->src_w = src_rect->width + src_left_edge_extend;
2653     pp_avs_context->src_h = src_rect->height;
2654
2655     pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
2656     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2657
2658     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
2659     pp_inline_parameter->grf5.block_count_x = 1;        /* M x 1 */
2660     pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
2661     pp_inline_parameter->grf6.video_step_delta = 0.0;
2662
2663     dst_surface->flags = src_surface->flags;
2664
2665     return VA_STATUS_SUCCESS;
2666 }
2667
2668 static VAStatus
2669 pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2670                             const struct i965_surface *src_surface,
2671                             const VARectangle *src_rect,
2672                             struct i965_surface *dst_surface,
2673                             const VARectangle *dst_rect,
2674                             void *filter_param)
2675 {
2676     return pp_nv12_avs_initialize(ctx, pp_context,
2677                                   src_surface,
2678                                   src_rect,
2679                                   dst_surface,
2680                                   dst_rect,
2681                                   filter_param,
2682                                   1);
2683 }
2684
2685 static VAStatus
2686 gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2687                              const struct i965_surface *src_surface,
2688                              const VARectangle *src_rect,
2689                              struct i965_surface *dst_surface,
2690                              const VARectangle *dst_rect,
2691                              void *filter_param)
2692 {
2693     return pp_nv12_avs_initialize(ctx, pp_context,
2694                                   src_surface,
2695                                   src_rect,
2696                                   dst_surface,
2697                                   dst_rect,
2698                                   filter_param,
2699                                   0);    
2700 }
2701
2702 static int
2703 gen7_pp_avs_x_steps(void *private_context)
2704 {
2705     struct pp_avs_context *pp_avs_context = private_context;
2706
2707     return pp_avs_context->dest_w / 16;
2708 }
2709
2710 static int
2711 gen7_pp_avs_y_steps(void *private_context)
2712 {
2713     struct pp_avs_context *pp_avs_context = private_context;
2714
2715     return pp_avs_context->dest_h / 16;
2716 }
2717
2718 static int
2719 gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2720 {
2721     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context;
2722     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2723
2724     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2725     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
2726     pp_inline_parameter->grf7.constant_0 = 0xffffffff;
2727     pp_inline_parameter->grf7.sampler_load_main_video_x_scaling_step = pp_avs_context->horiz_range / pp_avs_context->src_w;
2728
2729     return 0;
2730 }
2731
2732 static void gen7_update_src_surface_uv_offset(VADriverContextP    ctx, 
2733                                               struct i965_post_processing_context *pp_context,
2734                                               const struct i965_surface *surface)
2735 {
2736     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2737     int fourcc = pp_get_surface_fourcc(ctx, surface);
2738     
2739     if (fourcc == VA_FOURCC_YUY2) {
2740         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
2741         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
2742         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
2743     } else if (fourcc == VA_FOURCC_UYVY) {
2744         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 1;
2745         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 0;
2746         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 2;
2747     }
2748 }
2749
2750 static VAStatus
2751 gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2752                            const struct i965_surface *src_surface,
2753                            const VARectangle *src_rect,
2754                            struct i965_surface *dst_surface,
2755                            const VARectangle *dst_rect,
2756                            void *filter_param)
2757 {
2758     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->pp_avs_context;
2759     struct i965_driver_data *i965 = i965_driver_data(ctx);
2760     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2761     struct gen7_sampler_8x8 *sampler_8x8;
2762     struct i965_sampler_8x8_state *sampler_8x8_state;
2763     int index, i;
2764     int width[3], height[3], pitch[3], offset[3];
2765     int src_width, src_height;
2766
2767     /* source surface */
2768     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
2769                                          width, height, pitch, offset);
2770     src_width = width[0];
2771     src_height = height[0];
2772
2773     /* destination surface */
2774     gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
2775                                          width, height, pitch, offset);
2776
2777     /* sampler 8x8 state */
2778     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2779     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2780     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2781     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2782     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2783
2784     for (i = 0; i < 17; i++) {
2785         float coff;
2786         coff = i;
2787         coff = coff / 16;
2788         /* for Y channel, currently ignore */
2789         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0;
2790         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0;
2791         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0;
2792         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = intel_format_convert(1 - coff, 1, 6,0);
2793         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = intel_format_convert(coff, 1, 6, 0);
2794         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0;
2795         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0;
2796         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0;
2797         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0;
2798         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0;
2799         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0;
2800         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
2801         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = intel_format_convert(coff, 1, 6, 0);
2802         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0;
2803         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0;
2804         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0;
2805         /* for U/V channel, 0.25 */
2806         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2807         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2808         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x0;
2809         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
2810         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = intel_format_convert(coff, 1, 6, 0);
2811         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0;
2812         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2813         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2814         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2815         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2816         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x0;
2817         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
2818         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = intel_format_convert(coff, 1, 6, 0);
2819         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x0;
2820         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2821         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2822     }
2823
2824     sampler_8x8_state->dw136.default_sharpness_level = 0;
2825     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
2826     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2827     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2828     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2829
2830     /* sampler 8x8 */
2831     dri_bo_map(pp_context->sampler_state_table.bo, True);
2832     assert(pp_context->sampler_state_table.bo->virtual);
2833     assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
2834     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2835
2836     /* sample_8x8 Y index 4 */
2837     index = 4;
2838     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2839     sampler_8x8[index].dw0.global_noise_estimation = 255;
2840     sampler_8x8[index].dw0.ief_bypass = 1;
2841
2842     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2843
2844     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2845     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2846     sampler_8x8[index].dw2.r5x_coefficient = 9;
2847     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2848     sampler_8x8[index].dw2.r5c_coefficient = 3;
2849
2850     sampler_8x8[index].dw3.r3x_coefficient = 27;
2851     sampler_8x8[index].dw3.r3c_coefficient = 5;
2852     sampler_8x8[index].dw3.gain_factor = 40;
2853     sampler_8x8[index].dw3.non_edge_weight = 1;
2854     sampler_8x8[index].dw3.regular_weight = 2;
2855     sampler_8x8[index].dw3.strong_edge_weight = 7;
2856     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2857
2858     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2859                       I915_GEM_DOMAIN_RENDER, 
2860                       0,
2861                       0,
2862                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2863                       pp_context->sampler_state_table.bo_8x8);
2864
2865     /* sample_8x8 UV index 8 */
2866     index = 8;
2867     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2868     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2869     sampler_8x8[index].dw0.global_noise_estimation = 255;
2870     sampler_8x8[index].dw0.ief_bypass = 1;
2871     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2872     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2873     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2874     sampler_8x8[index].dw2.r5x_coefficient = 9;
2875     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2876     sampler_8x8[index].dw2.r5c_coefficient = 3;
2877     sampler_8x8[index].dw3.r3x_coefficient = 27;
2878     sampler_8x8[index].dw3.r3c_coefficient = 5;
2879     sampler_8x8[index].dw3.gain_factor = 40;
2880     sampler_8x8[index].dw3.non_edge_weight = 1;
2881     sampler_8x8[index].dw3.regular_weight = 2;
2882     sampler_8x8[index].dw3.strong_edge_weight = 7;
2883     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2884
2885     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2886                       I915_GEM_DOMAIN_RENDER, 
2887                       0,
2888                       0,
2889                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2890                       pp_context->sampler_state_table.bo_8x8);
2891
2892     /* sampler_8x8 V, index 12 */
2893     index = 12;
2894     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2895     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2896     sampler_8x8[index].dw0.global_noise_estimation = 255;
2897     sampler_8x8[index].dw0.ief_bypass = 1;
2898     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2899     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2900     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2901     sampler_8x8[index].dw2.r5x_coefficient = 9;
2902     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2903     sampler_8x8[index].dw2.r5c_coefficient = 3;
2904     sampler_8x8[index].dw3.r3x_coefficient = 27;
2905     sampler_8x8[index].dw3.r3c_coefficient = 5;
2906     sampler_8x8[index].dw3.gain_factor = 40;
2907     sampler_8x8[index].dw3.non_edge_weight = 1;
2908     sampler_8x8[index].dw3.regular_weight = 2;
2909     sampler_8x8[index].dw3.strong_edge_weight = 7;
2910     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2911
2912     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2913                       I915_GEM_DOMAIN_RENDER, 
2914                       0,
2915                       0,
2916                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2917                       pp_context->sampler_state_table.bo_8x8);
2918
2919     dri_bo_unmap(pp_context->sampler_state_table.bo);
2920
2921     /* private function & data */
2922     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
2923     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
2924     pp_context->private_context = &pp_context->pp_avs_context;
2925     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
2926
2927     pp_avs_context->dest_x = dst_rect->x;
2928     pp_avs_context->dest_y = dst_rect->y;
2929     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
2930     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
2931     pp_avs_context->src_w = src_rect->width;
2932     pp_avs_context->src_h = src_rect->height;
2933     pp_avs_context->horiz_range = (float)src_rect->width / src_width;
2934
2935     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
2936     dw = MAX(dw, dst_rect->width);
2937
2938     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
2939     pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */
2940     if (IS_HASWELL(i965->intel.device_id))
2941         pp_static_parameter->grf2.avs_wa_enable = 0; /* HSW don't use the WA */
2942
2943     if (pp_static_parameter->grf2.avs_wa_enable) {
2944         int src_fourcc = pp_get_surface_fourcc(ctx, src_surface);
2945         if ((src_fourcc == VA_FOURCC_RGBA) ||
2946             (src_fourcc == VA_FOURCC_RGBX) ||
2947             (src_fourcc == VA_FOURCC_BGRA) ||
2948             (src_fourcc == VA_FOURCC_BGRX)) {
2949             pp_static_parameter->grf2.avs_wa_enable = 0;
2950         }
2951     }
2952         
2953     pp_static_parameter->grf2.avs_wa_width = dw;
2954     pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * dw);
2955     pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * dw);
2956     pp_static_parameter->grf2.alpha = 255;
2957
2958     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
2959     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / dst_rect->height;
2960     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height -
2961         (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step;
2962     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width -
2963         (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw;
2964
2965     gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
2966
2967     dst_surface->flags = src_surface->flags;
2968
2969     return VA_STATUS_SUCCESS;
2970 }
2971
/*
 * Number of horizontal steps for the DNDI path: always a single step.
 * private_context is not consulted.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2977
2978 static int
2979 pp_dndi_y_steps(void *private_context)
2980 {
2981     struct pp_dndi_context *pp_dndi_context = private_context;
2982
2983     return pp_dndi_context->dest_h / 4;
2984 }
2985
2986 static int
2987 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2988 {
2989     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2990
2991     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
2992     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
2993
2994     return 0;
2995 }
2996
2997 static VAStatus
2998 pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2999                         const struct i965_surface *src_surface,
3000                         const VARectangle *src_rect,
3001                         struct i965_surface *dst_surface,
3002                         const VARectangle *dst_rect,
3003                         void *filter_param)
3004 {
3005     struct i965_driver_data *i965 = i965_driver_data(ctx);
3006     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->pp_dndi_context;
3007     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3008     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3009     struct object_surface *obj_surface;
3010     struct i965_sampler_dndi *sampler_dndi;
3011     int index;
3012     int w, h;
3013     int orig_w, orig_h;
3014     int dndi_top_first = 1;
3015     VAProcFilterParameterBufferDeinterlacing *di_filter_param = (VAProcFilterParameterBufferDeinterlacing *)filter_param;
3016
3017     if (di_filter_param->flags & VA_DEINTERLACING_BOTTOM_FIELD)
3018         dndi_top_first = 0;
3019     else
3020         dndi_top_first = 1;
3021
3022     /* surface */
3023     obj_surface = (struct object_surface *)src_surface->base;
3024     orig_w = obj_surface->orig_width;
3025     orig_h = obj_surface->orig_height;
3026     w = obj_surface->width;
3027     h = obj_surface->height;
3028
3029     if (pp_dndi_context->stmm_bo == NULL) {
3030         pp_dndi_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr,
3031                                                 "STMM surface",
3032                                                 w * h,
3033                                                 4096);
3034         assert(pp_dndi_context->stmm_bo);
3035     }
3036
3037     /* source UV surface index 2 */
3038     i965_pp_set_surface_state(ctx, pp_context,
3039                               obj_surface->bo, w * h,
3040                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3041                               2, 0);
3042
3043     /* source YUV surface index 4 */
3044     i965_pp_set_surface2_state(ctx, pp_context,
3045                                obj_surface->bo, 0,
3046                                orig_w, orig_h, w,
3047                                0, h,
3048                                SURFACE_FORMAT_PLANAR_420_8, 1,
3049                                4);
3050
3051     /* source STMM surface index 20 */
3052     i965_pp_set_surface_state(ctx, pp_context,
3053                               pp_dndi_context->stmm_bo, 0,
3054                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3055                               20, 1);
3056
3057     /* destination surface */
3058     obj_surface = (struct object_surface *)dst_surface->base;
3059     orig_w = obj_surface->orig_width;
3060     orig_h = obj_surface->orig_height;
3061     w = obj_surface->width;
3062     h = obj_surface->height;
3063
3064     /* destination Y surface index 7 */
3065     i965_pp_set_surface_state(ctx, pp_context,
3066                               obj_surface->bo, 0,
3067                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3068                               7, 1);
3069
3070     /* destination UV surface index 8 */
3071     i965_pp_set_surface_state(ctx, pp_context,
3072                               obj_surface->bo, w * h,
3073                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3074                               8, 1);
3075     /* sampler dndi */
3076     dri_bo_map(pp_context->sampler_state_table.bo, True);
3077     assert(pp_context->sampler_state_table.bo->virtual);
3078     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3079     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3080
3081     /* sample dndi index 1 */
3082     index = 0;
3083     sampler_dndi[index].dw0.denoise_asd_threshold = 38;
3084     sampler_dndi[index].dw0.denoise_history_delta = 7;          // 0-15, default is 8
3085     sampler_dndi[index].dw0.denoise_maximum_history = 192;      // 128-240
3086     sampler_dndi[index].dw0.denoise_stad_threshold = 140;
3087
3088     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 38;
3089     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 1;
3090     sampler_dndi[index].dw1.stmm_c2 = 1;
3091     sampler_dndi[index].dw1.low_temporal_difference_threshold = 0;
3092     sampler_dndi[index].dw1.temporal_difference_threshold = 0;
3093
3094     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 20;   // 0-31
3095     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 1;    // 0-15
3096     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
3097     sampler_dndi[index].dw2.good_neighbor_threshold = 12;                // 0-63
3098
3099     sampler_dndi[index].dw3.maximum_stmm = 150;
3100     sampler_dndi[index].dw3.multipler_for_vecm = 30;
3101     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 125;
3102     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3103     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3104
3105     sampler_dndi[index].dw4.sdi_delta = 5;
3106     sampler_dndi[index].dw4.sdi_threshold = 100;
3107     sampler_dndi[index].dw4.stmm_output_shift = 5;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3108     sampler_dndi[index].dw4.stmm_shift_up = 1;
3109     sampler_dndi[index].dw4.stmm_shift_down = 0;
3110     sampler_dndi[index].dw4.minimum_stmm = 118;
3111
3112     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 175;
3113     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 37;
3114     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 100;
3115     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 50;
3116
3117     sampler_dndi[index].dw6.dn_enable = 1;
3118     sampler_dndi[index].dw6.di_enable = 1;
3119     sampler_dndi[index].dw6.di_partial = 0;
3120     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3121     sampler_dndi[index].dw6.dndi_stream_id = 0;
3122     sampler_dndi[index].dw6.dndi_first_frame = 1;
3123     sampler_dndi[index].dw6.progressive_dn = 0;
3124     sampler_dndi[index].dw6.fmd_tear_threshold = 2;
3125     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100;
3126     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 16;
3127
3128     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3129     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3130     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3131     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3132
3133     dri_bo_unmap(pp_context->sampler_state_table.bo);
3134
3135     /* private function & data */
3136     pp_context->pp_x_steps = pp_dndi_x_steps;
3137     pp_context->pp_y_steps = pp_dndi_y_steps;
3138     pp_context->private_context = &pp_context->pp_dndi_context;
3139     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
3140
3141     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
3142     pp_static_parameter->grf1.r1_6.di.top_field_first = dndi_top_first;
3143     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 0;
3144     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 0;
3145
3146     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
3147     pp_inline_parameter->grf5.number_blocks = w / 16;
3148     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3149     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3150
3151     pp_dndi_context->dest_w = w;
3152     pp_dndi_context->dest_h = h;
3153
3154     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3155
3156     return VA_STATUS_SUCCESS;
3157 }
3158
/*
 * Horizontal step count callback for the NV12 denoise path; installed as
 * pp_context->pp_x_steps by pp_nv12_dn_initialize().
 *
 * private_context is not consulted: a single horizontal step is always
 * reported.
 */
static int
pp_dn_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
3164
3165 static int
3166 pp_dn_y_steps(void *private_context)
3167 {
3168     struct pp_dn_context *pp_dn_context = private_context;
3169
3170     return pp_dn_context->dest_h / 8;
3171 }
3172
3173 static int
3174 pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3175 {
3176     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3177
3178     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3179     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
3180
3181     return 0;
3182 }
3183
3184 static VAStatus
3185 pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3186                       const struct i965_surface *src_surface,
3187                       const VARectangle *src_rect,
3188                       struct i965_surface *dst_surface,
3189                       const VARectangle *dst_rect,
3190                       void *filter_param)
3191 {
3192     struct i965_driver_data *i965 = i965_driver_data(ctx);
3193     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->pp_dn_context;
3194     struct object_surface *obj_surface;
3195     struct i965_sampler_dndi *sampler_dndi;
3196     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3197     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3198     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3199     int index;
3200     int w, h;
3201     int orig_w, orig_h;
3202     int dn_strength = 15;
3203     int dndi_top_first = 1;
3204     int dn_progressive = 0;
3205
3206     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3207         dndi_top_first = 1;
3208         dn_progressive = 1;
3209     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3210         dndi_top_first = 1;
3211         dn_progressive = 0;
3212     } else {
3213         dndi_top_first = 0;
3214         dn_progressive = 0;
3215     }
3216
3217     if (dn_filter_param) {
3218         float value = dn_filter_param->value;
3219         
3220         if (value > 1.0)
3221             value = 1.0;
3222         
3223         if (value < 0.0)
3224             value = 0.0;
3225
3226         dn_strength = (int)(value * 31.0F);
3227     }
3228
3229     /* surface */
3230     obj_surface = (struct object_surface *)src_surface->base;
3231     orig_w = obj_surface->orig_width;
3232     orig_h = obj_surface->orig_height;
3233     w = obj_surface->width;
3234     h = obj_surface->height;
3235
3236     if (pp_dn_context->stmm_bo == NULL) {
3237         pp_dn_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr,
3238                                               "STMM surface",
3239                                               w * h,
3240                                               4096);
3241         assert(pp_dn_context->stmm_bo);
3242     }
3243
3244     /* source UV surface index 2 */
3245     i965_pp_set_surface_state(ctx, pp_context,
3246                               obj_surface->bo, w * h,
3247                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3248                               2, 0);
3249
3250     /* source YUV surface index 4 */
3251     i965_pp_set_surface2_state(ctx, pp_context,
3252                                obj_surface->bo, 0,
3253                                orig_w, orig_h, w,
3254                                0, h,
3255                                SURFACE_FORMAT_PLANAR_420_8, 1,
3256                                4);
3257
3258     /* source STMM surface index 20 */
3259     i965_pp_set_surface_state(ctx, pp_context,
3260                               pp_dn_context->stmm_bo, 0,
3261                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3262                               20, 1);
3263
3264     /* destination surface */
3265     obj_surface = (struct object_surface *)dst_surface->base;
3266     orig_w = obj_surface->orig_width;
3267     orig_h = obj_surface->orig_height;
3268     w = obj_surface->width;
3269     h = obj_surface->height;
3270
3271     /* destination Y surface index 7 */
3272     i965_pp_set_surface_state(ctx, pp_context,
3273                               obj_surface->bo, 0,
3274                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3275                               7, 1);
3276
3277     /* destination UV surface index 8 */
3278     i965_pp_set_surface_state(ctx, pp_context,
3279                               obj_surface->bo, w * h,
3280                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3281                               8, 1);
3282     /* sampler dn */
3283     dri_bo_map(pp_context->sampler_state_table.bo, True);
3284     assert(pp_context->sampler_state_table.bo->virtual);
3285     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3286     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3287
3288     /* sample dndi index 1 */
3289     index = 0;
3290     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
3291     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
3292     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
3293     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
3294
3295     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3296     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
3297     sampler_dndi[index].dw1.stmm_c2 = 0;
3298     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
3299     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
3300
3301     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
3302     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
3303     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
3304     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
3305
3306     sampler_dndi[index].dw3.maximum_stmm = 128;
3307     sampler_dndi[index].dw3.multipler_for_vecm = 2;
3308     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3309     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3310     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3311
3312     sampler_dndi[index].dw4.sdi_delta = 8;
3313     sampler_dndi[index].dw4.sdi_threshold = 128;
3314     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3315     sampler_dndi[index].dw4.stmm_shift_up = 0;
3316     sampler_dndi[index].dw4.stmm_shift_down = 0;
3317     sampler_dndi[index].dw4.minimum_stmm = 0;
3318
3319     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
3320     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
3321     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3322     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3323
3324     sampler_dndi[index].dw6.dn_enable = 1;
3325     sampler_dndi[index].dw6.di_enable = 0;
3326     sampler_dndi[index].dw6.di_partial = 0;
3327     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3328     sampler_dndi[index].dw6.dndi_stream_id = 1;
3329     sampler_dndi[index].dw6.dndi_first_frame = 1;
3330     sampler_dndi[index].dw6.progressive_dn = dn_progressive;
3331     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
3332     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
3333     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
3334
3335     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
3336     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
3337     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3338     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3339
3340     dri_bo_unmap(pp_context->sampler_state_table.bo);
3341
3342     /* private function & data */
3343     pp_context->pp_x_steps = pp_dn_x_steps;
3344     pp_context->pp_y_steps = pp_dn_y_steps;
3345     pp_context->private_context = &pp_context->pp_dn_context;
3346     pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
3347
3348     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
3349     pp_static_parameter->grf1.r1_6.di.top_field_first = 0;
3350     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 64;
3351     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 192;
3352
3353     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
3354     pp_inline_parameter->grf5.number_blocks = w / 16;
3355     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3356     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3357
3358     pp_dn_context->dest_w = w;
3359     pp_dn_context->dest_h = h;
3360
3361     dst_surface->flags = src_surface->flags;
3362     
3363     return VA_STATUS_SUCCESS;
3364 }
3365
3366 static int
3367 gen7_pp_dndi_x_steps(void *private_context)
3368 {
3369     struct pp_dndi_context *pp_dndi_context = private_context;
3370
3371     return pp_dndi_context->dest_w / 16;
3372 }
3373
3374 static int
3375 gen7_pp_dndi_y_steps(void *private_context)
3376 {
3377     struct pp_dndi_context *pp_dndi_context = private_context;
3378
3379     return pp_dndi_context->dest_h / 4;
3380 }
3381
3382 static int
3383 gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3384 {
3385     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3386
3387     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16;
3388     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 4;
3389
3390     return 0;
3391 }
3392
3393
/*
 * Declaration of a helper defined outside this part of the file.
 * gen7_pp_nv12_dndi_initialize() calls it with the cached DNDI output
 * surface as src_obj_surf and the caller's destination surface as
 * dst_obj_surf.
 * NOTE(review): the exact conversion/copy semantics and the meaning of the
 * returned status are not visible here; confirm against the definition.
 */
extern VAStatus
vpp_surface_convert(VADriverContextP ctx,
                    struct object_surface *src_obj_surf,
                    struct object_surface *dst_obj_surf);
3398
3399 static VAStatus
3400 gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3401                              const struct i965_surface *src_surface,
3402                              const VARectangle *src_rect,
3403                              struct i965_surface *dst_surface,
3404                              const VARectangle *dst_rect,
3405                              void *filter_param)
3406 {
3407     struct i965_driver_data *i965 = i965_driver_data(ctx);
3408     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->pp_dndi_context;
3409     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3410     struct object_surface *previous_in_obj_surface, *current_in_obj_surface, *previous_out_obj_surface, *current_out_obj_surface;
3411     struct gen7_sampler_dndi *sampler_dndi;
3412     int index;
3413     int w, h;
3414     int orig_w, orig_h;
3415     int dndi_top_first = 1;
3416     VAProcFilterParameterBufferDeinterlacing *di_filter_param = (VAProcFilterParameterBufferDeinterlacing *)filter_param;
3417     int is_first_frame = (pp_dndi_context->frame_order == -1);
3418
3419     if (di_filter_param->flags & VA_DEINTERLACING_BOTTOM_FIELD)
3420         dndi_top_first = 0;
3421     else
3422         dndi_top_first = 1;
3423
3424     /* surface */
3425     current_in_obj_surface = (struct object_surface *)src_surface->base;
3426
3427     if (di_filter_param->algorithm == VAProcDeinterlacingBob) {
3428         previous_in_obj_surface = current_in_obj_surface;
3429         is_first_frame = 1;
3430     } else if (di_filter_param->algorithm == VAProcDeinterlacingMotionAdaptive) {
3431         if (pp_dndi_context->frame_order == 0) {
3432             VAProcPipelineParameterBuffer *pipeline_param = pp_context->pipeline_param;
3433             if (!pipeline_param ||
3434                 !pipeline_param->num_forward_references ||
3435                 pipeline_param->forward_references[0] == VA_INVALID_ID) {
3436                 WARN_ONCE("A forward temporal reference is needed for Motion adaptive deinterlacing !!!\n");
3437
3438                 return VA_STATUS_ERROR_INVALID_PARAMETER;
3439             } else {
3440                 previous_in_obj_surface = SURFACE(pipeline_param->forward_references[0]);
3441                 assert(previous_in_obj_surface && previous_in_obj_surface->bo);
3442
3443                 is_first_frame = 0;
3444             }
3445         } else if (pp_dndi_context->frame_order == 1) {
3446             vpp_surface_convert(ctx,
3447                                 pp_dndi_context->current_out_obj_surface,
3448                                 (struct object_surface *)dst_surface->base);
3449             pp_dndi_context->frame_order = (pp_dndi_context->frame_order + 1) % 2;
3450             is_first_frame = 0;
3451
3452             return VA_STATUS_SUCCESS_1;
3453         } else {
3454             previous_in_obj_surface = current_in_obj_surface;
3455             is_first_frame = 1;
3456         }
3457     } else {
3458         return VA_STATUS_ERROR_UNIMPLEMENTED;
3459     }
3460
3461     /* source (temporal reference) YUV surface index 4 */
3462     orig_w = previous_in_obj_surface->orig_width;
3463     orig_h = previous_in_obj_surface->orig_height;
3464     w = previous_in_obj_surface->width;
3465     h = previous_in_obj_surface->height;
3466     gen7_pp_set_surface2_state(ctx, pp_context,
3467                                previous_in_obj_surface->bo, 0,
3468                                orig_w, orig_h, w,
3469                                0, h,
3470                                SURFACE_FORMAT_PLANAR_420_8, 1,
3471                                4);
3472
3473     /* source surface */
3474     orig_w = current_in_obj_surface->orig_width;
3475     orig_h = current_in_obj_surface->orig_height;
3476     w = current_in_obj_surface->width;
3477     h = current_in_obj_surface->height;
3478
3479     /* source UV surface index 1 */
3480     gen7_pp_set_surface_state(ctx, pp_context,
3481                               current_in_obj_surface->bo, w * h,
3482                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3483                               1, 0);
3484
3485     /* source YUV surface index 3 */
3486     gen7_pp_set_surface2_state(ctx, pp_context,
3487                                current_in_obj_surface->bo, 0,
3488                                orig_w, orig_h, w,
3489                                0, h,
3490                                SURFACE_FORMAT_PLANAR_420_8, 1,
3491                                3);
3492
3493     /* STMM / History Statistics input surface, index 5 */
3494     if (pp_dndi_context->stmm_bo == NULL) {
3495         pp_dndi_context->stmm_bo = dri_bo_alloc(i965->intel.bufmgr,
3496                                                 "STMM surface",
3497                                                 w * h,
3498                                                 4096);
3499         assert(pp_dndi_context->stmm_bo);
3500     }
3501
3502     gen7_pp_set_surface_state(ctx, pp_context,
3503                               pp_dndi_context->stmm_bo, 0,
3504                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3505                               5, 1);
3506
3507     /* destination surface */
3508     previous_out_obj_surface = (struct object_surface *)dst_surface->base;
3509     orig_w = previous_out_obj_surface->orig_width;
3510     orig_h = previous_out_obj_surface->orig_height;
3511     w = previous_out_obj_surface->width;
3512     h = previous_out_obj_surface->height;
3513
3514     if (is_first_frame) {
3515         current_out_obj_surface = previous_out_obj_surface;
3516     } else {
3517         VAStatus va_status;
3518
3519         if (pp_dndi_context->current_out_surface == VA_INVALID_SURFACE) {
3520             unsigned int tiling = 0, swizzle = 0;
3521             dri_bo_get_tiling(previous_out_obj_surface->bo, &tiling, &swizzle);
3522
3523             va_status = i965_CreateSurfaces(ctx,
3524                                             orig_w,
3525                                             orig_h,
3526                                             VA_RT_FORMAT_YUV420,
3527                                             1,
3528                                             &pp_dndi_context->current_out_surface);
3529             assert(va_status == VA_STATUS_SUCCESS);
3530             pp_dndi_context->current_out_obj_surface = SURFACE(pp_dndi_context->current_out_surface);
3531             assert(pp_dndi_context->current_out_obj_surface);
3532             i965_check_alloc_surface_bo(ctx,
3533                                         pp_dndi_context->current_out_obj_surface,
3534                                         tiling != I915_TILING_NONE,
3535                                         VA_FOURCC_NV12,
3536                                         SUBSAMPLE_YUV420);
3537         }
3538
3539         current_out_obj_surface = pp_dndi_context->current_out_obj_surface;
3540     }
3541
3542     /* destination(Previous frame) Y surface index 27 */
3543     gen7_pp_set_surface_state(ctx, pp_context,
3544                               previous_out_obj_surface->bo, 0,
3545                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3546                               27, 1);
3547
3548     /* destination(Previous frame) UV surface index 28 */
3549     gen7_pp_set_surface_state(ctx, pp_context,
3550                               previous_out_obj_surface->bo, w * h,
3551                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3552                               28, 1);
3553
3554     /* destination(Current frame) Y surface index 30 */
3555     gen7_pp_set_surface_state(ctx, pp_context,
3556                               current_out_obj_surface->bo, 0,
3557                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3558                               30, 1);
3559
3560     /* destination(Current frame) UV surface index 31 */
3561     orig_w = current_out_obj_surface->orig_width;
3562     orig_h = current_out_obj_surface->orig_height;
3563     w = current_out_obj_surface->width;
3564     h = current_out_obj_surface->height;
3565
3566     gen7_pp_set_surface_state(ctx, pp_context,
3567                               current_out_obj_surface->bo, w * h,
3568                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3569                               31, 1);
3570
3571     /* STMM output surface, index 33 */
3572     gen7_pp_set_surface_state(ctx, pp_context,
3573                               pp_dndi_context->stmm_bo, 0,
3574                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3575                               33, 1);
3576
3577
3578     /* sampler dndi */
3579     dri_bo_map(pp_context->sampler_state_table.bo, True);
3580     assert(pp_context->sampler_state_table.bo->virtual);
3581     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3582     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3583
3584     /* sample dndi index 0 */
3585     index = 0;
3586     sampler_dndi[index].dw0.denoise_asd_threshold = 38;
3587     sampler_dndi[index].dw0.dnmh_delt = 7;
3588     sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
3589     sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
3590     sampler_dndi[index].dw0.denoise_maximum_history = 192;      // 128-240
3591     sampler_dndi[index].dw0.denoise_stad_threshold = 140;
3592
3593     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 38;
3594     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 1;
3595     sampler_dndi[index].dw1.stmm_c2 = 2;
3596     sampler_dndi[index].dw1.low_temporal_difference_threshold = 0;
3597     sampler_dndi[index].dw1.temporal_difference_threshold = 0;
3598
3599     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 20;   // 0-31
3600     sampler_dndi[index].dw2.bne_edge_th = 1;
3601     sampler_dndi[index].dw2.smooth_mv_th = 0;
3602     sampler_dndi[index].dw2.sad_tight_th = 5;
3603     sampler_dndi[index].dw2.cat_slope_minus1 = 9;
3604     sampler_dndi[index].dw2.good_neighbor_th = 12;
3605
3606     sampler_dndi[index].dw3.maximum_stmm = 150;
3607     sampler_dndi[index].dw3.multipler_for_vecm = 30;
3608     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 125;
3609     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3610     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3611
3612     sampler_dndi[index].dw4.sdi_delta = 5;
3613     sampler_dndi[index].dw4.sdi_threshold = 100;
3614     sampler_dndi[index].dw4.stmm_output_shift = 5;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3615     sampler_dndi[index].dw4.stmm_shift_up = 1;
3616     sampler_dndi[index].dw4.stmm_shift_down = 0;
3617     sampler_dndi[index].dw4.minimum_stmm = 118;
3618
3619     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 175;
3620     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 37;
3621     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 100;
3622     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 50;
3623     sampler_dndi[index].dw6.dn_enable = 0;
3624     sampler_dndi[index].dw6.di_enable = 1;
3625     sampler_dndi[index].dw6.di_partial = 0;
3626     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3627     sampler_dndi[index].dw6.dndi_stream_id = 1;
3628     sampler_dndi[index].dw6.dndi_first_frame = is_first_frame;
3629     sampler_dndi[index].dw6.progressive_dn = 0;
3630     sampler_dndi[index].dw6.mcdi_enable = 0;
3631     sampler_dndi[index].dw6.fmd_tear_threshold = 2;
3632     sampler_dndi[index].dw6.cat_th1 = 0;
3633     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 100;
3634     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 16;
3635
3636     sampler_dndi[index].dw7.sad_tha = 5;
3637     sampler_dndi[index].dw7.sad_thb = 10;
3638     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3639     sampler_dndi[index].dw7.mc_pixel_consistency_th = 25;
3640     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3641     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3642     sampler_dndi[index].dw7.neighborpixel_th = 10;
3643     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3644
3645     dri_bo_unmap(pp_context->sampler_state_table.bo);
3646
3647     /* private function & data */
3648     pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
3649     pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
3650     pp_context->private_context = &pp_context->pp_dndi_context;
3651     pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
3652
3653     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3654     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3655     pp_static_parameter->grf1.di_top_field_first = 0;
3656     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3657
3658     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3659     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3660     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3661
3662     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3663     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3664
3665     pp_dndi_context->dest_w = w;
3666     pp_dndi_context->dest_h = h;
3667
3668     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3669
3670     pp_dndi_context->frame_order = (pp_dndi_context->frame_order + 1) % 2;
3671
3672     return VA_STATUS_SUCCESS;
3673 }
3674
3675 static int
3676 gen7_pp_dn_x_steps(void *private_context)
3677 {
3678     struct pp_dn_context *pp_dn_context = private_context;
3679
3680     return pp_dn_context->dest_w / 16;
3681 }
3682
3683 static int
3684 gen7_pp_dn_y_steps(void *private_context)
3685 {
3686     struct pp_dn_context *pp_dn_context = private_context;
3687
3688     return pp_dn_context->dest_h / 4;
3689 }
3690
3691 static int
3692 gen7_pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3693 {
3694     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3695
3696     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3697     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
3698
3699     return 0;
3700 }
3701
3702 static VAStatus
3703 gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3704                            const struct i965_surface *src_surface,
3705                            const VARectangle *src_rect,
3706                            struct i965_surface *dst_surface,
3707                            const VARectangle *dst_rect,
3708                            void *filter_param)
3709 {
3710     struct i965_driver_data *i965 = i965_driver_data(ctx);
3711     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->pp_dn_context;
3712     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3713     struct object_surface *obj_surface;
3714     struct gen7_sampler_dndi *sampler_dn;
3715     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3716     int index;
3717     int w, h;
3718     int orig_w, orig_h;
3719     int dn_strength = 15;
3720     int dndi_top_first = 1;
3721     int dn_progressive = 0;
3722
3723     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3724         dndi_top_first = 1;
3725         dn_progressive = 1;
3726     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3727         dndi_top_first = 1;
3728         dn_progressive = 0;
3729     } else {
3730         dndi_top_first = 0;
3731         dn_progressive = 0;
3732     }
3733
3734     if (dn_filter_param) {
3735         float value = dn_filter_param->value;
3736         
3737         if (value > 1.0)
3738             value = 1.0;
3739         
3740         if (value < 0.0)
3741             value = 0.0;
3742
3743         dn_strength = (int)(value * 31.0F);
3744     }
3745
3746     /* surface */
3747     obj_surface = (struct object_surface *)src_surface->base;
3748     orig_w = obj_surface->orig_width;
3749     orig_h = obj_surface->orig_height;
3750     w = obj_surface->width;
3751     h = obj_surface->height;
3752
3753     if (pp_dn_context->stmm_bo == NULL) {
3754         pp_dn_context->stmm_bo= dri_bo_alloc(i965->intel.bufmgr,
3755                                              "STMM surface",
3756                                              w * h,
3757                                              4096);
3758         assert(pp_dn_context->stmm_bo);
3759     }
3760
3761     /* source UV surface index 1 */
3762     gen7_pp_set_surface_state(ctx, pp_context,
3763                               obj_surface->bo, w * h,
3764                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3765                               1, 0);
3766
3767     /* source YUV surface index 3 */
3768     gen7_pp_set_surface2_state(ctx, pp_context,
3769                                obj_surface->bo, 0,
3770                                orig_w, orig_h, w,
3771                                0, h,
3772                                SURFACE_FORMAT_PLANAR_420_8, 1,
3773                                3);
3774
3775     /* source (temporal reference) YUV surface index 4 */
3776     gen7_pp_set_surface2_state(ctx, pp_context,
3777                                obj_surface->bo, 0,
3778                                orig_w, orig_h, w,
3779                                0, h,
3780                                SURFACE_FORMAT_PLANAR_420_8, 1,
3781                                4);
3782
3783     /* STMM / History Statistics input surface, index 5 */
3784     gen7_pp_set_surface_state(ctx, pp_context,
3785                               pp_dn_context->stmm_bo, 0,
3786                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3787                               33, 1);
3788
3789     /* destination surface */
3790     obj_surface = (struct object_surface *)dst_surface->base;
3791     orig_w = obj_surface->orig_width;
3792     orig_h = obj_surface->orig_height;
3793     w = obj_surface->width;
3794     h = obj_surface->height;
3795
3796     /* destination Y surface index 24 */
3797     gen7_pp_set_surface_state(ctx, pp_context,
3798                               obj_surface->bo, 0,
3799                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3800                               24, 1);
3801
3802     /* destination UV surface index 25 */
3803     gen7_pp_set_surface_state(ctx, pp_context,
3804                               obj_surface->bo, w * h,
3805                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3806                               25, 1);
3807
3808     /* sampler dn */
3809     dri_bo_map(pp_context->sampler_state_table.bo, True);
3810     assert(pp_context->sampler_state_table.bo->virtual);
3811     assert(sizeof(*sampler_dn) == sizeof(int) * 8);
3812     sampler_dn = pp_context->sampler_state_table.bo->virtual;
3813
3814     /* sample dn index 1 */
3815     index = 0;
3816     sampler_dn[index].dw0.denoise_asd_threshold = 0;
3817     sampler_dn[index].dw0.dnmh_delt = 8;
3818     sampler_dn[index].dw0.vdi_walker_y_stride = 0;
3819     sampler_dn[index].dw0.vdi_walker_frame_sharing_enable = 0;
3820     sampler_dn[index].dw0.denoise_maximum_history = 128;      // 128-240
3821     sampler_dn[index].dw0.denoise_stad_threshold = 0;
3822
3823     sampler_dn[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3824     sampler_dn[index].dw1.denoise_moving_pixel_threshold = 0;
3825     sampler_dn[index].dw1.stmm_c2 = 0;
3826     sampler_dn[index].dw1.low_temporal_difference_threshold = 8;
3827     sampler_dn[index].dw1.temporal_difference_threshold = 16;
3828
3829     sampler_dn[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
3830     sampler_dn[index].dw2.bne_edge_th = 1;
3831     sampler_dn[index].dw2.smooth_mv_th = 0;
3832     sampler_dn[index].dw2.sad_tight_th = 5;
3833     sampler_dn[index].dw2.cat_slope_minus1 = 9;
3834     sampler_dn[index].dw2.good_neighbor_th = 4;
3835
3836     sampler_dn[index].dw3.maximum_stmm = 128;
3837     sampler_dn[index].dw3.multipler_for_vecm = 2;
3838     sampler_dn[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3839     sampler_dn[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3840     sampler_dn[index].dw3.stmm_blending_constant_select = 0;
3841
3842     sampler_dn[index].dw4.sdi_delta = 8;
3843     sampler_dn[index].dw4.sdi_threshold = 128;
3844     sampler_dn[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3845     sampler_dn[index].dw4.stmm_shift_up = 0;
3846     sampler_dn[index].dw4.stmm_shift_down = 0;
3847     sampler_dn[index].dw4.minimum_stmm = 0;
3848
3849     sampler_dn[index].dw5.fmd_temporal_difference_threshold = 0;
3850     sampler_dn[index].dw5.sdi_fallback_mode_2_constant = 0;
3851     sampler_dn[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3852     sampler_dn[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3853
3854     sampler_dn[index].dw6.dn_enable = 1;
3855     sampler_dn[index].dw6.di_enable = 0;
3856     sampler_dn[index].dw6.di_partial = 0;
3857     sampler_dn[index].dw6.dndi_top_first = dndi_top_first;
3858     sampler_dn[index].dw6.dndi_stream_id = 1;
3859     sampler_dn[index].dw6.dndi_first_frame = 1;
3860     sampler_dn[index].dw6.progressive_dn = dn_progressive;
3861     sampler_dn[index].dw6.mcdi_enable = 0;
3862     sampler_dn[index].dw6.fmd_tear_threshold = 32;
3863     sampler_dn[index].dw6.cat_th1 = 0;
3864     sampler_dn[index].dw6.fmd2_vertical_difference_threshold = 32;
3865     sampler_dn[index].dw6.fmd1_vertical_difference_threshold = 32;
3866
3867     sampler_dn[index].dw7.sad_tha = 5;
3868     sampler_dn[index].dw7.sad_thb = 10;
3869     sampler_dn[index].dw7.fmd_for_1st_field_of_current_frame = 2;
3870     sampler_dn[index].dw7.mc_pixel_consistency_th = 25;
3871     sampler_dn[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
3872     sampler_dn[index].dw7.vdi_walker_enable = 0;
3873     sampler_dn[index].dw7.neighborpixel_th = 10;
3874     sampler_dn[index].dw7.column_width_minus1 = w / 16;
3875
3876     dri_bo_unmap(pp_context->sampler_state_table.bo);
3877
3878     /* private function & data */
3879     pp_context->pp_x_steps = gen7_pp_dn_x_steps;
3880     pp_context->pp_y_steps = gen7_pp_dn_y_steps;
3881     pp_context->private_context = &pp_context->pp_dn_context;
3882     pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;
3883
3884     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3885     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3886     pp_static_parameter->grf1.di_top_field_first = 0;
3887     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3888
3889     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3890     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3891     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3892
3893     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3894     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3895
3896     pp_dn_context->dest_w = w;
3897     pp_dn_context->dest_h = h;
3898
3899     dst_surface->flags = src_surface->flags;
3900
3901     return VA_STATUS_SUCCESS;
3902 }
3903
3904 static VAStatus
3905 ironlake_pp_initialize(
3906     VADriverContextP ctx,
3907     struct i965_post_processing_context *pp_context,
3908     const struct i965_surface *src_surface,
3909     const VARectangle *src_rect,
3910     struct i965_surface *dst_surface,
3911     const VARectangle *dst_rect,
3912     int pp_index,
3913     void *filter_param
3914 )
3915 {
3916     VAStatus va_status;
3917     struct i965_driver_data *i965 = i965_driver_data(ctx);
3918     struct pp_module *pp_module;
3919     dri_bo *bo;
3920     int static_param_size, inline_param_size;
3921
3922     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3923     bo = dri_bo_alloc(i965->intel.bufmgr,
3924                       "surface state & binding table",
3925                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
3926                       4096);
3927     assert(bo);
3928     pp_context->surface_state_binding_table.bo = bo;
3929
3930     dri_bo_unreference(pp_context->curbe.bo);
3931     bo = dri_bo_alloc(i965->intel.bufmgr,
3932                       "constant buffer",
3933                       4096, 
3934                       4096);
3935     assert(bo);
3936     pp_context->curbe.bo = bo;
3937
3938     dri_bo_unreference(pp_context->idrt.bo);
3939     bo = dri_bo_alloc(i965->intel.bufmgr, 
3940                       "interface discriptor", 
3941                       sizeof(struct i965_interface_descriptor), 
3942                       4096);
3943     assert(bo);
3944     pp_context->idrt.bo = bo;
3945     pp_context->idrt.num_interface_descriptors = 0;
3946
3947     dri_bo_unreference(pp_context->sampler_state_table.bo);
3948     bo = dri_bo_alloc(i965->intel.bufmgr, 
3949                       "sampler state table", 
3950                       4096,
3951                       4096);
3952     assert(bo);
3953     dri_bo_map(bo, True);
3954     memset(bo->virtual, 0, bo->size);
3955     dri_bo_unmap(bo);
3956     pp_context->sampler_state_table.bo = bo;
3957
3958     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3959     bo = dri_bo_alloc(i965->intel.bufmgr, 
3960                       "sampler 8x8 state ",
3961                       4096,
3962                       4096);
3963     assert(bo);
3964     pp_context->sampler_state_table.bo_8x8 = bo;
3965
3966     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3967     bo = dri_bo_alloc(i965->intel.bufmgr, 
3968                       "sampler 8x8 state ",
3969                       4096,
3970                       4096);
3971     assert(bo);
3972     pp_context->sampler_state_table.bo_8x8_uv = bo;
3973
3974     dri_bo_unreference(pp_context->vfe_state.bo);
3975     bo = dri_bo_alloc(i965->intel.bufmgr, 
3976                       "vfe state", 
3977                       sizeof(struct i965_vfe_state), 
3978                       4096);
3979     assert(bo);
3980     pp_context->vfe_state.bo = bo;
3981
3982     static_param_size = sizeof(struct pp_static_parameter);
3983     inline_param_size = sizeof(struct pp_inline_parameter);
3984
3985     memset(pp_context->pp_static_parameter, 0, static_param_size);
3986     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
3987     
3988     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
3989     pp_context->current_pp = pp_index;
3990     pp_module = &pp_context->pp_modules[pp_index];
3991     
3992     if (pp_module->initialize)
3993         va_status = pp_module->initialize(ctx, pp_context,
3994                                           src_surface,
3995                                           src_rect,
3996                                           dst_surface,
3997                                           dst_rect,
3998                                           filter_param);
3999     else
4000         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4001
4002     return va_status;
4003 }
4004
4005 static VAStatus
4006 ironlake_post_processing(
4007     VADriverContextP   ctx,
4008     struct i965_post_processing_context *pp_context,
4009     const struct i965_surface *src_surface,
4010     const VARectangle *src_rect,
4011     struct i965_surface *dst_surface,
4012     const VARectangle *dst_rect,
4013     int                pp_index,
4014     void *filter_param
4015 )
4016 {
4017     VAStatus va_status;
4018
4019     va_status = ironlake_pp_initialize(ctx, pp_context,
4020                                        src_surface,
4021                                        src_rect,
4022                                        dst_surface,
4023                                        dst_rect,
4024                                        pp_index,
4025                                        filter_param);
4026
4027     if (va_status == VA_STATUS_SUCCESS) {
4028         ironlake_pp_states_setup(ctx, pp_context);
4029         ironlake_pp_pipeline_setup(ctx, pp_context);
4030     }
4031
4032     return va_status;
4033 }
4034
4035 static VAStatus
4036 gen6_pp_initialize(
4037     VADriverContextP ctx,
4038     struct i965_post_processing_context *pp_context,
4039     const struct i965_surface *src_surface,
4040     const VARectangle *src_rect,
4041     struct i965_surface *dst_surface,
4042     const VARectangle *dst_rect,
4043     int pp_index,
4044     void *filter_param
4045 )
4046 {
4047     VAStatus va_status;
4048     struct i965_driver_data *i965 = i965_driver_data(ctx);
4049     struct pp_module *pp_module;
4050     dri_bo *bo;
4051     int static_param_size, inline_param_size;
4052
4053     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4054     bo = dri_bo_alloc(i965->intel.bufmgr,
4055                       "surface state & binding table",
4056                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
4057                       4096);
4058     assert(bo);
4059     pp_context->surface_state_binding_table.bo = bo;
4060
4061     dri_bo_unreference(pp_context->curbe.bo);
4062     bo = dri_bo_alloc(i965->intel.bufmgr,
4063                       "constant buffer",
4064                       4096, 
4065                       4096);
4066     assert(bo);
4067     pp_context->curbe.bo = bo;
4068
4069     dri_bo_unreference(pp_context->idrt.bo);
4070     bo = dri_bo_alloc(i965->intel.bufmgr, 
4071                       "interface discriptor", 
4072                       sizeof(struct gen6_interface_descriptor_data), 
4073                       4096);
4074     assert(bo);
4075     pp_context->idrt.bo = bo;
4076     pp_context->idrt.num_interface_descriptors = 0;
4077
4078     dri_bo_unreference(pp_context->sampler_state_table.bo);
4079     bo = dri_bo_alloc(i965->intel.bufmgr, 
4080                       "sampler state table", 
4081                       4096,
4082                       4096);
4083     assert(bo);
4084     dri_bo_map(bo, True);
4085     memset(bo->virtual, 0, bo->size);
4086     dri_bo_unmap(bo);
4087     pp_context->sampler_state_table.bo = bo;
4088
4089     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4090     bo = dri_bo_alloc(i965->intel.bufmgr, 
4091                       "sampler 8x8 state ",
4092                       4096,
4093                       4096);
4094     assert(bo);
4095     pp_context->sampler_state_table.bo_8x8 = bo;
4096
4097     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4098     bo = dri_bo_alloc(i965->intel.bufmgr, 
4099                       "sampler 8x8 state ",
4100                       4096,
4101                       4096);
4102     assert(bo);
4103     pp_context->sampler_state_table.bo_8x8_uv = bo;
4104
4105     dri_bo_unreference(pp_context->vfe_state.bo);
4106     bo = dri_bo_alloc(i965->intel.bufmgr, 
4107                       "vfe state", 
4108                       sizeof(struct i965_vfe_state), 
4109                       4096);
4110     assert(bo);
4111     pp_context->vfe_state.bo = bo;
4112     
4113     if (IS_GEN7(i965->intel.device_id)) {
4114         static_param_size = sizeof(struct gen7_pp_static_parameter);
4115         inline_param_size = sizeof(struct gen7_pp_inline_parameter);
4116     } else {
4117         static_param_size = sizeof(struct pp_static_parameter);
4118         inline_param_size = sizeof(struct pp_inline_parameter);
4119     }
4120
4121     memset(pp_context->pp_static_parameter, 0, static_param_size);
4122     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
4123
4124     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
4125     pp_context->current_pp = pp_index;
4126     pp_module = &pp_context->pp_modules[pp_index];
4127     
4128     if (pp_module->initialize)
4129         va_status = pp_module->initialize(ctx, pp_context,
4130                                           src_surface,
4131                                           src_rect,
4132                                           dst_surface,
4133                                           dst_rect,
4134                                           filter_param);
4135     else
4136         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4137
4138     calculate_boundary_block_mask(pp_context, dst_rect);
4139
4140     return va_status;
4141 }
4142
4143
4144 static void
4145 gen6_pp_interface_descriptor_table(VADriverContextP   ctx,
4146                                    struct i965_post_processing_context *pp_context)
4147 {
4148     struct i965_driver_data *i965 = i965_driver_data(ctx);
4149     struct gen6_interface_descriptor_data *desc;
4150     dri_bo *bo;
4151     int pp_index = pp_context->current_pp;
4152
4153     bo = pp_context->idrt.bo;
4154     dri_bo_map(bo, True);
4155     assert(bo->virtual);
4156     desc = bo->virtual;
4157     memset(desc, 0, sizeof(*desc));
4158     desc->desc0.kernel_start_pointer = 
4159         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
4160     desc->desc1.single_program_flow = 1;
4161     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
4162     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
4163     desc->desc2.sampler_state_pointer = 
4164         pp_context->sampler_state_table.bo->offset >> 5;
4165     desc->desc3.binding_table_entry_count = 0;
4166     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
4167     desc->desc4.constant_urb_entry_read_offset = 0;
4168
4169     if (IS_GEN7(i965->intel.device_id))
4170         desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */
4171     else
4172         desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
4173
4174     dri_bo_emit_reloc(bo,
4175                       I915_GEM_DOMAIN_INSTRUCTION, 0,
4176                       0,
4177                       offsetof(struct gen6_interface_descriptor_data, desc0),
4178                       pp_context->pp_modules[pp_index].kernel.bo);
4179
4180     dri_bo_emit_reloc(bo,
4181                       I915_GEM_DOMAIN_INSTRUCTION, 0,
4182                       desc->desc2.sampler_count << 2,
4183                       offsetof(struct gen6_interface_descriptor_data, desc2),
4184                       pp_context->sampler_state_table.bo);
4185
4186     dri_bo_unmap(bo);
4187     pp_context->idrt.num_interface_descriptors++;
4188 }
4189
4190 static void
4191 gen6_pp_upload_constants(VADriverContextP ctx,
4192                          struct i965_post_processing_context *pp_context)
4193 {
4194     struct i965_driver_data *i965 = i965_driver_data(ctx);
4195     unsigned char *constant_buffer;
4196     int param_size;
4197
4198     assert(sizeof(struct pp_static_parameter) == 128);
4199     assert(sizeof(struct gen7_pp_static_parameter) == 192);
4200
4201     if (IS_GEN7(i965->intel.device_id) ||
4202         IS_GEN8(i965->intel.device_id))
4203         param_size = sizeof(struct gen7_pp_static_parameter);
4204     else
4205         param_size = sizeof(struct pp_static_parameter);
4206
4207     dri_bo_map(pp_context->curbe.bo, 1);
4208     assert(pp_context->curbe.bo->virtual);
4209     constant_buffer = pp_context->curbe.bo->virtual;
4210     memcpy(constant_buffer, pp_context->pp_static_parameter, param_size);
4211     dri_bo_unmap(pp_context->curbe.bo);
4212 }
4213
4214 static void
4215 gen6_pp_states_setup(VADriverContextP ctx,
4216                      struct i965_post_processing_context *pp_context)
4217 {
4218     gen6_pp_interface_descriptor_table(ctx, pp_context);
4219     gen6_pp_upload_constants(ctx, pp_context);
4220 }
4221
4222 static void
4223 gen6_pp_pipeline_select(VADriverContextP ctx,
4224                         struct i965_post_processing_context *pp_context)
4225 {
4226     struct intel_batchbuffer *batch = pp_context->batch;
4227
4228     BEGIN_BATCH(batch, 1);
4229     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
4230     ADVANCE_BATCH(batch);
4231 }
4232
4233 static void
4234 gen6_pp_state_base_address(VADriverContextP ctx,
4235                            struct i965_post_processing_context *pp_context)
4236 {
4237     struct intel_batchbuffer *batch = pp_context->batch;
4238
4239     BEGIN_BATCH(batch, 10);
4240     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
4241     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4242     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
4243     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4244     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4245     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4246     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4247     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4248     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4249     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4250     ADVANCE_BATCH(batch);
4251 }
4252
4253 static void
4254 gen6_pp_vfe_state(VADriverContextP ctx,
4255                   struct i965_post_processing_context *pp_context)
4256 {
4257     struct intel_batchbuffer *batch = pp_context->batch;
4258
4259     BEGIN_BATCH(batch, 8);
4260     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
4261     OUT_BATCH(batch, 0);
4262     OUT_BATCH(batch,
4263               (pp_context->vfe_gpu_state.max_num_threads - 1) << 16 |
4264               pp_context->vfe_gpu_state.num_urb_entries << 8);
4265     OUT_BATCH(batch, 0);
4266     OUT_BATCH(batch,
4267               (pp_context->vfe_gpu_state.urb_entry_size) << 16 |  
4268                 /* URB Entry Allocation Size, in 256 bits unit */
4269               (pp_context->vfe_gpu_state.curbe_allocation_size));
4270                 /* CURBE Allocation Size, in 256 bits unit */
4271     OUT_BATCH(batch, 0);
4272     OUT_BATCH(batch, 0);
4273     OUT_BATCH(batch, 0);
4274     ADVANCE_BATCH(batch);
4275 }
4276
4277 static void
4278 gen6_pp_curbe_load(VADriverContextP ctx,
4279                    struct i965_post_processing_context *pp_context)
4280 {
4281     struct intel_batchbuffer *batch = pp_context->batch;
4282     struct i965_driver_data *i965 = i965_driver_data(ctx);
4283     int param_size;
4284
4285     if (IS_GEN7(i965->intel.device_id) ||
4286         IS_GEN8(i965->intel.device_id))
4287         param_size = sizeof(struct gen7_pp_static_parameter);
4288     else
4289         param_size = sizeof(struct pp_static_parameter);
4290
4291     BEGIN_BATCH(batch, 4);
4292     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
4293     OUT_BATCH(batch, 0);
4294     OUT_BATCH(batch,
4295               param_size);
4296     OUT_RELOC(batch, 
4297               pp_context->curbe.bo,
4298               I915_GEM_DOMAIN_INSTRUCTION, 0,
4299               0);
4300     ADVANCE_BATCH(batch);
4301 }
4302
4303 static void
4304 gen6_interface_descriptor_load(VADriverContextP ctx,
4305                                struct i965_post_processing_context *pp_context)
4306 {
4307     struct intel_batchbuffer *batch = pp_context->batch;
4308
4309     BEGIN_BATCH(batch, 4);
4310     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
4311     OUT_BATCH(batch, 0);
4312     OUT_BATCH(batch,
4313               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
4314     OUT_RELOC(batch, 
4315               pp_context->idrt.bo,
4316               I915_GEM_DOMAIN_INSTRUCTION, 0,
4317               0);
4318     ADVANCE_BATCH(batch);
4319 }
4320
4321 static void update_block_mask_parameter(struct i965_post_processing_context *pp_context, int x, int y, int x_steps, int y_steps) 
4322 {
4323     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
4324
4325     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
4326     pp_inline_parameter->grf6.block_vertical_mask_bottom = pp_context->block_vertical_mask_bottom;
4327     // for the first block, it always on the left edge. the second block will reload horizontal_mask from grf6.block_horizontal_mask_middle
4328     pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_left;
4329     pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
4330     pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_right;
4331
4332     /* 1 x N */
4333     if (x_steps == 1) {
4334         if (y == y_steps-1) {
4335             pp_inline_parameter->grf5.block_vertical_mask = pp_context->block_vertical_mask_bottom;
4336         }
4337         else {
4338             pp_inline_parameter->grf6.block_vertical_mask_bottom = 0xff;
4339         }
4340     }
4341
4342     /* M x 1 */
4343     if (y_steps == 1) {
4344         if (x == 0) { // all blocks in this group are on the left edge
4345             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_left;
4346             pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_left; 
4347         }
4348         else if (x == x_steps-1) {
4349             pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_right;
4350             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_right;
4351         }
4352         else {
4353             pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
4354             pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
4355             pp_inline_parameter->grf6.block_horizontal_mask_right = 0xffff;
4356         }
4357     }
4358
4359 }
4360
4361 static void
4362 gen6_pp_object_walker(VADriverContextP ctx,
4363                       struct i965_post_processing_context *pp_context)
4364 {
4365     struct i965_driver_data *i965 = i965_driver_data(ctx);
4366     struct intel_batchbuffer *batch = pp_context->batch;
4367     int x, x_steps, y, y_steps;
4368     int param_size, command_length_in_dws;
4369     dri_bo *command_buffer;
4370     unsigned int *command_ptr;
4371
4372     if (IS_GEN7(i965->intel.device_id) ||
4373         IS_GEN8(i965->intel.device_id))
4374         param_size = sizeof(struct gen7_pp_inline_parameter);
4375     else
4376         param_size = sizeof(struct pp_inline_parameter);
4377
4378     x_steps = pp_context->pp_x_steps(pp_context->private_context);
4379     y_steps = pp_context->pp_y_steps(pp_context->private_context);
4380     command_length_in_dws = 6 + (param_size >> 2);
4381     command_buffer = dri_bo_alloc(i965->intel.bufmgr,
4382                                   "command objects buffer",
4383                                   command_length_in_dws * 4 * x_steps * y_steps + 8,
4384                                   4096);
4385
4386     dri_bo_map(command_buffer, 1);
4387     command_ptr = command_buffer->virtual;
4388
4389     for (y = 0; y < y_steps; y++) {
4390         for (x = 0; x < x_steps; x++) {
4391             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
4392                 // some common block parameter update goes here, apply to all pp functions
4393                 if (IS_GEN6(i965->intel.device_id))
4394                     update_block_mask_parameter (pp_context, x, y, x_steps, y_steps);
4395                 
4396                 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
4397                 *command_ptr++ = 0;
4398                 *command_ptr++ = 0;
4399                 *command_ptr++ = 0;
4400                 *command_ptr++ = 0;
4401                 *command_ptr++ = 0;
4402                 memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
4403                 command_ptr += (param_size >> 2);
4404             }
4405         }
4406     }
4407
4408     if (command_length_in_dws * x_steps * y_steps % 2 == 0)
4409         *command_ptr++ = 0;
4410
4411     *command_ptr = MI_BATCH_BUFFER_END;
4412
4413     dri_bo_unmap(command_buffer);
4414
4415     if (IS_GEN8(i965->intel.device_id)) {
4416         BEGIN_BATCH(batch, 3);
4417         OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
4418         OUT_RELOC(batch, command_buffer, 
4419               I915_GEM_DOMAIN_COMMAND, 0, 
4420               0);
4421         OUT_BATCH(batch, 0);
4422         ADVANCE_BATCH(batch);
4423     } else {
4424         BEGIN_BATCH(batch, 2);
4425         OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
4426         OUT_RELOC(batch, command_buffer, 
4427               I915_GEM_DOMAIN_COMMAND, 0, 
4428               0);
4429         ADVANCE_BATCH(batch);
4430     }
4431     
4432     dri_bo_unreference(command_buffer);
4433
4434     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
4435      * will cause control to pass back to ring buffer 
4436      */
4437     intel_batchbuffer_end_atomic(batch);
4438     intel_batchbuffer_flush(batch);
4439     intel_batchbuffer_start_atomic(batch, 0x1000);
4440 }
4441
4442 static void
4443 gen6_pp_pipeline_setup(VADriverContextP ctx,
4444                        struct i965_post_processing_context *pp_context)
4445 {
4446     struct intel_batchbuffer *batch = pp_context->batch;
4447
4448     intel_batchbuffer_start_atomic(batch, 0x1000);
4449     intel_batchbuffer_emit_mi_flush(batch);
4450     gen6_pp_pipeline_select(ctx, pp_context);
4451     gen6_pp_state_base_address(ctx, pp_context);
4452     gen6_pp_vfe_state(ctx, pp_context);
4453     gen6_pp_curbe_load(ctx, pp_context);
4454     gen6_interface_descriptor_load(ctx, pp_context);
4455     gen6_pp_object_walker(ctx, pp_context);
4456     intel_batchbuffer_end_atomic(batch);
4457 }
4458
4459 static VAStatus
4460 gen6_post_processing(
4461     VADriverContextP ctx,
4462     struct i965_post_processing_context *pp_context,
4463     const struct i965_surface *src_surface,
4464     const VARectangle *src_rect,
4465     struct i965_surface *dst_surface,
4466     const VARectangle *dst_rect,
4467     int pp_index,
4468     void *filter_param
4469 )
4470 {
4471     VAStatus va_status;
4472     
4473     va_status = gen6_pp_initialize(ctx, pp_context,
4474                                    src_surface,
4475                                    src_rect,
4476                                    dst_surface,
4477                                    dst_rect,
4478                                    pp_index,
4479                                    filter_param);
4480
4481     if (va_status == VA_STATUS_SUCCESS) {
4482         gen6_pp_states_setup(ctx, pp_context);
4483         gen6_pp_pipeline_setup(ctx, pp_context);
4484     }
4485
4486     if (va_status == VA_STATUS_SUCCESS_1)
4487         va_status = VA_STATUS_SUCCESS;
4488
4489     return va_status;
4490 }
4491
4492 static VAStatus
4493 i965_post_processing_internal(
4494     VADriverContextP   ctx,
4495     struct i965_post_processing_context *pp_context,
4496     const struct i965_surface *src_surface,
4497     const VARectangle *src_rect,
4498     struct i965_surface *dst_surface,
4499     const VARectangle *dst_rect,
4500     int                pp_index,
4501     void *filter_param
4502 )
4503 {
4504     VAStatus va_status;
4505     struct i965_driver_data *i965 = i965_driver_data(ctx);
4506
4507     if (pp_context && pp_context->intel_post_processing) {
4508         va_status = (pp_context->intel_post_processing)(ctx, pp_context,
4509                           src_surface, src_rect,
4510                           dst_surface, dst_rect,
4511                           pp_index, filter_param);
4512     } else {
4513         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4514     }
4515
4516     return va_status;
4517 }
4518
/*
 * Split a packed 32-bit ARGB value (alpha in bits 31:24, red 23:16, green
 * 15:8, blue 7:0) and convert the colour to Y/U/V using integer
 * coefficients scaled by 1000, with offsets of 16 for Y and 128 for U and V.
 *
 * @argb  packed input colour
 * @y     receives the luma value
 * @u     receives the U chroma value
 * @v     receives the V chroma value
 * @a     receives the alpha byte, copied through unchanged
 */
static void
rgb_to_yuv(unsigned int argb,
           unsigned char *y,
           unsigned char *u,
           unsigned char *v,
           unsigned char *a)
{
    const int red   = (int)((argb >> 16) & 0xff);
    const int green = (int)((argb >> 8) & 0xff);
    const int blue  = (int)(argb & 0xff);

    /* weighted sums; integer division truncates toward zero */
    const int luma     = (257 * red + 504 * green + 98 * blue) / 1000 + 16;
    const int chroma_v = (439 * red - 368 * green - 71 * blue) / 1000 + 128;
    const int chroma_u = (-148 * red - 291 * green + 439 * blue) / 1000 + 128;

    *y = (unsigned char)luma;
    *v = (unsigned char)chroma_v;
    *u = (unsigned char)chroma_u;
    *a = (unsigned char)((argb >> 24) & 0xff);
}
4535
4536 static void 
4537 i965_vpp_clear_surface(VADriverContextP ctx,
4538                        struct i965_post_processing_context *pp_context,
4539                        struct object_surface *obj_surface,
4540                        unsigned int color)
4541 {
4542     struct i965_driver_data *i965 = i965_driver_data(ctx);
4543     struct intel_batchbuffer *batch = pp_context->batch;
4544     unsigned int blt_cmd, br13;
4545     unsigned int tiling = 0, swizzle = 0;
4546     int pitch;
4547     unsigned char y, u, v, a = 0;
4548     int region_width, region_height;
4549
4550     /* Currently only support NV12 surface */
4551     if (!obj_surface || obj_surface->fourcc != VA_FOURCC_NV12)
4552         return;
4553
4554     rgb_to_yuv(color, &y, &u, &v, &a);
4555
4556     if (a == 0)
4557         return;
4558
4559     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
4560     blt_cmd = XY_COLOR_BLT_CMD;
4561     pitch = obj_surface->width;
4562
4563     if (tiling != I915_TILING_NONE) {
4564         assert(tiling == I915_TILING_Y);
4565         // blt_cmd |= XY_COLOR_BLT_DST_TILED;
4566         // pitch >>= 2;
4567     }
4568
4569     br13 = 0xf0 << 16;
4570     br13 |= BR13_8;
4571     br13 |= pitch;
4572
4573     if (IS_GEN6(i965->intel.device_id) ||
4574         IS_GEN7(i965->intel.device_id) ||
4575         IS_GEN8(i965->intel.device_id)) {
4576         intel_batchbuffer_start_atomic_blt(batch, 48);
4577         BEGIN_BLT_BATCH(batch, 12);
4578     } else {
4579         intel_batchbuffer_start_atomic(batch, 48);
4580         BEGIN_BATCH(batch, 12);
4581     }
4582
4583     region_width = obj_surface->width;
4584     region_height = obj_surface->height;
4585
4586     OUT_BATCH(batch, blt_cmd);
4587     OUT_BATCH(batch, br13);
4588     OUT_BATCH(batch,
4589               0 << 16 |
4590               0);
4591     OUT_BATCH(batch,
4592               region_height << 16 |
4593               region_width);
4594     OUT_RELOC(batch, obj_surface->bo, 
4595               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4596               0);
4597     OUT_BATCH(batch, y);
4598
4599     br13 = 0xf0 << 16;
4600     br13 |= BR13_565;
4601     br13 |= pitch;
4602
4603     region_width = obj_surface->width / 2;
4604     region_height = obj_surface->height / 2;
4605
4606     if (tiling == I915_TILING_Y) {
4607         region_height = ALIGN(obj_surface->height / 2, 32);
4608     }
4609
4610     OUT_BATCH(batch, blt_cmd);
4611     OUT_BATCH(batch, br13);
4612     OUT_BATCH(batch,
4613               0 << 16 |
4614               0);
4615     OUT_BATCH(batch,
4616               region_height << 16 |
4617               region_width);
4618     OUT_RELOC(batch, obj_surface->bo, 
4619               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4620               obj_surface->width * obj_surface->y_cb_offset);
4621     OUT_BATCH(batch, v << 8 | u);
4622
4623     ADVANCE_BATCH(batch);
4624     intel_batchbuffer_end_atomic(batch);
4625 }
4626
4627 VAStatus
4628 i965_scaling_processing(
4629     VADriverContextP   ctx,
4630     struct object_surface *src_surface_obj,
4631     const VARectangle *src_rect,
4632     struct object_surface *dst_surface_obj,
4633     const VARectangle *dst_rect,
4634     unsigned int       flags)
4635 {
4636     VAStatus va_status = VA_STATUS_SUCCESS;
4637     struct i965_driver_data *i965 = i965_driver_data(ctx);
4638  
4639     assert(src_surface_obj->fourcc == VA_FOURCC_NV12);
4640     assert(dst_surface_obj->fourcc == VA_FOURCC_NV12);
4641
4642     if (HAS_PP(i965) && (flags & I965_PP_FLAG_AVS)) {
4643         struct i965_surface src_surface;
4644         struct i965_surface dst_surface;
4645
4646          _i965LockMutex(&i965->pp_mutex);
4647
4648          src_surface.base = (struct object_base *)src_surface_obj;
4649          src_surface.type = I965_SURFACE_TYPE_SURFACE;
4650          src_surface.flags = I965_SURFACE_FLAG_FRAME;
4651          dst_surface.base = (struct object_base *)dst_surface_obj;
4652          dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4653          dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4654
4655          va_status = i965_post_processing_internal(ctx, i965->pp_context,
4656                                                    &src_surface,
4657                                                    src_rect,
4658                                                    &dst_surface,
4659                                                    dst_rect,
4660                                                    PP_NV12_AVS,
4661                                                    NULL);
4662
4663          _i965UnlockMutex(&i965->pp_mutex);
4664     }
4665
4666     return va_status;
4667 }
4668
4669 VASurfaceID
4670 i965_post_processing(
4671     VADriverContextP   ctx,
4672     struct object_surface *obj_surface,
4673     const VARectangle *src_rect,
4674     const VARectangle *dst_rect,
4675     unsigned int       flags,
4676     int               *has_done_scaling  
4677 )
4678 {
4679     struct i965_driver_data *i965 = i965_driver_data(ctx);
4680     VASurfaceID out_surface_id = VA_INVALID_ID;
4681     VASurfaceID tmp_id = VA_INVALID_ID;
4682     
4683     *has_done_scaling = 0;
4684
4685     if (HAS_PP(i965)) {
4686         VAStatus status;
4687         struct i965_surface src_surface;
4688         struct i965_surface dst_surface;
4689
4690         /* Currently only support post processing for NV12 surface */
4691         if (obj_surface->fourcc != VA_FOURCC_NV12)
4692             return out_surface_id;
4693
4694         _i965LockMutex(&i965->pp_mutex);
4695
4696         if (flags & I965_PP_FLAG_MCDI) {
4697             src_surface.base = (struct object_base *)obj_surface;
4698             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4699             src_surface.flags = (flags & I965_PP_FLAG_TOP_FIELD) ? 
4700                 I965_SURFACE_FLAG_TOP_FIELD_FIRST : I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST;
4701
4702             status = i965_CreateSurfaces(ctx,
4703                                          obj_surface->orig_width,
4704                                          obj_surface->orig_height,
4705                                          VA_RT_FORMAT_YUV420,
4706                                          1,
4707                                          &out_surface_id);
4708             assert(status == VA_STATUS_SUCCESS);
4709             obj_surface = SURFACE(out_surface_id);
4710             assert(obj_surface);
4711             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
4712             i965_vpp_clear_surface(ctx, i965->pp_context, obj_surface, 0); 
4713
4714             dst_surface.base = (struct object_base *)obj_surface;
4715             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4716             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4717
4718             i965_post_processing_internal(ctx, i965->pp_context,
4719                                           &src_surface,
4720                                           src_rect,
4721                                           &dst_surface,
4722                                           dst_rect,
4723                                           PP_NV12_DNDI,
4724                                           NULL);
4725         }
4726
4727         if (flags & I965_PP_FLAG_AVS) {
4728             struct i965_render_state *render_state = &i965->render_state;
4729             struct intel_region *dest_region = render_state->draw_region;
4730
4731             if (out_surface_id != VA_INVALID_ID)
4732                 tmp_id = out_surface_id;
4733
4734             src_surface.base = (struct object_base *)obj_surface;
4735             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4736             src_surface.flags = I965_SURFACE_FLAG_FRAME;
4737
4738             status = i965_CreateSurfaces(ctx,
4739                                          dest_region->width,
4740                                          dest_region->height,
4741                                          VA_RT_FORMAT_YUV420,
4742                                          1,
4743                                          &out_surface_id);
4744             assert(status == VA_STATUS_SUCCESS);
4745             obj_surface = SURFACE(out_surface_id);
4746             assert(obj_surface);
4747             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
4748             i965_vpp_clear_surface(ctx, i965->pp_context, obj_surface, 0); 
4749
4750             dst_surface.base = (struct object_base *)obj_surface;
4751             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4752             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4753
4754             i965_post_processing_internal(ctx, i965->pp_context,
4755                                           &src_surface,
4756                                           src_rect,
4757                                           &dst_surface,
4758                                           dst_rect,
4759                                           PP_NV12_AVS,
4760                                           NULL);
4761
4762             if (tmp_id != VA_INVALID_ID)
4763                 i965_DestroySurfaces(ctx, &tmp_id, 1);
4764                 
4765             *has_done_scaling = 1;
4766         }
4767
4768         _i965UnlockMutex(&i965->pp_mutex);
4769     }
4770
4771     return out_surface_id;
4772 }       
4773
4774 static VAStatus
4775 i965_image_pl2_processing(VADriverContextP ctx,
4776                           const struct i965_surface *src_surface,
4777                           const VARectangle *src_rect,
4778                           struct i965_surface *dst_surface,
4779                           const VARectangle *dst_rect);
4780
4781 static VAStatus
4782 i965_image_plx_nv12_plx_processing(VADriverContextP ctx,
4783                                    VAStatus (*i965_image_plx_nv12_processing)(
4784                                        VADriverContextP,
4785                                        const struct i965_surface *,
4786                                        const VARectangle *,
4787                                        struct i965_surface *,
4788                                        const VARectangle *),
4789                                    const struct i965_surface *src_surface,
4790                                    const VARectangle *src_rect,
4791                                    struct i965_surface *dst_surface,
4792                                    const VARectangle *dst_rect)
4793 {
4794     struct i965_driver_data *i965 = i965_driver_data(ctx);
4795     VAStatus status;
4796     VASurfaceID tmp_surface_id = VA_INVALID_SURFACE;
4797     struct object_surface *obj_surface = NULL;
4798     struct i965_surface tmp_surface;
4799     int width, height;
4800
4801     pp_get_surface_size(ctx, dst_surface, &width, &height);
4802     status = i965_CreateSurfaces(ctx,
4803                                  width,
4804                                  height,
4805                                  VA_RT_FORMAT_YUV420,
4806                                  1,
4807                                  &tmp_surface_id);
4808     assert(status == VA_STATUS_SUCCESS);
4809     obj_surface = SURFACE(tmp_surface_id);
4810     assert(obj_surface);
4811     i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
4812
4813     tmp_surface.base = (struct object_base *)obj_surface;
4814     tmp_surface.type = I965_SURFACE_TYPE_SURFACE;
4815     tmp_surface.flags = I965_SURFACE_FLAG_FRAME;
4816
4817     status = i965_image_plx_nv12_processing(ctx,
4818                                             src_surface,
4819                                             src_rect,
4820                                             &tmp_surface,
4821                                             dst_rect);
4822
4823     if (status == VA_STATUS_SUCCESS)
4824         status = i965_image_pl2_processing(ctx,
4825                                            &tmp_surface,
4826                                            dst_rect,
4827                                            dst_surface,
4828                                            dst_rect);
4829
4830     i965_DestroySurfaces(ctx,
4831                          &tmp_surface_id,
4832                          1);
4833
4834     return status;
4835 }
4836
4837
4838 static VAStatus
4839 i965_image_pl1_rgbx_processing(VADriverContextP ctx,
4840                                const struct i965_surface *src_surface,
4841                                const VARectangle *src_rect,
4842                                struct i965_surface *dst_surface,
4843                                const VARectangle *dst_rect)
4844 {
4845     struct i965_driver_data *i965 = i965_driver_data(ctx);
4846     struct i965_post_processing_context *pp_context = i965->pp_context;
4847     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4848     VAStatus vaStatus;
4849
4850     switch (fourcc) {
4851     case VA_FOURCC_NV12:
4852         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4853                                                  src_surface,
4854                                                  src_rect,
4855                                                  dst_surface,
4856                                                  dst_rect,
4857                                                  PP_RGBX_LOAD_SAVE_NV12,
4858                                                  NULL);
4859         intel_batchbuffer_flush(pp_context->batch);
4860         break;
4861
4862     default:
4863         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
4864                                                       i965_image_pl1_rgbx_processing,
4865                                                       src_surface,
4866                                                       src_rect,
4867                                                       dst_surface,
4868                                                       dst_rect);
4869         break;
4870     }
4871
4872     return vaStatus;
4873 }
4874
4875 static VAStatus
4876 i965_image_pl3_processing(VADriverContextP ctx,
4877                           const struct i965_surface *src_surface,
4878                           const VARectangle *src_rect,
4879                           struct i965_surface *dst_surface,
4880                           const VARectangle *dst_rect)
4881 {
4882     struct i965_driver_data *i965 = i965_driver_data(ctx);
4883     struct i965_post_processing_context *pp_context = i965->pp_context;
4884     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4885     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
4886
4887     switch (fourcc) {
4888     case VA_FOURCC_NV12:
4889         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4890                                                  src_surface,
4891                                                  src_rect,
4892                                                  dst_surface,
4893                                                  dst_rect,
4894                                                  PP_PL3_LOAD_SAVE_N12,
4895                                                  NULL);
4896         intel_batchbuffer_flush(pp_context->batch);
4897         break;
4898
4899     case VA_FOURCC_IMC1:
4900     case VA_FOURCC_IMC3:
4901     case VA_FOURCC_YV12:
4902     case VA_FOURCC_I420:
4903         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4904                                                  src_surface,
4905                                                  src_rect,
4906                                                  dst_surface,
4907                                                  dst_rect,
4908                                                  PP_PL3_LOAD_SAVE_PL3,
4909                                                  NULL);
4910         intel_batchbuffer_flush(pp_context->batch);
4911         break;
4912
4913     case VA_FOURCC_YUY2:
4914     case VA_FOURCC_UYVY:
4915         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4916                                                  src_surface,
4917                                                  src_rect,
4918                                                  dst_surface,
4919                                                  dst_rect,
4920                                                  PP_PL3_LOAD_SAVE_PA,
4921                                                  NULL);
4922         intel_batchbuffer_flush(pp_context->batch);
4923         break;
4924
4925     default:
4926         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
4927                                                       i965_image_pl3_processing,
4928                                                       src_surface,
4929                                                       src_rect,
4930                                                       dst_surface,
4931                                                       dst_rect);
4932         break;
4933     }
4934
4935     return vaStatus;
4936 }
4937
4938 static VAStatus
4939 i965_image_pl2_processing(VADriverContextP ctx,
4940                           const struct i965_surface *src_surface,
4941                           const VARectangle *src_rect,
4942                           struct i965_surface *dst_surface,
4943                           const VARectangle *dst_rect)
4944 {
4945     struct i965_driver_data *i965 = i965_driver_data(ctx);
4946     struct i965_post_processing_context *pp_context = i965->pp_context;
4947     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4948     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
4949
4950     switch (fourcc) {
4951     case VA_FOURCC_NV12:
4952         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4953                                                  src_surface,
4954                                                  src_rect,
4955                                                  dst_surface,
4956                                                  dst_rect,
4957                                                  PP_NV12_LOAD_SAVE_N12,
4958                                                  NULL);
4959         break;
4960
4961     case VA_FOURCC_IMC1:
4962     case VA_FOURCC_IMC3:
4963     case VA_FOURCC_YV12:
4964     case VA_FOURCC_I420:
4965         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4966                                                  src_surface,
4967                                                  src_rect,
4968                                                  dst_surface,
4969                                                  dst_rect,
4970                                                  PP_NV12_LOAD_SAVE_PL3,
4971                                                  NULL);
4972         break;
4973
4974     case VA_FOURCC_YUY2:
4975     case VA_FOURCC_UYVY:
4976         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4977                                                  src_surface,
4978                                                  src_rect,
4979                                                  dst_surface,
4980                                                  dst_rect,
4981                                                  PP_NV12_LOAD_SAVE_PA,
4982                                                  NULL);
4983         break;
4984
4985     case VA_FOURCC_BGRX:
4986     case VA_FOURCC_BGRA:
4987     case VA_FOURCC_RGBX:
4988     case VA_FOURCC_RGBA:
4989         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4990                                                  src_surface,
4991                                                  src_rect,
4992                                                  dst_surface,
4993                                                  dst_rect,
4994                                                  PP_NV12_LOAD_SAVE_RGBX,
4995                                                  NULL);
4996         break;
4997
4998     default:
4999         return VA_STATUS_ERROR_UNIMPLEMENTED;
5000     }
5001
5002     intel_batchbuffer_flush(pp_context->batch);
5003
5004     return vaStatus;
5005 }
5006
5007 static VAStatus
5008 i965_image_pl1_processing(VADriverContextP ctx,
5009                           const struct i965_surface *src_surface,
5010                           const VARectangle *src_rect,
5011                           struct i965_surface *dst_surface,
5012                           const VARectangle *dst_rect)
5013 {
5014     struct i965_driver_data *i965 = i965_driver_data(ctx);
5015     struct i965_post_processing_context *pp_context = i965->pp_context;
5016     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
5017     VAStatus vaStatus;
5018
5019     switch (fourcc) {
5020     case VA_FOURCC_NV12:
5021         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5022                                                  src_surface,
5023                                                  src_rect,
5024                                                  dst_surface,
5025                                                  dst_rect,
5026                                                  PP_PA_LOAD_SAVE_NV12,
5027                                                  NULL);
5028         intel_batchbuffer_flush(pp_context->batch);
5029         break;
5030
5031     case VA_FOURCC_YV12:
5032         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5033                                                  src_surface,
5034                                                  src_rect,
5035                                                  dst_surface,
5036                                                  dst_rect,
5037                                                  PP_PA_LOAD_SAVE_PL3,
5038                                                  NULL);
5039         intel_batchbuffer_flush(pp_context->batch);
5040         break;
5041
5042     case VA_FOURCC_YUY2:
5043     case VA_FOURCC_UYVY:
5044         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
5045                                                  src_surface,
5046                                                  src_rect,
5047                                                  dst_surface,
5048                                                  dst_rect,
5049                                                  PP_PA_LOAD_SAVE_PA,
5050                                                  NULL);
5051         intel_batchbuffer_flush(pp_context->batch);
5052         break;
5053
5054     default:
5055         vaStatus = i965_image_plx_nv12_plx_processing(ctx,
5056                                                       i965_image_pl1_processing,
5057                                                       src_surface,
5058                                                       src_rect,
5059                                                       dst_surface,
5060                                                       dst_rect);
5061         break;
5062     }
5063
5064     return vaStatus;
5065 }
5066
5067 VAStatus
5068 i965_image_processing(VADriverContextP ctx,
5069                       const struct i965_surface *src_surface,
5070                       const VARectangle *src_rect,
5071                       struct i965_surface *dst_surface,
5072                       const VARectangle *dst_rect)
5073 {
5074     struct i965_driver_data *i965 = i965_driver_data(ctx);
5075     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
5076
5077     if (HAS_PP(i965)) {
5078         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
5079
5080         _i965LockMutex(&i965->pp_mutex);
5081
5082         switch (fourcc) {
5083         case VA_FOURCC_YV12:
5084         case VA_FOURCC_I420:
5085         case VA_FOURCC_IMC1:
5086         case VA_FOURCC_IMC3:
5087         case VA_FOURCC_422H:
5088         case VA_FOURCC_422V:
5089         case VA_FOURCC_411P:
5090         case VA_FOURCC_444P:
5091         case VA_FOURCC_YV16:
5092             status = i965_image_pl3_processing(ctx,
5093                                                src_surface,
5094                                                src_rect,
5095                                                dst_surface,
5096                                                dst_rect);
5097             break;
5098
5099         case  VA_FOURCC_NV12:
5100             status = i965_image_pl2_processing(ctx,
5101                                                src_surface,
5102                                                src_rect,
5103                                                dst_surface,
5104                                                dst_rect);
5105             break;
5106         case VA_FOURCC_YUY2:
5107         case VA_FOURCC_UYVY:
5108             status = i965_image_pl1_processing(ctx,
5109                                                src_surface,
5110                                                src_rect,
5111                                                dst_surface,
5112                                                dst_rect);
5113             break;
5114         case VA_FOURCC_BGRA:
5115         case VA_FOURCC_BGRX:
5116         case VA_FOURCC_RGBA:
5117         case VA_FOURCC_RGBX:
5118             status = i965_image_pl1_rgbx_processing(ctx,
5119                                                src_surface,
5120                                                src_rect,
5121                                                dst_surface,
5122                                                dst_rect);
5123             break;
5124         default:
5125             status = VA_STATUS_ERROR_UNIMPLEMENTED;
5126             break;
5127         }
5128         
5129         _i965UnlockMutex(&i965->pp_mutex);
5130     }
5131
5132     return status;
5133 }       
5134
5135 static void
5136 i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
5137 {
5138     int i;
5139
5140     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
5141     pp_context->surface_state_binding_table.bo = NULL;
5142
5143     dri_bo_unreference(pp_context->curbe.bo);
5144     pp_context->curbe.bo = NULL;
5145
5146     dri_bo_unreference(pp_context->sampler_state_table.bo);
5147     pp_context->sampler_state_table.bo = NULL;
5148
5149     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
5150     pp_context->sampler_state_table.bo_8x8 = NULL;
5151
5152     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
5153     pp_context->sampler_state_table.bo_8x8_uv = NULL;
5154
5155     dri_bo_unreference(pp_context->idrt.bo);
5156     pp_context->idrt.bo = NULL;
5157     pp_context->idrt.num_interface_descriptors = 0;
5158
5159     dri_bo_unreference(pp_context->vfe_state.bo);
5160     pp_context->vfe_state.bo = NULL;
5161
5162     dri_bo_unreference(pp_context->pp_dndi_context.stmm_bo);
5163     pp_context->pp_dndi_context.stmm_bo = NULL;
5164
5165     dri_bo_unreference(pp_context->pp_dn_context.stmm_bo);
5166     pp_context->pp_dn_context.stmm_bo = NULL;
5167
5168     for (i = 0; i < NUM_PP_MODULES; i++) {
5169         struct pp_module *pp_module = &pp_context->pp_modules[i];
5170
5171         dri_bo_unreference(pp_module->kernel.bo);
5172         pp_module->kernel.bo = NULL;
5173     }
5174
5175     free(pp_context->pp_static_parameter);
5176     free(pp_context->pp_inline_parameter);
5177     pp_context->pp_static_parameter = NULL;
5178     pp_context->pp_inline_parameter = NULL;
5179 }
5180
5181 void
5182 i965_post_processing_terminate(VADriverContextP ctx)
5183 {
5184     struct i965_driver_data *i965 = i965_driver_data(ctx);
5185     struct i965_post_processing_context *pp_context = i965->pp_context;
5186
5187     if (pp_context) {
5188         if (IS_GEN8(i965->intel.device_id)) {
5189             gen8_post_processing_context_finalize(pp_context);
5190         } else {
5191             i965_post_processing_context_finalize(pp_context);
5192         }
5193         free(pp_context);
5194     }
5195
5196     i965->pp_context = NULL;
5197 }
5198
5199 #define VPP_CURBE_ALLOCATION_SIZE       32
5200
5201 static void
5202 i965_post_processing_context_init(VADriverContextP ctx,
5203                                   struct i965_post_processing_context *pp_context,
5204                                   struct intel_batchbuffer *batch)
5205 {
5206     struct i965_driver_data *i965 = i965_driver_data(ctx);
5207     int i;
5208
5209     if (IS_GEN8(i965->intel.device_id)) {
5210         gen8_post_processing_context_init(ctx, pp_context, batch);
5211         return;
5212     };
5213
5214     if (IS_IRONLAKE(i965->intel.device_id)) {
5215         pp_context->urb.size = URB_SIZE((&i965->intel));
5216         pp_context->urb.num_vfe_entries = 32;
5217         pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
5218         pp_context->urb.num_cs_entries = 1;
5219         pp_context->urb.size_cs_entry = 2;
5220         pp_context->urb.vfe_start = 0;
5221         pp_context->urb.cs_start = pp_context->urb.vfe_start + 
5222             pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
5223         assert(pp_context->urb.cs_start +
5224             pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
5225         pp_context->intel_post_processing = ironlake_post_processing;
5226     } else {
5227         pp_context->vfe_gpu_state.max_num_threads = 60;
5228         pp_context->vfe_gpu_state.num_urb_entries = 59;
5229         pp_context->vfe_gpu_state.gpgpu_mode = 0;
5230         pp_context->vfe_gpu_state.urb_entry_size = 16 - 1;
5231         pp_context->vfe_gpu_state.curbe_allocation_size = VPP_CURBE_ALLOCATION_SIZE;
5232         pp_context->intel_post_processing = gen6_post_processing;
5233     }
5234     
5235
5236     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
5237     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
5238     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
5239     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen75));
5240
5241     if (IS_HASWELL(i965->intel.device_id))
5242         memcpy(pp_context->pp_modules, pp_modules_gen75, sizeof(pp_context->pp_modules));
5243     else if (IS_GEN7(i965->intel.device_id))
5244         memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
5245     else if (IS_GEN6(i965->intel.device_id))
5246         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
5247     else if (IS_IRONLAKE(i965->intel.device_id))
5248         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
5249
5250     for (i = 0; i < NUM_PP_MODULES; i++) {
5251         struct pp_module *pp_module = &pp_context->pp_modules[i];
5252         dri_bo_unreference(pp_module->kernel.bo);
5253         if (pp_module->kernel.bin && pp_module->kernel.size) {
5254             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
5255                                                 pp_module->kernel.name,
5256                                                 pp_module->kernel.size,
5257                                                 4096);
5258             assert(pp_module->kernel.bo);
5259             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
5260         } else {
5261             pp_module->kernel.bo = NULL;
5262         }
5263     }
5264
5265     /* static & inline parameters */
5266     if (IS_GEN7(i965->intel.device_id) ||
5267         IS_GEN8(i965->intel.device_id)) {
5268         pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
5269         pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
5270     } else {
5271         pp_context->pp_static_parameter = calloc(sizeof(struct pp_static_parameter), 1);
5272         pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
5273     }
5274
5275     pp_context->pp_dndi_context.current_out_surface = VA_INVALID_SURFACE;
5276     pp_context->pp_dndi_context.current_out_obj_surface = NULL;
5277     pp_context->pp_dndi_context.frame_order = -1;
5278     pp_context->batch = batch;
5279 }
5280
5281 bool
5282 i965_post_processing_init(VADriverContextP ctx)
5283 {
5284     struct i965_driver_data *i965 = i965_driver_data(ctx);
5285     struct i965_post_processing_context *pp_context = i965->pp_context;
5286
5287     if (HAS_PP(i965)) {
5288         if (pp_context == NULL) {
5289             pp_context = calloc(1, sizeof(*pp_context));
5290             i965_post_processing_context_init(ctx, pp_context, i965->pp_batch);
5291             i965->pp_context = pp_context;
5292         }
5293     }
5294
5295     return true;
5296 }
5297
5298 static const int procfilter_to_pp_flag[VAProcFilterCount] = {
5299     PP_NULL,    /* VAProcFilterNone */
5300     PP_NV12_DN, /* VAProcFilterNoiseReduction */
5301     PP_NV12_DNDI, /* VAProcFilterDeinterlacing */
5302     PP_NULL,    /* VAProcFilterSharpening */
5303     PP_NULL,    /* VAProcFilterColorBalance */
5304 };
5305
5306 static const int proc_frame_to_pp_frame[3] = {
5307     I965_SURFACE_FLAG_FRAME,
5308     I965_SURFACE_FLAG_TOP_FIELD_FIRST,
5309     I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST
5310 };
5311
5312 #define VA_STATUS_SUCCESS_1                     0xFFFFFFFE
5313
5314 VAStatus 
5315 i965_proc_picture(VADriverContextP ctx, 
5316                   VAProfile profile, 
5317                   union codec_state *codec_state,
5318                   struct hw_context *hw_context)
5319 {
5320     struct i965_driver_data *i965 = i965_driver_data(ctx);
5321     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
5322     struct proc_state *proc_state = &codec_state->proc;
5323     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
5324     struct object_surface *obj_surface;
5325     struct i965_surface src_surface, dst_surface;
5326     VARectangle src_rect, dst_rect;
5327     VAStatus status;
5328     int i;
5329     VASurfaceID tmp_surfaces[VAProcFilterCount + 4];
5330     int num_tmp_surfaces = 0;
5331     unsigned int tiling = 0, swizzle = 0;
5332     int in_width, in_height;
5333
5334     if (pipeline_param->surface == VA_INVALID_ID ||
5335         proc_state->current_render_target == VA_INVALID_ID) {
5336         status = VA_STATUS_ERROR_INVALID_SURFACE;
5337         goto error;
5338     }
5339
5340     obj_surface = SURFACE(pipeline_param->surface);
5341
5342     if (!obj_surface) {
5343         status = VA_STATUS_ERROR_INVALID_SURFACE;
5344         goto error;
5345     }
5346
5347     if (!obj_surface->bo) {
5348         status = VA_STATUS_ERROR_INVALID_VALUE; /* The input surface is created without valid content */
5349         goto error;
5350     }
5351
5352     if (pipeline_param->num_filters && !pipeline_param->filters) {
5353         status = VA_STATUS_ERROR_INVALID_PARAMETER;
5354         goto error;
5355     }
5356
5357     in_width = obj_surface->orig_width;
5358     in_height = obj_surface->orig_height;
5359     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
5360
5361     src_surface.base = (struct object_base *)obj_surface;
5362     src_surface.type = I965_SURFACE_TYPE_SURFACE;
5363     src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
5364
5365     VASurfaceID out_surface_id = VA_INVALID_ID;
5366     if (obj_surface->fourcc != VA_FOURCC_NV12) {
5367         src_surface.base = (struct object_base *)obj_surface;
5368         src_surface.type = I965_SURFACE_TYPE_SURFACE;
5369         src_surface.flags = I965_SURFACE_FLAG_FRAME;
5370         src_rect.x = 0;
5371         src_rect.y = 0;
5372         src_rect.width = in_width;
5373         src_rect.height = in_height;
5374
5375         status = i965_CreateSurfaces(ctx,
5376                                      in_width,
5377                                      in_height,
5378                                      VA_RT_FORMAT_YUV420,
5379                                      1,
5380                                      &out_surface_id);
5381         assert(status == VA_STATUS_SUCCESS);
5382         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
5383         obj_surface = SURFACE(out_surface_id);
5384         assert(obj_surface);
5385         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5386
5387         dst_surface.base = (struct object_base *)obj_surface;
5388         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5389         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
5390         dst_rect.x = 0;
5391         dst_rect.y = 0;
5392         dst_rect.width = in_width;
5393         dst_rect.height = in_height;
5394
5395         status = i965_image_processing(ctx,
5396                                        &src_surface,
5397                                        &src_rect,
5398                                        &dst_surface,
5399                                        &dst_rect);
5400         assert(status == VA_STATUS_SUCCESS);
5401
5402         src_surface.base = (struct object_base *)obj_surface;
5403         src_surface.type = I965_SURFACE_TYPE_SURFACE;
5404         src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
5405     }
5406
5407     if (pipeline_param->surface_region) {
5408         src_rect.x = pipeline_param->surface_region->x;
5409         src_rect.y = pipeline_param->surface_region->y;
5410         src_rect.width = pipeline_param->surface_region->width;
5411         src_rect.height = pipeline_param->surface_region->height;
5412     } else {
5413         src_rect.x = 0;
5414         src_rect.y = 0;
5415         src_rect.width = in_width;
5416         src_rect.height = in_height;
5417     }
5418
5419     if (pipeline_param->output_region) {
5420         dst_rect.x = pipeline_param->output_region->x;
5421         dst_rect.y = pipeline_param->output_region->y;
5422         dst_rect.width = pipeline_param->output_region->width;
5423         dst_rect.height = pipeline_param->output_region->height;
5424     } else {
5425         dst_rect.x = 0;
5426         dst_rect.y = 0;
5427         dst_rect.width = in_width;
5428         dst_rect.height = in_height;
5429     }
5430
5431     proc_context->pp_context.pipeline_param = pipeline_param;
5432
5433     for (i = 0; i < pipeline_param->num_filters; i++) {
5434         struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]);
5435         VAProcFilterParameterBufferBase *filter_param = NULL;
5436         VAProcFilterType filter_type;
5437         int kernel_index;
5438
5439         if (!obj_buffer ||
5440             !obj_buffer->buffer_store ||
5441             !obj_buffer->buffer_store->buffer) {
5442             status = VA_STATUS_ERROR_INVALID_FILTER_CHAIN;
5443             goto error;
5444         }
5445
5446         out_surface_id = VA_INVALID_ID;
5447         filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer;
5448         filter_type = filter_param->type;
5449         kernel_index = procfilter_to_pp_flag[filter_type];
5450
5451         if (kernel_index != PP_NULL &&
5452             proc_context->pp_context.pp_modules[kernel_index].kernel.bo != NULL) {
5453             status = i965_CreateSurfaces(ctx,
5454                                          in_width,
5455                                          in_height,
5456                                          VA_RT_FORMAT_YUV420,
5457                                          1,
5458                                          &out_surface_id);
5459             assert(status == VA_STATUS_SUCCESS);
5460             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
5461             obj_surface = SURFACE(out_surface_id);
5462             assert(obj_surface);
5463             i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5464             dst_surface.base = (struct object_base *)obj_surface;
5465             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5466             status = i965_post_processing_internal(ctx, &proc_context->pp_context,
5467                                                    &src_surface,
5468                                                    &src_rect,
5469                                                    &dst_surface,
5470                                                    &src_rect,
5471                                                    kernel_index,
5472                                                    filter_param);
5473
5474             if (status == VA_STATUS_SUCCESS) {
5475                 src_surface.base = dst_surface.base;
5476                 src_surface.type = dst_surface.type;
5477                 src_surface.flags = dst_surface.flags;
5478             }
5479         }
5480     }
5481
5482     proc_context->pp_context.pipeline_param = NULL;
5483     obj_surface = SURFACE(proc_state->current_render_target);
5484     
5485     if (!obj_surface) {
5486         status = VA_STATUS_ERROR_INVALID_SURFACE;
5487         goto error;
5488     }
5489
5490     int csc_needed = 0;
5491     if (obj_surface->fourcc && obj_surface->fourcc !=  VA_FOURCC_NV12){
5492         csc_needed = 1;
5493         out_surface_id = VA_INVALID_ID;
5494         status = i965_CreateSurfaces(ctx,
5495                                      obj_surface->orig_width,
5496                                      obj_surface->orig_height,
5497                                      VA_RT_FORMAT_YUV420, 
5498                                      1,
5499                                      &out_surface_id);
5500         assert(status == VA_STATUS_SUCCESS);
5501         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
5502         struct object_surface *csc_surface = SURFACE(out_surface_id);
5503         assert(csc_surface);
5504         i965_check_alloc_surface_bo(ctx, csc_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5505         dst_surface.base = (struct object_base *)csc_surface;
5506     } else {
5507         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5508         dst_surface.base = (struct object_base *)obj_surface;
5509     }
5510
5511     dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5512     i965_vpp_clear_surface(ctx, &proc_context->pp_context, obj_surface, pipeline_param->output_background_color); 
5513
5514     // load/save doesn't support different origin offset for src and dst surface
5515     if (src_rect.width == dst_rect.width &&
5516         src_rect.height == dst_rect.height &&
5517         src_rect.x == dst_rect.x &&
5518         src_rect.y == dst_rect.y) {
5519         i965_post_processing_internal(ctx, &proc_context->pp_context,
5520                                       &src_surface,
5521                                       &src_rect,
5522                                       &dst_surface,
5523                                       &dst_rect,
5524                                       PP_NV12_LOAD_SAVE_N12,
5525                                       NULL);
5526     } else {
5527
5528         i965_post_processing_internal(ctx, &proc_context->pp_context,
5529                                       &src_surface,
5530                                       &src_rect,
5531                                       &dst_surface,
5532                                       &dst_rect,
5533                                       (pipeline_param->filter_flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC ?
5534                                       PP_NV12_AVS : PP_NV12_SCALING,
5535                                       NULL);
5536     }
5537
5538     if (csc_needed) {
5539         src_surface.base = dst_surface.base;
5540         src_surface.type = dst_surface.type;
5541         src_surface.flags = dst_surface.flags;
5542         dst_surface.base = (struct object_base *)obj_surface;
5543         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5544         i965_image_processing(ctx, &src_surface, &dst_rect, &dst_surface, &dst_rect);
5545     }
5546     
5547     if (num_tmp_surfaces)
5548         i965_DestroySurfaces(ctx,
5549                              tmp_surfaces,
5550                              num_tmp_surfaces);
5551
5552     intel_batchbuffer_flush(hw_context->batch);
5553
5554     return VA_STATUS_SUCCESS;
5555
5556 error:
5557     if (num_tmp_surfaces)
5558         i965_DestroySurfaces(ctx,
5559                              tmp_surfaces,
5560                              num_tmp_surfaces);
5561
5562     return status;
5563 }
5564
5565 static void
5566 i965_proc_context_destroy(void *hw_context)
5567 {
5568     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
5569
5570     i965_post_processing_context_finalize(&proc_context->pp_context);
5571     intel_batchbuffer_free(proc_context->base.batch);
5572     free(proc_context);
5573 }
5574
5575 struct hw_context *
5576 i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config)
5577 {
5578     struct intel_driver_data *intel = intel_driver_data(ctx);
5579     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
5580
5581     proc_context->base.destroy = i965_proc_context_destroy;
5582     proc_context->base.run = i965_proc_picture;
5583     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
5584     i965_post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
5585
5586     return (struct hw_context *)proc_context;
5587 }
5588
5589