MPEG-2 encoding path
[profile/ivi/vaapi-intel-driver.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_post_processing.h"
40 #include "i965_render.h"
41 #include "intel_media.h"
42
/*
 * True when the device id stored in the driver context belongs to Ironlake,
 * Gen6 or Gen7 hardware.
 * NOTE(review): presumably these are the generations for which this driver
 * provides post-processing kernels -- confirm against the callers.
 */
43 #define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) ||     \
44                      IS_GEN6((ctx)->intel.device_id) ||         \
45                      IS_GEN7((ctx)->intel.device_id))
46
/*
 * Size of one surface-state slot.
 *
 * For each hardware family (I965 and GEN7) both surface-state layouts are
 * passed through ALIGN(..., 32) and the larger result is taken; the final
 * SURFACE_STATE_PADDED_SIZE is the larger of the two families, so one slot
 * size fits every layout.
 * NOTE(review): assumes ALIGN rounds up to a multiple of its second
 * argument; the macro is defined outside this file -- confirm.
 */
47 #define SURFACE_STATE_PADDED_SIZE_0_I965        ALIGN(sizeof(struct i965_surface_state), 32)
48 #define SURFACE_STATE_PADDED_SIZE_1_I965        ALIGN(sizeof(struct i965_surface_state2), 32)
49 #define SURFACE_STATE_PADDED_SIZE_I965          MAX(SURFACE_STATE_PADDED_SIZE_0_I965, SURFACE_STATE_PADDED_SIZE_1_I965)
50
51 #define SURFACE_STATE_PADDED_SIZE_0_GEN7        ALIGN(sizeof(struct gen7_surface_state), 32)
52 #define SURFACE_STATE_PADDED_SIZE_1_GEN7        ALIGN(sizeof(struct gen7_surface_state2), 32)
53 #define SURFACE_STATE_PADDED_SIZE_GEN7          MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)
54
55 #define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
/*
 * Byte offset of surface-state slot `index`. The argument is parenthesized
 * so that compound expressions such as SURFACE_STATE_OFFSET(i + 1) multiply
 * the whole index by the slot size.
 */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
/* Offset of slot number MAX_PP_SURFACES, i.e. just past slots 0..MAX_PP_SURFACES-1. */
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)
58
/*
 * NOTE(review): presumably the pixel block size (16x8) handled per kernel
 * invocation and the x-offset alignment the shaders require; none of these
 * constants is used in the visible part of the file -- confirm at the use
 * sites.
 */
59 #define GPU_ASM_BLOCK_WIDTH         16
60 #define GPU_ASM_BLOCK_HEIGHT        8
61 #define GPU_ASM_X_OFFSET_ALIGNMENT  4
62
/*
 * Gen5 kernel binaries for the post-processing modules.
 *
 * Each table is populated by textually including a shader file from
 * shaders/post_processing/gen5_6/; every row holds four 32-bit words.
 * The tables are referenced, together with their sizeof, from
 * pp_modules_gen5 below.
 */
63 static const uint32_t pp_null_gen5[][4] = {
64 #include "shaders/post_processing/gen5_6/null.g4b.gen5"
65 };
66
67 static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
68 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
69 };
70
71 static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
72 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
73 };
74
75 static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
76 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
77 };
78
79 static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
80 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
81 };
82
83 static const uint32_t pp_nv12_scaling_gen5[][4] = {
84 #include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
85 };
86
87 static const uint32_t pp_nv12_avs_gen5[][4] = {
88 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
89 };
90
91 static const uint32_t pp_nv12_dndi_gen5[][4] = {
92 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
93 };
94
95 static const uint32_t pp_nv12_dn_gen5[][4] = {
96 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
97 };
98
99 static const uint32_t pp_nv12_load_save_pa_gen5[][4] = {
100 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5"
101 };
102
103 static const uint32_t pp_pl3_load_save_pa_gen5[][4] = {
104 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5"
105 };
106
107 static const uint32_t pp_pa_load_save_nv12_gen5[][4] = {
108 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5"
109 };
110
111 static const uint32_t pp_pa_load_save_pl3_gen5[][4] = {
112 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5"
113 };
114
115 static const uint32_t pp_rgbx_load_save_nv12_gen5[][4] = {
116 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g4b.gen5"
117 };
118
119 static const uint32_t pp_nv12_load_save_rgbx_gen5[][4] = {
120 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g4b.gen5"
121 };
122
123 static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
124                                    const struct i965_surface *src_surface,
125                                    const VARectangle *src_rect,
126                                    struct i965_surface *dst_surface,
127                                    const VARectangle *dst_rect,
128                                    void *filter_param);
129 static VAStatus pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
130                                             const struct i965_surface *src_surface,
131                                             const VARectangle *src_rect,
132                                             struct i965_surface *dst_surface,
133                                             const VARectangle *dst_rect,
134                                             void *filter_param);
135 static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
136                                            const struct i965_surface *src_surface,
137                                            const VARectangle *src_rect,
138                                            struct i965_surface *dst_surface,
139                                            const VARectangle *dst_rect,
140                                            void *filter_param);
141 static VAStatus gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
142                                              const struct i965_surface *src_surface,
143                                              const VARectangle *src_rect,
144                                              struct i965_surface *dst_surface,
145                                              const VARectangle *dst_rect,
146                                              void *filter_param);
147 static VAStatus pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
148                                                 const struct i965_surface *src_surface,
149                                                 const VARectangle *src_rect,
150                                                 struct i965_surface *dst_surface,
151                                                 const VARectangle *dst_rect,
152                                                 void *filter_param);
153 static VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
154                                         const struct i965_surface *src_surface,
155                                         const VARectangle *src_rect,
156                                         struct i965_surface *dst_surface,
157                                         const VARectangle *dst_rect,
158                                         void *filter_param);
159 static VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
160                                       const struct i965_surface *src_surface,
161                                       const VARectangle *src_rect,
162                                       struct i965_surface *dst_surface,
163                                       const VARectangle *dst_rect,
164                                       void *filter_param);
165
166 static struct pp_module pp_modules_gen5[] = {
167     {
168         {
169             "NULL module (for testing)",
170             PP_NULL,
171             pp_null_gen5,
172             sizeof(pp_null_gen5),
173             NULL,
174         },
175
176         pp_null_initialize,
177     },
178
179     {
180         {
181             "NV12_NV12",
182             PP_NV12_LOAD_SAVE_N12,
183             pp_nv12_load_save_nv12_gen5,
184             sizeof(pp_nv12_load_save_nv12_gen5),
185             NULL,
186         },
187
188         pp_plx_load_save_plx_initialize,
189     },
190
191     {
192         {
193             "NV12_PL3",
194             PP_NV12_LOAD_SAVE_PL3,
195             pp_nv12_load_save_pl3_gen5,
196             sizeof(pp_nv12_load_save_pl3_gen5),
197             NULL,
198         },
199
200         pp_plx_load_save_plx_initialize,
201     },
202
203     {
204         {
205             "PL3_NV12",
206             PP_PL3_LOAD_SAVE_N12,
207             pp_pl3_load_save_nv12_gen5,
208             sizeof(pp_pl3_load_save_nv12_gen5),
209             NULL,
210         },
211
212         pp_plx_load_save_plx_initialize,
213     },
214
215     {
216         {
217             "PL3_PL3",
218             PP_PL3_LOAD_SAVE_N12,
219             pp_pl3_load_save_pl3_gen5,
220             sizeof(pp_pl3_load_save_pl3_gen5),
221             NULL,
222         },
223
224         pp_plx_load_save_plx_initialize
225     },
226
227     {
228         {
229             "NV12 Scaling module",
230             PP_NV12_SCALING,
231             pp_nv12_scaling_gen5,
232             sizeof(pp_nv12_scaling_gen5),
233             NULL,
234         },
235
236         pp_nv12_scaling_initialize,
237     },
238
239     {
240         {
241             "NV12 AVS module",
242             PP_NV12_AVS,
243             pp_nv12_avs_gen5,
244             sizeof(pp_nv12_avs_gen5),
245             NULL,
246         },
247
248         pp_nv12_avs_initialize_nlas,
249     },
250
251     {
252         {
253             "NV12 DNDI module",
254             PP_NV12_DNDI,
255             pp_nv12_dndi_gen5,
256             sizeof(pp_nv12_dndi_gen5),
257             NULL,
258         },
259
260         pp_nv12_dndi_initialize,
261     },
262
263     {
264         {
265             "NV12 DN module",
266             PP_NV12_DN,
267             pp_nv12_dn_gen5,
268             sizeof(pp_nv12_dn_gen5),
269             NULL,
270         },
271
272         pp_nv12_dn_initialize,
273     },
274
275     {
276         {
277             "NV12_PA module",
278             PP_NV12_LOAD_SAVE_PA,
279             pp_nv12_load_save_pa_gen5,
280             sizeof(pp_nv12_load_save_pa_gen5),
281             NULL,
282         },
283     
284         pp_plx_load_save_plx_initialize,
285     },
286
287     {
288         {
289             "PL3_PA module",
290             PP_PL3_LOAD_SAVE_PA,
291             pp_pl3_load_save_pa_gen5,
292             sizeof(pp_pl3_load_save_pa_gen5),
293             NULL,
294         },
295     
296         pp_plx_load_save_plx_initialize,
297     },
298
299     {
300         {
301             "PA_NV12 module",
302             PP_PA_LOAD_SAVE_NV12,
303             pp_pa_load_save_nv12_gen5,
304             sizeof(pp_pa_load_save_nv12_gen5),
305             NULL,
306         },
307     
308         pp_plx_load_save_plx_initialize,
309     },
310
311     {
312         {
313             "PA_PL3 module",
314             PP_PA_LOAD_SAVE_PL3,
315             pp_pa_load_save_pl3_gen5,
316             sizeof(pp_pa_load_save_pl3_gen5),
317             NULL,
318         },
319     
320         pp_plx_load_save_plx_initialize,
321     },
322
323     {
324         {
325             "RGBX_NV12 module",
326             PP_RGBX_LOAD_SAVE_NV12,
327             pp_rgbx_load_save_nv12_gen5,
328             sizeof(pp_rgbx_load_save_nv12_gen5),
329             NULL,
330         },
331     
332         pp_plx_load_save_plx_initialize,
333     },
334             
335     {
336         {
337             "NV12_RGBX module",
338             PP_NV12_LOAD_SAVE_RGBX,
339             pp_nv12_load_save_rgbx_gen5,
340             sizeof(pp_nv12_load_save_rgbx_gen5),
341             NULL,
342         },
343     
344         pp_plx_load_save_plx_initialize,
345     },
346                     
347 };
348
/*
 * Gen6 kernel binaries for the post-processing modules.
 *
 * Each table is populated by textually including a .g6b shader file from
 * shaders/post_processing/gen5_6/; every row holds four 32-bit words.
 * The tables are referenced, together with their sizeof, from
 * pp_modules_gen6 below.
 */
349 static const uint32_t pp_null_gen6[][4] = {
350 #include "shaders/post_processing/gen5_6/null.g6b"
351 };
352
353 static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
354 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
355 };
356
357 static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
358 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
359 };
360
361 static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
362 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
363 };
364
365 static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
366 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
367 };
368
/*
 * NOTE(review): the scaling table includes the same nv12_avs_nv12.g6b file as
 * pp_nv12_avs_gen6 right below, whereas gen5 has a dedicated
 * nv12_scaling_nv12 shader. This looks deliberate (the gen6 table pairs it
 * with gen6_nv12_scaling_initialize) -- confirm.
 */
369 static const uint32_t pp_nv12_scaling_gen6[][4] = {
370 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
371 };
372
373 static const uint32_t pp_nv12_avs_gen6[][4] = {
374 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
375 };
376
377 static const uint32_t pp_nv12_dndi_gen6[][4] = {
378 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
379 };
380
381 static const uint32_t pp_nv12_dn_gen6[][4] = {
382 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
383 };
384
385 static const uint32_t pp_nv12_load_save_pa_gen6[][4] = {
386 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g6b"
387 };
388
389 static const uint32_t pp_pl3_load_save_pa_gen6[][4] = {
390 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g6b"
391 };
392
393 static const uint32_t pp_pa_load_save_nv12_gen6[][4] = {
394 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g6b"
395 };
396
397 static const uint32_t pp_pa_load_save_pl3_gen6[][4] = {
398 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g6b"
399 };
400
401 static const uint32_t pp_rgbx_load_save_nv12_gen6[][4] = {
402 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g6b"
403 };
404
405 static const uint32_t pp_nv12_load_save_rgbx_gen6[][4] = {
406 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g6b"
407 };
408
409 static struct pp_module pp_modules_gen6[] = {
410     {
411         {
412             "NULL module (for testing)",
413             PP_NULL,
414             pp_null_gen6,
415             sizeof(pp_null_gen6),
416             NULL,
417         },
418
419         pp_null_initialize,
420     },
421
422     {
423         {
424             "NV12_NV12",
425             PP_NV12_LOAD_SAVE_N12,
426             pp_nv12_load_save_nv12_gen6,
427             sizeof(pp_nv12_load_save_nv12_gen6),
428             NULL,
429         },
430
431         pp_plx_load_save_plx_initialize,
432     },
433
434     {
435         {
436             "NV12_PL3",
437             PP_NV12_LOAD_SAVE_PL3,
438             pp_nv12_load_save_pl3_gen6,
439             sizeof(pp_nv12_load_save_pl3_gen6),
440             NULL,
441         },
442         
443         pp_plx_load_save_plx_initialize,
444     },
445
446     {
447         {
448             "PL3_NV12",
449             PP_PL3_LOAD_SAVE_N12,
450             pp_pl3_load_save_nv12_gen6,
451             sizeof(pp_pl3_load_save_nv12_gen6),
452             NULL,
453         },
454
455         pp_plx_load_save_plx_initialize,
456     },
457
458     {
459         {
460             "PL3_PL3",
461             PP_PL3_LOAD_SAVE_N12,
462             pp_pl3_load_save_pl3_gen6,
463             sizeof(pp_pl3_load_save_pl3_gen6),
464             NULL,
465         },
466
467         pp_plx_load_save_plx_initialize,
468     },
469
470     {
471         {
472             "NV12 Scaling module",
473             PP_NV12_SCALING,
474             pp_nv12_scaling_gen6,
475             sizeof(pp_nv12_scaling_gen6),
476             NULL,
477         },
478
479         gen6_nv12_scaling_initialize,
480     },
481
482     {
483         {
484             "NV12 AVS module",
485             PP_NV12_AVS,
486             pp_nv12_avs_gen6,
487             sizeof(pp_nv12_avs_gen6),
488             NULL,
489         },
490
491         pp_nv12_avs_initialize_nlas,
492     },
493
494     {
495         {
496             "NV12 DNDI module",
497             PP_NV12_DNDI,
498             pp_nv12_dndi_gen6,
499             sizeof(pp_nv12_dndi_gen6),
500             NULL,
501         },
502
503         pp_nv12_dndi_initialize,
504     },
505
506     {
507         {
508             "NV12 DN module",
509             PP_NV12_DN,
510             pp_nv12_dn_gen6,
511             sizeof(pp_nv12_dn_gen6),
512             NULL,
513         },
514
515         pp_nv12_dn_initialize,
516     },
517     {
518         {
519             "NV12_PA module",
520             PP_NV12_LOAD_SAVE_PA,
521             pp_nv12_load_save_pa_gen6,
522             sizeof(pp_nv12_load_save_pa_gen6),
523             NULL,
524         },
525     
526         pp_plx_load_save_plx_initialize,
527     },
528     
529     {
530         {
531             "PL3_PA module",
532             PP_PL3_LOAD_SAVE_PA,
533             pp_pl3_load_save_pa_gen6,
534             sizeof(pp_pl3_load_save_pa_gen6),
535             NULL,
536         },
537     
538         pp_plx_load_save_plx_initialize,
539     },
540     
541     {
542         {
543             "PA_NV12 module",
544             PP_PA_LOAD_SAVE_NV12,
545             pp_pa_load_save_nv12_gen6,
546             sizeof(pp_pa_load_save_nv12_gen6),
547             NULL,
548         },
549     
550         pp_plx_load_save_plx_initialize,
551     },
552
553     {
554         {
555             "PA_PL3 module",
556             PP_PA_LOAD_SAVE_PL3,
557             pp_pa_load_save_pl3_gen6,
558             sizeof(pp_pa_load_save_pl3_gen6),
559             NULL,
560         },
561     
562         pp_plx_load_save_plx_initialize,
563     },
564     
565     {
566         {
567             "RGBX_NV12 module",
568             PP_RGBX_LOAD_SAVE_NV12,
569             pp_rgbx_load_save_nv12_gen6,
570             sizeof(pp_rgbx_load_save_nv12_gen6),
571             NULL,
572         },
573     
574         pp_plx_load_save_plx_initialize,
575     },
576
577     {
578         {
579             "NV12_RGBX module",
580             PP_NV12_LOAD_SAVE_RGBX,
581             pp_nv12_load_save_rgbx_gen6,
582             sizeof(pp_nv12_load_save_rgbx_gen6),
583             NULL,
584         },
585     
586         pp_plx_load_save_plx_initialize,
587     },
588 };
589
/*
 * Gen7 kernel binaries for the post-processing modules.
 *
 * Except for pp_null_gen7, each table is populated by textually including a
 * .g7b shader file from shaders/post_processing/gen7/; every row holds four
 * 32-bit words. The tables are referenced, together with their sizeof, from
 * pp_modules_gen7 below.
 */
/*
 * The NULL kernel has no shader on gen7: the initializer is empty.
 * NOTE(review): an empty brace initializer for an array of unknown size
 * relies on a compiler extension (zero-length array); sizeof(pp_null_gen7)
 * is then expected to be 0 -- confirm with the compilers this driver
 * supports.
 */
590 static const uint32_t pp_null_gen7[][4] = {
591 };
592
593 static const uint32_t pp_nv12_load_save_nv12_gen7[][4] = {
594 #include "shaders/post_processing/gen7/pl2_to_pl2.g7b"
595 };
596
597 static const uint32_t pp_nv12_load_save_pl3_gen7[][4] = {
598 #include "shaders/post_processing/gen7/pl2_to_pl3.g7b"
599 };
600
601 static const uint32_t pp_pl3_load_save_nv12_gen7[][4] = {
602 #include "shaders/post_processing/gen7/pl3_to_pl2.g7b"
603 };
604
605 static const uint32_t pp_pl3_load_save_pl3_gen7[][4] = {
606 #include "shaders/post_processing/gen7/pl3_to_pl3.g7b"
607 };
608
/* The scaling and AVS tables both include the same avs.g7b file. */
609 static const uint32_t pp_nv12_scaling_gen7[][4] = {
610 #include "shaders/post_processing/gen7/avs.g7b"
611 };
612
613 static const uint32_t pp_nv12_avs_gen7[][4] = {
614 #include "shaders/post_processing/gen7/avs.g7b"
615 };
616
617 static const uint32_t pp_nv12_dndi_gen7[][4] = {
618 #include "shaders/post_processing/gen7/dndi.g7b"
619 };
620
621 static const uint32_t pp_nv12_dn_gen7[][4] = {
622 #include "shaders/post_processing/gen7/nv12_dn_nv12.g7b"
623 };
624 static const uint32_t pp_nv12_load_save_pa_gen7[][4] = {
625 #include "shaders/post_processing/gen7/pl2_to_pa.g7b"
626 };
627 static const uint32_t pp_pl3_load_save_pa_gen7[][4] = {
628 #include "shaders/post_processing/gen7/pl3_to_pa.g7b"
629 };
630 static const uint32_t pp_pa_load_save_nv12_gen7[][4] = {
631 #include "shaders/post_processing/gen7/pa_to_pl2.g7b"
632 };
633 static const uint32_t pp_pa_load_save_pl3_gen7[][4] = {
634 #include "shaders/post_processing/gen7/pa_to_pl3.g7b"
635 };
636 static const uint32_t pp_rgbx_load_save_nv12_gen7[][4] = {
637 #include "shaders/post_processing/gen7/rgbx_to_nv12.g7b"
638 };
639 static const uint32_t pp_nv12_load_save_rgbx_gen7[][4] = {
640 #include "shaders/post_processing/gen7/pl2_to_rgbx.g7b"
641 };
642
643 static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
644                                            const struct i965_surface *src_surface,
645                                            const VARectangle *src_rect,
646                                            struct i965_surface *dst_surface,
647                                            const VARectangle *dst_rect,
648                                            void *filter_param);
649 static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
650                                              const struct i965_surface *src_surface,
651                                              const VARectangle *src_rect,
652                                              struct i965_surface *dst_surface,
653                                              const VARectangle *dst_rect,
654                                              void *filter_param);
655 static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
656                                            const struct i965_surface *src_surface,
657                                            const VARectangle *src_rect,
658                                            struct i965_surface *dst_surface,
659                                            const VARectangle *dst_rect,
660                                            void *filter_param);
661
662 static VAStatus gen7_pp_rgbx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
663                                            const struct i965_surface *src_surface,
664                                            const VARectangle *src_rect,
665                                            struct i965_surface *dst_surface,
666                                            const VARectangle *dst_rect,
667                                            void *filter_param);
668
669 static struct pp_module pp_modules_gen7[] = {
670     {
671         {
672             "NULL module (for testing)",
673             PP_NULL,
674             pp_null_gen7,
675             sizeof(pp_null_gen7),
676             NULL,
677         },
678
679         pp_null_initialize,
680     },
681
682     {
683         {
684             "NV12_NV12",
685             PP_NV12_LOAD_SAVE_N12,
686             pp_nv12_load_save_nv12_gen7,
687             sizeof(pp_nv12_load_save_nv12_gen7),
688             NULL,
689         },
690
691         gen7_pp_plx_avs_initialize,
692     },
693
694     {
695         {
696             "NV12_PL3",
697             PP_NV12_LOAD_SAVE_PL3,
698             pp_nv12_load_save_pl3_gen7,
699             sizeof(pp_nv12_load_save_pl3_gen7),
700             NULL,
701         },
702         
703         gen7_pp_plx_avs_initialize,
704     },
705
706     {
707         {
708             "PL3_NV12",
709             PP_PL3_LOAD_SAVE_N12,
710             pp_pl3_load_save_nv12_gen7,
711             sizeof(pp_pl3_load_save_nv12_gen7),
712             NULL,
713         },
714
715         gen7_pp_plx_avs_initialize,
716     },
717
718     {
719         {
720             "PL3_PL3",
721             PP_PL3_LOAD_SAVE_N12,
722             pp_pl3_load_save_pl3_gen7,
723             sizeof(pp_pl3_load_save_pl3_gen7),
724             NULL,
725         },
726
727         gen7_pp_plx_avs_initialize,
728     },
729
730     {
731         {
732             "NV12 Scaling module",
733             PP_NV12_SCALING,
734             pp_nv12_scaling_gen7,
735             sizeof(pp_nv12_scaling_gen7),
736             NULL,
737         },
738
739         gen7_pp_plx_avs_initialize,
740     },
741
742     {
743         {
744             "NV12 AVS module",
745             PP_NV12_AVS,
746             pp_nv12_avs_gen7,
747             sizeof(pp_nv12_avs_gen7),
748             NULL,
749         },
750
751         gen7_pp_plx_avs_initialize,
752     },
753
754     {
755         {
756             "NV12 DNDI module",
757             PP_NV12_DNDI,
758             pp_nv12_dndi_gen7,
759             sizeof(pp_nv12_dndi_gen7),
760             NULL,
761         },
762
763         gen7_pp_nv12_dndi_initialize,
764     },
765
766     {
767         {
768             "NV12 DN module",
769             PP_NV12_DN,
770             pp_nv12_dn_gen7,
771             sizeof(pp_nv12_dn_gen7),
772             NULL,
773         },
774
775         gen7_pp_nv12_dn_initialize,
776     },
777     {
778         {
779             "NV12_PA module",
780             PP_NV12_LOAD_SAVE_PA,
781             pp_nv12_load_save_pa_gen7,
782             sizeof(pp_nv12_load_save_pa_gen7),
783             NULL,
784         },
785     
786         gen7_pp_plx_avs_initialize,
787     },
788
789     {
790         {
791             "PL3_PA module",
792             PP_PL3_LOAD_SAVE_PA,
793             pp_pl3_load_save_pa_gen7,
794             sizeof(pp_pl3_load_save_pa_gen7),
795             NULL,
796         },
797     
798         gen7_pp_plx_avs_initialize,
799     },
800
801     {
802         {
803             "PA_NV12 module",
804             PP_PA_LOAD_SAVE_NV12,
805             pp_pa_load_save_nv12_gen7,
806             sizeof(pp_pa_load_save_nv12_gen7),
807             NULL,
808         },
809     
810         gen7_pp_plx_avs_initialize,
811     },
812
813     {
814         {
815             "PA_PL3 module",
816             PP_PA_LOAD_SAVE_PL3,
817             pp_pa_load_save_pl3_gen7,
818             sizeof(pp_pa_load_save_pl3_gen7),
819             NULL,
820         },
821     
822         gen7_pp_plx_avs_initialize,
823     },
824     
825     {
826         {
827             "RGBX_NV12 module",
828             PP_RGBX_LOAD_SAVE_NV12,
829             pp_rgbx_load_save_nv12_gen7,
830             sizeof(pp_rgbx_load_save_nv12_gen7),
831             NULL,
832         },
833     
834         gen7_pp_rgbx_avs_initialize,
835     },
836
837     {
838         {
839             "NV12_RGBX module",
840             PP_NV12_LOAD_SAVE_RGBX,
841             pp_nv12_load_save_rgbx_gen7,
842             sizeof(pp_nv12_load_save_rgbx_gen7),
843             NULL,
844         },
845     
846         gen7_pp_plx_avs_initialize,
847     },
848             
849 };
850
/*
 * Gen7.5 kernel binaries for the post-processing modules.
 *
 * Tables that have a shader are populated by textually including a .g75b
 * file from shaders/post_processing/gen7/; every row holds four 32-bit
 * words. The tables are referenced, together with their sizeof, from
 * pp_modules_gen75 below.
 *
 * NOTE(review): pp_null_gen75, pp_nv12_dndi_gen75 and pp_nv12_dn_gen75 have
 * empty initializers, which relies on a compiler extension (zero-length
 * array) and is expected to give a sizeof of 0 -- confirm with the compilers
 * this driver supports.
 */
851 static const uint32_t pp_null_gen75[][4] = {
852 };
853
854 static const uint32_t pp_nv12_load_save_nv12_gen75[][4] = {
855 #include "shaders/post_processing/gen7/pl2_to_pl2.g75b"
856 };
857
858 static const uint32_t pp_nv12_load_save_pl3_gen75[][4] = {
859 #include "shaders/post_processing/gen7/pl2_to_pl3.g75b"
860 };
861
862 static const uint32_t pp_pl3_load_save_nv12_gen75[][4] = {
863 #include "shaders/post_processing/gen7/pl3_to_pl2.g75b"
864 };
865
866 static const uint32_t pp_pl3_load_save_pl3_gen75[][4] = {
867 #include "shaders/post_processing/gen7/pl3_to_pl3.g75b"
868 };
869
/* The scaling and AVS tables both include the same avs.g75b file. */
870 static const uint32_t pp_nv12_scaling_gen75[][4] = {
871 #include "shaders/post_processing/gen7/avs.g75b"
872 };
873
874 static const uint32_t pp_nv12_avs_gen75[][4] = {
875 #include "shaders/post_processing/gen7/avs.g75b"
876 };
877
/*
 * NOTE(review): the DNDI and DN shader includes are commented out, leaving
 * both tables empty, yet pp_modules_gen75 still registers both modules with
 * gen7_pp_nv12_dndi_initialize / gen7_pp_nv12_dn_initialize. Presumably the
 * gen7.5 shaders were not available yet -- confirm that these modules are
 * never selected on this hardware.
 */
878 static const uint32_t pp_nv12_dndi_gen75[][4] = {
879 // #include "shaders/post_processing/gen7/dndi.g75b"
880 };
881
882 static const uint32_t pp_nv12_dn_gen75[][4] = {
883 // #include "shaders/post_processing/gen7/nv12_dn_nv12.g75b"
884 };
885 static const uint32_t pp_nv12_load_save_pa_gen75[][4] = {
886 #include "shaders/post_processing/gen7/pl2_to_pa.g75b"
887 };
888 static const uint32_t pp_pl3_load_save_pa_gen75[][4] = {
889 #include "shaders/post_processing/gen7/pl3_to_pa.g75b"
890 };
891 static const uint32_t pp_pa_load_save_nv12_gen75[][4] = {
892 #include "shaders/post_processing/gen7/pa_to_pl2.g75b"
893 };
894 static const uint32_t pp_pa_load_save_pl3_gen75[][4] = {
895 #include "shaders/post_processing/gen7/pa_to_pl3.g75b"
896 };
897 static const uint32_t pp_rgbx_load_save_nv12_gen75[][4] = {
898 #include "shaders/post_processing/gen7/rgbx_to_nv12.g75b"
899 };
900 static const uint32_t pp_nv12_load_save_rgbx_gen75[][4] = {
901 #include "shaders/post_processing/gen7/pl2_to_rgbx.g75b"
902 };
903
904 static struct pp_module pp_modules_gen75[] = {
905     {
906         {
907             "NULL module (for testing)",
908             PP_NULL,
909             pp_null_gen75,
910             sizeof(pp_null_gen75),
911             NULL,
912         },
913
914         pp_null_initialize,
915     },
916
917     {
918         {
919             "NV12_NV12",
920             PP_NV12_LOAD_SAVE_N12,
921             pp_nv12_load_save_nv12_gen75,
922             sizeof(pp_nv12_load_save_nv12_gen75),
923             NULL,
924         },
925
926         gen7_pp_plx_avs_initialize,
927     },
928
929     {
930         {
931             "NV12_PL3",
932             PP_NV12_LOAD_SAVE_PL3,
933             pp_nv12_load_save_pl3_gen75,
934             sizeof(pp_nv12_load_save_pl3_gen75),
935             NULL,
936         },
937         
938         gen7_pp_plx_avs_initialize,
939     },
940
941     {
942         {
943             "PL3_NV12",
944             PP_PL3_LOAD_SAVE_N12,
945             pp_pl3_load_save_nv12_gen75,
946             sizeof(pp_pl3_load_save_nv12_gen75),
947             NULL,
948         },
949
950         gen7_pp_plx_avs_initialize,
951     },
952
953     {
954         {
955             "PL3_PL3",
956             PP_PL3_LOAD_SAVE_N12,
957             pp_pl3_load_save_pl3_gen75,
958             sizeof(pp_pl3_load_save_pl3_gen75),
959             NULL,
960         },
961
962         gen7_pp_plx_avs_initialize,
963     },
964
965     {
966         {
967             "NV12 Scaling module",
968             PP_NV12_SCALING,
969             pp_nv12_scaling_gen75,
970             sizeof(pp_nv12_scaling_gen75),
971             NULL,
972         },
973
974         gen7_pp_plx_avs_initialize,
975     },
976
977     {
978         {
979             "NV12 AVS module",
980             PP_NV12_AVS,
981             pp_nv12_avs_gen75,
982             sizeof(pp_nv12_avs_gen75),
983             NULL,
984         },
985
986         gen7_pp_plx_avs_initialize,
987     },
988
989     {
990         {
991             "NV12 DNDI module",
992             PP_NV12_DNDI,
993             pp_nv12_dndi_gen75,
994             sizeof(pp_nv12_dndi_gen75),
995             NULL,
996         },
997
998         gen7_pp_nv12_dndi_initialize,
999     },
1000
1001     {
1002         {
1003             "NV12 DN module",
1004             PP_NV12_DN,
1005             pp_nv12_dn_gen75,
1006             sizeof(pp_nv12_dn_gen75),
1007             NULL,
1008         },
1009
1010         gen7_pp_nv12_dn_initialize,
1011     },
1012     {
1013         {
1014             "NV12_PA module",
1015             PP_NV12_LOAD_SAVE_PA,
1016             pp_nv12_load_save_pa_gen75,
1017             sizeof(pp_nv12_load_save_pa_gen75),
1018             NULL,
1019         },
1020     
1021         gen7_pp_plx_avs_initialize,
1022     },
1023
1024     {
1025         {
1026             "PL3_PA module",
1027             PP_PL3_LOAD_SAVE_PA,
1028             pp_pl3_load_save_pa_gen75,
1029             sizeof(pp_pl3_load_save_pa_gen75),
1030             NULL,
1031         },
1032     
1033         gen7_pp_plx_avs_initialize,
1034     },
1035
1036     {
1037         {
1038             "PA_NV12 module",
1039             PP_PA_LOAD_SAVE_NV12,
1040             pp_pa_load_save_nv12_gen75,
1041             sizeof(pp_pa_load_save_nv12_gen75),
1042             NULL,
1043         },
1044     
1045         gen7_pp_plx_avs_initialize,
1046     },
1047
1048     {
1049         {
1050             "PA_PL3 module",
1051             PP_PA_LOAD_SAVE_PL3,
1052             pp_pa_load_save_pl3_gen75,
1053             sizeof(pp_pa_load_save_pl3_gen75),
1054             NULL,
1055         },
1056     
1057         gen7_pp_plx_avs_initialize,
1058     },
1059     
1060     {
1061         {
1062             "RGBX_NV12 module",
1063             PP_RGBX_LOAD_SAVE_NV12,
1064             pp_rgbx_load_save_nv12_gen75,
1065             sizeof(pp_rgbx_load_save_nv12_gen75),
1066             NULL,
1067         },
1068     
1069         gen7_pp_rgbx_avs_initialize,
1070     },
1071
1072     {
1073         {
1074             "NV12_RGBX module",
1075             PP_NV12_LOAD_SAVE_RGBX,
1076             pp_nv12_load_save_rgbx_gen75,
1077             sizeof(pp_nv12_load_save_rgbx_gen75),
1078             NULL,
1079         },
1080     
1081         gen7_pp_plx_avs_initialize,
1082     },
1083             
1084 };
1085
1086 static int
1087 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
1088 {
1089     struct i965_driver_data *i965 = i965_driver_data(ctx);
1090     int fourcc;
1091
1092     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
1093         struct object_image *obj_image = IMAGE(surface->id);
1094         fourcc = obj_image->image.format.fourcc;
1095     } else {
1096         struct object_surface *obj_surface = SURFACE(surface->id);
1097         fourcc = obj_surface->fourcc;
1098     }
1099
1100     return fourcc;
1101 }
1102
1103 static void
1104 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
1105 {
1106     switch (tiling) {
1107     case I915_TILING_NONE:
1108         ss->ss3.tiled_surface = 0;
1109         ss->ss3.tile_walk = 0;
1110         break;
1111     case I915_TILING_X:
1112         ss->ss3.tiled_surface = 1;
1113         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
1114         break;
1115     case I915_TILING_Y:
1116         ss->ss3.tiled_surface = 1;
1117         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
1118         break;
1119     }
1120 }
1121
1122 static void
1123 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
1124 {
1125     switch (tiling) {
1126     case I915_TILING_NONE:
1127         ss->ss2.tiled_surface = 0;
1128         ss->ss2.tile_walk = 0;
1129         break;
1130     case I915_TILING_X:
1131         ss->ss2.tiled_surface = 1;
1132         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1133         break;
1134     case I915_TILING_Y:
1135         ss->ss2.tiled_surface = 1;
1136         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1137         break;
1138     }
1139 }
1140
1141 static void
1142 gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
1143 {
1144     switch (tiling) {
1145     case I915_TILING_NONE:
1146         ss->ss0.tiled_surface = 0;
1147         ss->ss0.tile_walk = 0;
1148         break;
1149     case I915_TILING_X:
1150         ss->ss0.tiled_surface = 1;
1151         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
1152         break;
1153     case I915_TILING_Y:
1154         ss->ss0.tiled_surface = 1;
1155         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
1156         break;
1157     }
1158 }
1159
1160 static void
1161 gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
1162 {
1163     switch (tiling) {
1164     case I915_TILING_NONE:
1165         ss->ss2.tiled_surface = 0;
1166         ss->ss2.tile_walk = 0;
1167         break;
1168     case I915_TILING_X:
1169         ss->ss2.tiled_surface = 1;
1170         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1171         break;
1172     case I915_TILING_Y:
1173         ss->ss2.tiled_surface = 1;
1174         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1175         break;
1176     }
1177 }
1178
1179 static void
1180 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
1181 {
1182     struct i965_interface_descriptor *desc;
1183     dri_bo *bo;
1184     int pp_index = pp_context->current_pp;
1185
1186     bo = pp_context->idrt.bo;
1187     dri_bo_map(bo, 1);
1188     assert(bo->virtual);
1189     desc = bo->virtual;
1190     memset(desc, 0, sizeof(*desc));
1191     desc->desc0.grf_reg_blocks = 10;
1192     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
1193     desc->desc1.const_urb_entry_read_offset = 0;
1194     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
1195     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
1196     desc->desc2.sampler_count = 0;
1197     desc->desc3.binding_table_entry_count = 0;
1198     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
1199
1200     dri_bo_emit_reloc(bo,
1201                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1202                       desc->desc0.grf_reg_blocks,
1203                       offsetof(struct i965_interface_descriptor, desc0),
1204                       pp_context->pp_modules[pp_index].kernel.bo);
1205
1206     dri_bo_emit_reloc(bo,
1207                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1208                       desc->desc2.sampler_count << 2,
1209                       offsetof(struct i965_interface_descriptor, desc2),
1210                       pp_context->sampler_state_table.bo);
1211
1212     dri_bo_unmap(bo);
1213     pp_context->idrt.num_interface_descriptors++;
1214 }
1215
1216 static void
1217 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
1218 {
1219     struct i965_vfe_state *vfe_state;
1220     dri_bo *bo;
1221
1222     bo = pp_context->vfe_state.bo;
1223     dri_bo_map(bo, 1);
1224     assert(bo->virtual);
1225     vfe_state = bo->virtual;
1226     memset(vfe_state, 0, sizeof(*vfe_state));
1227     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
1228     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
1229     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
1230     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
1231     vfe_state->vfe1.children_present = 0;
1232     vfe_state->vfe2.interface_descriptor_base = 
1233         pp_context->idrt.bo->offset >> 4; /* reloc */
1234     dri_bo_emit_reloc(bo,
1235                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1236                       0,
1237                       offsetof(struct i965_vfe_state, vfe2),
1238                       pp_context->idrt.bo);
1239     dri_bo_unmap(bo);
1240 }
1241
1242 static void
1243 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
1244 {
1245     unsigned char *constant_buffer;
1246     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1247
1248     assert(sizeof(*pp_static_parameter) == 128);
1249     dri_bo_map(pp_context->curbe.bo, 1);
1250     assert(pp_context->curbe.bo->virtual);
1251     constant_buffer = pp_context->curbe.bo->virtual;
1252     memcpy(constant_buffer, pp_static_parameter, sizeof(*pp_static_parameter));
1253     dri_bo_unmap(pp_context->curbe.bo);
1254 }
1255
1256 static void
1257 ironlake_pp_states_setup(VADriverContextP ctx,
1258                          struct i965_post_processing_context *pp_context)
1259 {
1260     ironlake_pp_interface_descriptor_table(pp_context);
1261     ironlake_pp_vfe_state(pp_context);
1262     ironlake_pp_upload_constants(pp_context);
1263 }
1264
1265 static void
1266 ironlake_pp_pipeline_select(VADriverContextP ctx,
1267                             struct i965_post_processing_context *pp_context)
1268 {
1269     struct intel_batchbuffer *batch = pp_context->batch;
1270
1271     BEGIN_BATCH(batch, 1);
1272     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
1273     ADVANCE_BATCH(batch);
1274 }
1275
1276 static void
1277 ironlake_pp_urb_layout(VADriverContextP ctx,
1278                        struct i965_post_processing_context *pp_context)
1279 {
1280     struct intel_batchbuffer *batch = pp_context->batch;
1281     unsigned int vfe_fence, cs_fence;
1282
1283     vfe_fence = pp_context->urb.cs_start;
1284     cs_fence = pp_context->urb.size;
1285
1286     BEGIN_BATCH(batch, 3);
1287     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
1288     OUT_BATCH(batch, 0);
1289     OUT_BATCH(batch, 
1290               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
1291               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
1292     ADVANCE_BATCH(batch);
1293 }
1294
1295 static void
1296 ironlake_pp_state_base_address(VADriverContextP ctx,
1297                                struct i965_post_processing_context *pp_context)
1298 {
1299     struct intel_batchbuffer *batch = pp_context->batch;
1300
1301     BEGIN_BATCH(batch, 8);
1302     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
1303     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1304     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
1305     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1306     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1307     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1308     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1309     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1310     ADVANCE_BATCH(batch);
1311 }
1312
1313 static void
1314 ironlake_pp_state_pointers(VADriverContextP ctx,
1315                            struct i965_post_processing_context *pp_context)
1316 {
1317     struct intel_batchbuffer *batch = pp_context->batch;
1318
1319     BEGIN_BATCH(batch, 3);
1320     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
1321     OUT_BATCH(batch, 0);
1322     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1323     ADVANCE_BATCH(batch);
1324 }
1325
1326 static void 
1327 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
1328                           struct i965_post_processing_context *pp_context)
1329 {
1330     struct intel_batchbuffer *batch = pp_context->batch;
1331
1332     BEGIN_BATCH(batch, 2);
1333     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
1334     OUT_BATCH(batch,
1335               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
1336               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
1337     ADVANCE_BATCH(batch);
1338 }
1339
1340 static void
1341 ironlake_pp_constant_buffer(VADriverContextP ctx,
1342                             struct i965_post_processing_context *pp_context)
1343 {
1344     struct intel_batchbuffer *batch = pp_context->batch;
1345
1346     BEGIN_BATCH(batch, 2);
1347     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1348     OUT_RELOC(batch, pp_context->curbe.bo,
1349               I915_GEM_DOMAIN_INSTRUCTION, 0,
1350               pp_context->urb.size_cs_entry - 1);
1351     ADVANCE_BATCH(batch);    
1352 }
1353
1354 static void
1355 ironlake_pp_object_walker(VADriverContextP ctx,
1356                           struct i965_post_processing_context *pp_context)
1357 {
1358     struct intel_batchbuffer *batch = pp_context->batch;
1359     int x, x_steps, y, y_steps;
1360     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1361
1362     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
1363     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
1364
1365     for (y = 0; y < y_steps; y++) {
1366         for (x = 0; x < x_steps; x++) {
1367             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
1368                 BEGIN_BATCH(batch, 20);
1369                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
1370                 OUT_BATCH(batch, 0);
1371                 OUT_BATCH(batch, 0); /* no indirect data */
1372                 OUT_BATCH(batch, 0);
1373
1374                 /* inline data grf 5-6 */
1375                 assert(sizeof(*pp_inline_parameter) == 64);
1376                 intel_batchbuffer_data(batch, pp_inline_parameter, sizeof(*pp_inline_parameter));
1377
1378                 ADVANCE_BATCH(batch);
1379             }
1380         }
1381     }
1382 }
1383
1384 static void
1385 ironlake_pp_pipeline_setup(VADriverContextP ctx,
1386                            struct i965_post_processing_context *pp_context)
1387 {
1388     struct intel_batchbuffer *batch = pp_context->batch;
1389
1390     intel_batchbuffer_start_atomic(batch, 0x1000);
1391     intel_batchbuffer_emit_mi_flush(batch);
1392     ironlake_pp_pipeline_select(ctx, pp_context);
1393     ironlake_pp_state_base_address(ctx, pp_context);
1394     ironlake_pp_state_pointers(ctx, pp_context);
1395     ironlake_pp_urb_layout(ctx, pp_context);
1396     ironlake_pp_cs_urb_layout(ctx, pp_context);
1397     ironlake_pp_constant_buffer(ctx, pp_context);
1398     ironlake_pp_object_walker(ctx, pp_context);
1399     intel_batchbuffer_end_atomic(batch);
1400 }
1401
1402 // update u/v offset when the surface format are packed yuv
1403 static void i965_update_src_surface_static_parameter(
1404     VADriverContextP    ctx, 
1405     struct i965_post_processing_context *pp_context,
1406     const struct i965_surface *surface)
1407 {
1408     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1409     int fourcc = pp_get_surface_fourcc(ctx, surface);
1410
1411     switch (fourcc) {
1412     case VA_FOURCC('Y', 'U', 'Y', '2'):
1413         pp_static_parameter->grf1.source_packed_u_offset = 1;
1414         pp_static_parameter->grf1.source_packed_v_offset = 3;
1415         break;
1416     case VA_FOURCC('U', 'Y', 'V', 'Y'):
1417         pp_static_parameter->grf1.source_packed_y_offset = 1;
1418         pp_static_parameter->grf1.source_packed_v_offset = 2;
1419         break;
1420     case VA_FOURCC('B', 'G', 'R', 'X'):
1421     case VA_FOURCC('B', 'G', 'R', 'A'):
1422         pp_static_parameter->grf1.source_rgb_layout = 0;
1423         break;
1424     case VA_FOURCC('R', 'G', 'B', 'X'):
1425     case VA_FOURCC('R', 'G', 'B', 'A'):
1426         pp_static_parameter->grf1.source_rgb_layout = 1;
1427         break;
1428     default:
1429         break;
1430     }
1431     
1432 }
1433
1434 static void i965_update_dst_surface_static_parameter(
1435     VADriverContextP    ctx, 
1436     struct i965_post_processing_context *pp_context,
1437     const struct i965_surface *surface)
1438 {
1439     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1440     int fourcc = pp_get_surface_fourcc(ctx, surface);
1441
1442     switch (fourcc) {
1443     case VA_FOURCC('Y', 'U', 'Y', '2'):
1444         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_u_offset = 1;
1445         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 3;
1446         break;
1447     case VA_FOURCC('U', 'Y', 'V', 'Y'):
1448         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_y_offset = 1;
1449         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 2;
1450         break;
1451     case VA_FOURCC('B', 'G', 'R', 'X'):
1452     case VA_FOURCC('B', 'G', 'R', 'A'):
1453         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 0;
1454         break;
1455     case VA_FOURCC('R', 'G', 'B', 'X'):
1456     case VA_FOURCC('R', 'G', 'B', 'A'):
1457         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 1;
1458         break;
1459     default:
1460         break;
1461     }
1462     
1463 }
1464
1465 static void
1466 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1467                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1468                           int width, int height, int pitch, int format, 
1469                           int index, int is_target)
1470 {
1471     struct i965_surface_state *ss;
1472     dri_bo *ss_bo;
1473     unsigned int tiling;
1474     unsigned int swizzle;
1475
1476     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1477     ss_bo = pp_context->surface_state_binding_table.bo;
1478     assert(ss_bo);
1479
1480     dri_bo_map(ss_bo, True);
1481     assert(ss_bo->virtual);
1482     ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1483     memset(ss, 0, sizeof(*ss));
1484     ss->ss0.surface_type = I965_SURFACE_2D;
1485     ss->ss0.surface_format = format;
1486     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1487     ss->ss2.width = width - 1;
1488     ss->ss2.height = height - 1;
1489     ss->ss3.pitch = pitch - 1;
1490     pp_set_surface_tiling(ss, tiling);
1491     dri_bo_emit_reloc(ss_bo,
1492                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1493                       surf_bo_offset,
1494                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
1495                       surf_bo);
1496     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1497     dri_bo_unmap(ss_bo);
1498 }
1499
1500 static void
1501 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1502                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1503                            int width, int height, int wpitch,
1504                            int xoffset, int yoffset,
1505                            int format, int interleave_chroma,
1506                            int index)
1507 {
1508     struct i965_surface_state2 *ss2;
1509     dri_bo *ss2_bo;
1510     unsigned int tiling;
1511     unsigned int swizzle;
1512
1513     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1514     ss2_bo = pp_context->surface_state_binding_table.bo;
1515     assert(ss2_bo);
1516
1517     dri_bo_map(ss2_bo, True);
1518     assert(ss2_bo->virtual);
1519     ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1520     memset(ss2, 0, sizeof(*ss2));
1521     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1522     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1523     ss2->ss1.width = width - 1;
1524     ss2->ss1.height = height - 1;
1525     ss2->ss2.pitch = wpitch - 1;
1526     ss2->ss2.interleave_chroma = interleave_chroma;
1527     ss2->ss2.surface_format = format;
1528     ss2->ss3.x_offset_for_cb = xoffset;
1529     ss2->ss3.y_offset_for_cb = yoffset;
1530     pp_set_surface2_tiling(ss2, tiling);
1531     dri_bo_emit_reloc(ss2_bo,
1532                       I915_GEM_DOMAIN_RENDER, 0,
1533                       surf_bo_offset,
1534                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
1535                       surf_bo);
1536     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1537     dri_bo_unmap(ss2_bo);
1538 }
1539
1540 static void
1541 gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1542                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1543                           int width, int height, int pitch, int format, 
1544                           int index, int is_target)
1545 {
1546     struct i965_driver_data * const i965 = i965_driver_data(ctx);  
1547     struct gen7_surface_state *ss;
1548     dri_bo *ss_bo;
1549     unsigned int tiling;
1550     unsigned int swizzle;
1551
1552     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1553     ss_bo = pp_context->surface_state_binding_table.bo;
1554     assert(ss_bo);
1555
1556     dri_bo_map(ss_bo, True);
1557     assert(ss_bo->virtual);
1558     ss = (struct gen7_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1559     memset(ss, 0, sizeof(*ss));
1560     ss->ss0.surface_type = I965_SURFACE_2D;
1561     ss->ss0.surface_format = format;
1562     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1563     ss->ss2.width = width - 1;
1564     ss->ss2.height = height - 1;
1565     ss->ss3.pitch = pitch - 1;
1566     gen7_pp_set_surface_tiling(ss, tiling);
1567     if (IS_HASWELL(i965->intel.device_id))
1568         gen7_render_set_surface_scs(ss);
1569     dri_bo_emit_reloc(ss_bo,
1570                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1571                       surf_bo_offset,
1572                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
1573                       surf_bo);
1574     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1575     dri_bo_unmap(ss_bo);
1576 }
1577
1578 static void
1579 gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1580                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1581                            int width, int height, int wpitch,
1582                            int xoffset, int yoffset,
1583                            int format, int interleave_chroma,
1584                            int index)
1585 {
1586     struct gen7_surface_state2 *ss2;
1587     dri_bo *ss2_bo;
1588     unsigned int tiling;
1589     unsigned int swizzle;
1590
1591     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1592     ss2_bo = pp_context->surface_state_binding_table.bo;
1593     assert(ss2_bo);
1594
1595     dri_bo_map(ss2_bo, True);
1596     assert(ss2_bo->virtual);
1597     ss2 = (struct gen7_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1598     memset(ss2, 0, sizeof(*ss2));
1599     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1600     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1601     ss2->ss1.width = width - 1;
1602     ss2->ss1.height = height - 1;
1603     ss2->ss2.pitch = wpitch - 1;
1604     ss2->ss2.interleave_chroma = interleave_chroma;
1605     ss2->ss2.surface_format = format;
1606     ss2->ss3.x_offset_for_cb = xoffset;
1607     ss2->ss3.y_offset_for_cb = yoffset;
1608     gen7_pp_set_surface2_tiling(ss2, tiling);
1609     dri_bo_emit_reloc(ss2_bo,
1610                       I915_GEM_DOMAIN_RENDER, 0,
1611                       surf_bo_offset,
1612                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
1613                       surf_bo);
1614     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1615     dri_bo_unmap(ss2_bo);
1616 }
1617
1618 static void 
1619 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1620                                 const struct i965_surface *surface, 
1621                                 int base_index, int is_target,
1622                                 int *width, int *height, int *pitch, int *offset)
1623 {
1624     struct i965_driver_data *i965 = i965_driver_data(ctx);
1625     struct object_surface *obj_surface;
1626     struct object_image *obj_image;
1627     dri_bo *bo;
1628     int fourcc = pp_get_surface_fourcc(ctx, surface);
1629     const int Y = 0;
1630     const int U = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 2 : 1;
1631     const int V = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 1 : 2;
1632     const int UV = 1;
1633     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1634     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); 
1635     int full_packed_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || 
1636                               fourcc == VA_FOURCC('R', 'G', 'B', 'X') || 
1637                               fourcc == VA_FOURCC('B', 'G', 'R', 'A') || 
1638                               fourcc == VA_FOURCC('B', 'G', 'R', 'X'));
1639     int scale_factor_of_1st_plane_width_in_byte = 1;
1640                               
1641     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1642         obj_surface = SURFACE(surface->id);
1643         bo = obj_surface->bo;
1644         width[0] = obj_surface->orig_width;
1645         height[0] = obj_surface->orig_height;
1646         pitch[0] = obj_surface->width;
1647         offset[0] = 0;
1648
1649         if (full_packed_format) {
1650             scale_factor_of_1st_plane_width_in_byte = 4; 
1651             pitch[0] = obj_surface->width * 4;
1652         }
1653         else if (packed_yuv ) {
1654             scale_factor_of_1st_plane_width_in_byte =  2; 
1655             pitch[0] = obj_surface->width * 2;
1656         }
1657         else if (interleaved_uv) {
1658             width[1] = obj_surface->orig_width;
1659             height[1] = obj_surface->orig_height / 2;
1660             pitch[1] = obj_surface->width;
1661             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1662         } else {
1663             width[1] = obj_surface->orig_width / 2;
1664             height[1] = obj_surface->orig_height / 2;
1665             pitch[1] = obj_surface->width / 2;
1666             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1667             width[2] = obj_surface->orig_width / 2;
1668             height[2] = obj_surface->orig_height / 2;
1669             pitch[2] = obj_surface->width / 2;
1670             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
1671         }
1672     } else {
1673         obj_image = IMAGE(surface->id);
1674         bo = obj_image->bo;
1675         width[0] = obj_image->image.width;
1676         height[0] = obj_image->image.height;
1677         pitch[0] = obj_image->image.pitches[0];
1678         offset[0] = obj_image->image.offsets[0];
1679
1680         if (full_packed_format) {
1681             scale_factor_of_1st_plane_width_in_byte = 4;
1682         }
1683         else if (packed_yuv ) {
1684             scale_factor_of_1st_plane_width_in_byte = 2;
1685         }
1686         else if (interleaved_uv) {
1687             width[1] = obj_image->image.width;
1688             height[1] = obj_image->image.height / 2;
1689             pitch[1] = obj_image->image.pitches[1];
1690             offset[1] = obj_image->image.offsets[1];
1691         } else {
1692             width[1] = obj_image->image.width / 2;
1693             height[1] = obj_image->image.height / 2;
1694             pitch[1] = obj_image->image.pitches[1];
1695             offset[1] = obj_image->image.offsets[1];
1696             width[2] = obj_image->image.width / 2;
1697             height[2] = obj_image->image.height / 2;
1698             pitch[2] = obj_image->image.pitches[2];
1699             offset[2] = obj_image->image.offsets[2];
1700         }
1701     }
1702
1703     /* Y surface */
1704     i965_pp_set_surface_state(ctx, pp_context,
1705                               bo, offset[Y],
1706                               width[Y] *scale_factor_of_1st_plane_width_in_byte / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
1707                               base_index, is_target);
1708
1709     if (!packed_yuv && !full_packed_format) {
1710         if (interleaved_uv) {
1711             i965_pp_set_surface_state(ctx, pp_context,
1712                                       bo, offset[UV],
1713                                       width[UV] / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
1714                                       base_index + 1, is_target);
1715         } else {
1716             /* U surface */
1717             i965_pp_set_surface_state(ctx, pp_context,
1718                                       bo, offset[U],
1719                                       width[U] / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
1720                                       base_index + 1, is_target);
1721
1722             /* V surface */
1723             i965_pp_set_surface_state(ctx, pp_context,
1724                                       bo, offset[V],
1725                                       width[V] / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
1726                                       base_index + 2, is_target);
1727         }
1728     }
1729
1730 }
1731
1732 static void 
1733 gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1734                                      const struct i965_surface *surface, 
1735                                      int base_index, int is_target,
1736                                      int *width, int *height, int *pitch, int *offset)
1737 {
1738     struct i965_driver_data *i965 = i965_driver_data(ctx);
1739     struct object_surface *obj_surface;
1740     struct object_image *obj_image;
1741     dri_bo *bo;
1742     int fourcc = pp_get_surface_fourcc(ctx, surface);
1743     const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1744                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1;
1745     const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1746                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 1 : 2;
1747     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1748     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y'));
1749     int rgbx_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || 
1750                               fourcc == VA_FOURCC('R', 'G', 'B', 'X') || 
1751                               fourcc == VA_FOURCC('B', 'G', 'R', 'A') || 
1752                               fourcc == VA_FOURCC('B', 'G', 'R', 'X'));
1753
1754     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1755         obj_surface = SURFACE(surface->id);
1756         bo = obj_surface->bo;
1757         width[0] = obj_surface->orig_width;
1758         height[0] = obj_surface->orig_height;
1759         pitch[0] = obj_surface->width;
1760         offset[0] = 0;
1761
1762         if (packed_yuv) {
1763             if (is_target)
1764                 width[0] = obj_surface->orig_width * 2; /* surface format is R8, so double the width */
1765             else
1766                 width[0] = obj_surface->orig_width;     /* surface foramt is YCBCR, width is specified in units of pixels */
1767
1768             pitch[0] = obj_surface->width * 2;
1769         } else if (rgbx_format) {
1770             if (is_target)
1771                 width[0] = obj_surface->orig_width * 4; /* surface format is R8, so quad the width */
1772         }
1773
1774         width[1] = obj_surface->cb_cr_width;
1775         height[1] = obj_surface->cb_cr_height;
1776         pitch[1] = obj_surface->cb_cr_pitch;
1777         offset[1] = obj_surface->y_cb_offset * obj_surface->width;
1778
1779         width[2] = obj_surface->cb_cr_width;
1780         height[2] = obj_surface->cb_cr_height;
1781         pitch[2] = obj_surface->cb_cr_pitch;
1782         offset[2] = obj_surface->y_cr_offset * obj_surface->width;
1783     } else {
1784         obj_image = IMAGE(surface->id);
1785         bo = obj_image->bo;
1786         width[0] = obj_image->image.width;
1787         height[0] = obj_image->image.height;
1788         pitch[0] = obj_image->image.pitches[0];
1789         offset[0] = obj_image->image.offsets[0];
1790
1791         if (rgbx_format) {
1792             if (is_target)
1793                 width[0] = obj_image->image.width * 4; /* surface format is R8, so quad the width */
1794         } else if (packed_yuv) {
1795             if (is_target)
1796                 width[0] = obj_image->image.width * 2;  /* surface format is R8, so double the width */
1797             else
1798                 width[0] = obj_image->image.width;      /* surface foramt is YCBCR, width is specified in units of pixels */
1799         } else if (interleaved_uv) {
1800             width[1] = obj_image->image.width / 2;
1801             height[1] = obj_image->image.height / 2;
1802             pitch[1] = obj_image->image.pitches[1];
1803             offset[1] = obj_image->image.offsets[1];
1804         } else {
1805             width[1] = obj_image->image.width / 2;
1806             height[1] = obj_image->image.height / 2;
1807             pitch[1] = obj_image->image.pitches[U];
1808             offset[1] = obj_image->image.offsets[U];
1809             width[2] = obj_image->image.width / 2;
1810             height[2] = obj_image->image.height / 2;
1811             pitch[2] = obj_image->image.pitches[V];
1812             offset[2] = obj_image->image.offsets[V];
1813         }
1814     }
1815
1816     if (is_target) {
1817         gen7_pp_set_surface_state(ctx, pp_context,
1818                                   bo, 0,
1819                                   width[0] / 4, height[0], pitch[0],
1820                                   I965_SURFACEFORMAT_R8_UINT,
1821                                   base_index, 1);
1822         if (rgbx_format) {
1823                 struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1824                 /* the format is MSB: X-B-G-R */
1825                 pp_static_parameter->grf2.save_avs_rgb_swap = 0;
1826                 if ((fourcc == VA_FOURCC('B', 'G', 'R', 'A')) || 
1827                               (fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) {
1828                         /* It is stored as MSB: X-R-G-B */
1829                         pp_static_parameter->grf2.save_avs_rgb_swap = 1;
1830                 }
1831         }
1832         if (!packed_yuv && !rgbx_format) {
1833             if (interleaved_uv) {
1834                 gen7_pp_set_surface_state(ctx, pp_context,
1835                                           bo, offset[1],
1836                                           width[1] / 2, height[1], pitch[1],
1837                                           I965_SURFACEFORMAT_R8G8_SINT,
1838                                           base_index + 1, 1);
1839             } else {
1840                 gen7_pp_set_surface_state(ctx, pp_context,
1841                                           bo, offset[1],
1842                                           width[1] / 4, height[1], pitch[1],
1843                                           I965_SURFACEFORMAT_R8_SINT,
1844                                           base_index + 1, 1);
1845                 gen7_pp_set_surface_state(ctx, pp_context,
1846                                           bo, offset[2],
1847                                           width[2] / 4, height[2], pitch[2],
1848                                           I965_SURFACEFORMAT_R8_SINT,
1849                                           base_index + 2, 1);
1850             }
1851         }
1852     } else {
1853         int format0 = SURFACE_FORMAT_Y8_UNORM;
1854
1855         switch (fourcc) {
1856         case VA_FOURCC('Y', 'U', 'Y', '2'):
1857             format0 = SURFACE_FORMAT_YCRCB_NORMAL;
1858             break;
1859
1860         case VA_FOURCC('U', 'Y', 'V', 'Y'):
1861             format0 = SURFACE_FORMAT_YCRCB_SWAPY;
1862             break;
1863
1864         default:
1865             break;
1866         }
1867         if (rgbx_format) {
1868             struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1869             /* Only R8G8B8A8_UNORM is supported for BGRX or RGBX */
1870             format0 = SURFACE_FORMAT_R8G8B8A8_UNORM;
1871             pp_static_parameter->grf2.src_avs_rgb_swap = 0;
1872             if ((fourcc == VA_FOURCC('B', 'G', 'R', 'A')) || 
1873                               (fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) {
1874                 pp_static_parameter->grf2.src_avs_rgb_swap = 1;
1875             }
1876         }
1877         gen7_pp_set_surface2_state(ctx, pp_context,
1878                                    bo, offset[0],
1879                                    width[0], height[0], pitch[0],
1880                                    0, 0,
1881                                    format0, 0,
1882                                    base_index);
1883
1884         if (!packed_yuv && !rgbx_format) {
1885             if (interleaved_uv) {
1886                 gen7_pp_set_surface2_state(ctx, pp_context,
1887                                            bo, offset[1],
1888                                            width[1], height[1], pitch[1],
1889                                            0, 0,
1890                                            SURFACE_FORMAT_R8B8_UNORM, 0,
1891                                            base_index + 1);
1892             } else {
1893                 gen7_pp_set_surface2_state(ctx, pp_context,
1894                                            bo, offset[1],
1895                                            width[1], height[1], pitch[1],
1896                                            0, 0,
1897                                            SURFACE_FORMAT_R8_UNORM, 0,
1898                                            base_index + 1);
1899                 gen7_pp_set_surface2_state(ctx, pp_context,
1900                                            bo, offset[2],
1901                                            width[2], height[2], pitch[2],
1902                                            0, 0,
1903                                            SURFACE_FORMAT_R8_UNORM, 0,
1904                                            base_index + 2);
1905             }
1906         }
1907     }
1908 }
1909
/*
 * Horizontal step count callback of the NULL post-processing module.
 * The private context is not consulted; the result is always 1.
 */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1915
/*
 * Vertical step count callback of the NULL post-processing module.
 * The private context is not consulted; the result is always 1.
 */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1921
/*
 * Per-block parameter callback of the NULL post-processing module.
 * Nothing is written for block (x, y); the function always returns 0.
 */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;
    (void)x;
    (void)y;

    return 0;
}
1927
1928 static VAStatus
1929 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1930                    const struct i965_surface *src_surface,
1931                    const VARectangle *src_rect,
1932                    struct i965_surface *dst_surface,
1933                    const VARectangle *dst_rect,
1934                    void *filter_param)
1935 {
1936     /* private function & data */
1937     pp_context->pp_x_steps = pp_null_x_steps;
1938     pp_context->pp_y_steps = pp_null_y_steps;
1939     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
1940
1941     dst_surface->flags = src_surface->flags;
1942
1943     return VA_STATUS_SUCCESS;
1944 }
1945
/*
 * Horizontal step count callback of the load/save module.
 * The private context is not consulted; the result is always 1.
 */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1951
1952 static int
1953 pp_load_save_y_steps(void *private_context)
1954 {
1955     struct pp_load_save_context *pp_load_save_context = private_context;
1956
1957     return pp_load_save_context->dest_h / 8;
1958 }
1959
1960 static int
1961 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1962 {
1963     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1964     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
1965
1966     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_load_save_context->dest_x;
1967     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_load_save_context->dest_y;
1968
1969     return 0;
1970 }
1971
1972 static void calculate_boundary_block_mask(struct i965_post_processing_context *pp_context, const VARectangle *dst_rect)
1973 {
1974     int i;
1975     /* x offset of dest surface must be dword aligned.
1976      * so we have to extend dst surface on left edge, and mask out pixels not interested
1977      */
1978     if (dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT) {
1979         pp_context->block_horizontal_mask_left = 0;
1980         for (i=dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; i<GPU_ASM_BLOCK_WIDTH; i++)
1981         {
1982             pp_context->block_horizontal_mask_left |= 1<<i;
1983         }
1984     }
1985     else {
1986         pp_context->block_horizontal_mask_left = 0xffff;
1987     }
1988     
1989     int dst_width_adjust = dst_rect->width + dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; 
1990     if (dst_width_adjust%GPU_ASM_BLOCK_WIDTH){
1991         pp_context->block_horizontal_mask_right = (1 << (dst_width_adjust%GPU_ASM_BLOCK_WIDTH)) - 1;
1992     }
1993     else {
1994         pp_context->block_horizontal_mask_right = 0xffff;
1995     }
1996     
1997     if (dst_rect->height%GPU_ASM_BLOCK_HEIGHT){
1998         pp_context->block_vertical_mask_bottom = (1 << (dst_rect->height%GPU_ASM_BLOCK_HEIGHT)) - 1;
1999     }
2000     else {
2001         pp_context->block_vertical_mask_bottom = 0xff;
2002     }
2003
2004 }
2005 static VAStatus
2006 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2007                                 const struct i965_surface *src_surface,
2008                                 const VARectangle *src_rect,
2009                                 struct i965_surface *dst_surface,
2010                                 const VARectangle *dst_rect,
2011                                 void *filter_param)
2012 {
2013     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
2014     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2015     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2016     int width[3], height[3], pitch[3], offset[3];
2017     const int Y = 0;
2018
2019     /* source surface */
2020     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
2021                                     width, height, pitch, offset);
2022
2023     /* destination surface */
2024     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
2025                                     width, height, pitch, offset);
2026
2027     /* private function & data */
2028     pp_context->pp_x_steps = pp_load_save_x_steps;
2029     pp_context->pp_y_steps = pp_load_save_y_steps;
2030     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
2031
2032     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;;
2033     pp_load_save_context->dest_x = dst_rect->x - dst_left_edge_extend;
2034     pp_load_save_context->dest_y = dst_rect->y;
2035     pp_load_save_context->dest_h = ALIGN(dst_rect->height, 8);
2036     pp_load_save_context->dest_w = ALIGN(dst_rect->width+dst_left_edge_extend, 16);
2037
2038     pp_inline_parameter->grf5.block_count_x = pp_load_save_context->dest_w / 16;   /* 1 x N */
2039     pp_inline_parameter->grf5.number_blocks = pp_load_save_context->dest_w / 16;
2040
2041     pp_static_parameter->grf3.horizontal_origin_offset = src_rect->x;
2042     pp_static_parameter->grf3.vertical_origin_offset = src_rect->y;
2043
2044     // update u/v offset for packed yuv
2045     i965_update_src_surface_static_parameter (ctx, pp_context, src_surface);
2046     i965_update_dst_surface_static_parameter (ctx, pp_context, dst_surface);
2047
2048     dst_surface->flags = src_surface->flags;
2049
2050     return VA_STATUS_SUCCESS;
2051 }
2052
/*
 * Horizontal step count callback of the scaling module.
 * The private context is not consulted; the result is always 1.
 */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2058
2059 static int
2060 pp_scaling_y_steps(void *private_context)
2061 {
2062     struct pp_scaling_context *pp_scaling_context = private_context;
2063
2064     return pp_scaling_context->dest_h / 8;
2065 }
2066
2067 static int
2068 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2069 {
2070     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
2071     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2072     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2073     float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2074     float src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2075
2076     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
2077     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
2078     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
2079     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
2080     
2081     return 0;
2082 }
2083
2084 static VAStatus
2085 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2086                            const struct i965_surface *src_surface,
2087                            const VARectangle *src_rect,
2088                            struct i965_surface *dst_surface,
2089                            const VARectangle *dst_rect,
2090                            void *filter_param)
2091 {
2092     struct i965_driver_data *i965 = i965_driver_data(ctx);
2093     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
2094     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2095     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2096     struct object_surface *obj_surface;
2097     struct i965_sampler_state *sampler_state;
2098     int in_w, in_h, in_wpitch, in_hpitch;
2099     int out_w, out_h, out_wpitch, out_hpitch;
2100
2101     /* source surface */
2102     obj_surface = SURFACE(src_surface->id);
2103     in_w = obj_surface->orig_width;
2104     in_h = obj_surface->orig_height;
2105     in_wpitch = obj_surface->width;
2106     in_hpitch = obj_surface->height;
2107
2108     /* source Y surface index 1 */
2109     i965_pp_set_surface_state(ctx, pp_context,
2110                               obj_surface->bo, 0,
2111                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2112                               1, 0);
2113
2114     /* source UV surface index 2 */
2115     i965_pp_set_surface_state(ctx, pp_context,
2116                               obj_surface->bo, in_wpitch * in_hpitch,
2117                               in_w / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2118                               2, 0);
2119
2120     /* destination surface */
2121     obj_surface = SURFACE(dst_surface->id);
2122     out_w = obj_surface->orig_width;
2123     out_h = obj_surface->orig_height;
2124     out_wpitch = obj_surface->width;
2125     out_hpitch = obj_surface->height;
2126
2127     /* destination Y surface index 7 */
2128     i965_pp_set_surface_state(ctx, pp_context,
2129                               obj_surface->bo, 0,
2130                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2131                               7, 1);
2132
2133     /* destination UV surface index 8 */
2134     i965_pp_set_surface_state(ctx, pp_context,
2135                               obj_surface->bo, out_wpitch * out_hpitch,
2136                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2137                               8, 1);
2138
2139     /* sampler state */
2140     dri_bo_map(pp_context->sampler_state_table.bo, True);
2141     assert(pp_context->sampler_state_table.bo->virtual);
2142     sampler_state = pp_context->sampler_state_table.bo->virtual;
2143
2144     /* SIMD16 Y index 1 */
2145     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
2146     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2147     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2148     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2149     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2150
2151     /* SIMD16 UV index 2 */
2152     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
2153     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2154     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2155     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2156     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2157
2158     dri_bo_unmap(pp_context->sampler_state_table.bo);
2159
2160     /* private function & data */
2161     pp_context->pp_x_steps = pp_scaling_x_steps;
2162     pp_context->pp_y_steps = pp_scaling_y_steps;
2163     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
2164
2165     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
2166     float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
2167     pp_scaling_context->dest_x = dst_rect->x - dst_left_edge_extend;
2168     pp_scaling_context->dest_y = dst_rect->y;
2169     pp_scaling_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2170     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 8);
2171     pp_scaling_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
2172     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
2173
2174     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2175
2176     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
2177     pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
2178     pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
2179
2180     dst_surface->flags = src_surface->flags;
2181
2182     return VA_STATUS_SUCCESS;
2183 }
2184
2185 static int
2186 pp_avs_x_steps(void *private_context)
2187 {
2188     struct pp_avs_context *pp_avs_context = private_context;
2189
2190     return pp_avs_context->dest_w / 16;
2191 }
2192
/*
 * Vertical step count callback of the AVS module.
 * The private context is not consulted; the result is always 1.
 */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2198
2199 static int
2200 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2201 {
2202     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2203     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2204     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2205     float src_x_steping, src_y_steping, video_step_delta;
2206     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
2207
2208     if (pp_static_parameter->grf4.r4_2.avs.nlas == 0) {
2209         src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2210         pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_avs_context->src_normalized_x;
2211     } else if (tmp_w >= pp_avs_context->dest_w) {
2212         pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
2213         pp_inline_parameter->grf6.video_step_delta = 0;
2214         
2215         if (x == 0) {
2216             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
2217                 pp_avs_context->src_normalized_x;
2218         } else {
2219             src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2220             video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2221             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2222                 16 * 15 * video_step_delta / 2;
2223         }
2224     } else {
2225         int n0, n1, n2, nls_left, nls_right;
2226         int factor_a = 5, factor_b = 4;
2227         float f;
2228
2229         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
2230         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
2231         n2 = tmp_w / (16 * factor_a);
2232         nls_left = n0 + n2;
2233         nls_right = n1 + n2;
2234         f = (float) n2 * 16 / tmp_w;
2235         
2236         if (n0 < 5) {
2237             pp_inline_parameter->grf6.video_step_delta = 0.0;
2238
2239             if (x == 0) {
2240                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
2241                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
2242             } else {
2243                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2244                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2245                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2246                     16 * 15 * video_step_delta / 2;
2247             }
2248         } else {
2249             if (x < nls_left) {
2250                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
2251                 float a = f / (nls_left * 16 * factor_b);
2252                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
2253                 
2254                 pp_inline_parameter->grf6.video_step_delta = b;
2255
2256                 if (x == 0) {
2257                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
2258                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a;
2259                 } else {
2260                     src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2261                     video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2262                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2263                         16 * 15 * video_step_delta / 2;
2264                     pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
2265                 }
2266             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
2267                 /* scale the center linearly */
2268                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2269                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2270                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2271                     16 * 15 * video_step_delta / 2;
2272                 pp_inline_parameter->grf6.video_step_delta = 0.0;
2273                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
2274             } else {
2275                 float a = f / (nls_right * 16 * factor_b);
2276                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
2277
2278                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2279                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2280                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2281                     16 * 15 * video_step_delta / 2;
2282                 pp_inline_parameter->grf6.video_step_delta = -b;
2283
2284                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
2285                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
2286                 else
2287                     pp_inline_parameter->grf5.normalized_video_x_scaling_step -= b * 16;
2288             }
2289         }
2290     }
2291
2292     src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2293     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
2294     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2295     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
2296
2297     return 0;
2298 }
2299
2300 static VAStatus
2301 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2302                        const struct i965_surface *src_surface,
2303                        const VARectangle *src_rect,
2304                        struct i965_surface *dst_surface,
2305                        const VARectangle *dst_rect,
2306                        void *filter_param,
2307                        int nlas)
2308 {
2309     struct i965_driver_data *i965 = i965_driver_data(ctx);
2310     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2311     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2312     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2313     struct object_surface *obj_surface;
2314     struct i965_sampler_8x8 *sampler_8x8;
2315     struct i965_sampler_8x8_state *sampler_8x8_state;
2316     int index;
2317     int in_w, in_h, in_wpitch, in_hpitch;
2318     int out_w, out_h, out_wpitch, out_hpitch;
2319     int i;
2320
2321     /* surface */
2322     obj_surface = SURFACE(src_surface->id);
2323     in_w = obj_surface->orig_width;
2324     in_h = obj_surface->orig_height;
2325     in_wpitch = obj_surface->width;
2326     in_hpitch = obj_surface->height;
2327
2328     /* source Y surface index 1 */
2329     i965_pp_set_surface2_state(ctx, pp_context,
2330                                obj_surface->bo, 0,
2331                                in_w, in_h, in_wpitch,
2332                                0, 0,
2333                                SURFACE_FORMAT_Y8_UNORM, 0,
2334                                1);
2335
2336     /* source UV surface index 2 */
2337     i965_pp_set_surface2_state(ctx, pp_context,
2338                                obj_surface->bo, in_wpitch * in_hpitch,
2339                                in_w / 2, in_h / 2, in_wpitch,
2340                                0, 0,
2341                                SURFACE_FORMAT_R8B8_UNORM, 0,
2342                                2);
2343
2344     /* destination surface */
2345     obj_surface = SURFACE(dst_surface->id);
2346     out_w = obj_surface->orig_width;
2347     out_h = obj_surface->orig_height;
2348     out_wpitch = obj_surface->width;
2349     out_hpitch = obj_surface->height;
2350     assert(out_w <= out_wpitch && out_h <= out_hpitch);
2351
2352     /* destination Y surface index 7 */
2353     i965_pp_set_surface_state(ctx, pp_context,
2354                               obj_surface->bo, 0,
2355                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2356                               7, 1);
2357
2358     /* destination UV surface index 8 */
2359     i965_pp_set_surface_state(ctx, pp_context,
2360                               obj_surface->bo, out_wpitch * out_hpitch,
2361                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2362                               8, 1);
2363
2364     /* sampler 8x8 state */
2365     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2366     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2367     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2368     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2369     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2370
2371     for (i = 0; i < 17; i++) {
2372         /* for Y channel, currently ignore */
2373         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x00;
2374         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x00;
2375         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x08;
2376         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x18;
2377         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x18;
2378         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x08;
2379         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x00;
2380         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x00;
2381         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x00;
2382         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x00;
2383         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x10;
2384         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x10;
2385         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x10;
2386         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x10;
2387         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x00;
2388         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x00;
2389         /* for U/V channel, 0.25 */
2390         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2391         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2392         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
2393         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
2394         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
2395         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
2396         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2397         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2398         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2399         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2400         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
2401         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
2402         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
2403         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
2404         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2405         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2406     }
2407
2408     sampler_8x8_state->dw136.default_sharpness_level = 0;
2409     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
2410     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2411     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2412     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2413
2414     /* sampler 8x8 */
2415     dri_bo_map(pp_context->sampler_state_table.bo, True);
2416     assert(pp_context->sampler_state_table.bo->virtual);
2417     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
2418     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2419
2420     /* sample_8x8 Y index 1 */
2421     index = 1;
2422     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2423     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2424     sampler_8x8[index].dw0.ief_bypass = 1;
2425     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2426     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2427     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2428     sampler_8x8[index].dw2.global_noise_estimation = 22;
2429     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2430     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2431     sampler_8x8[index].dw3.strong_edge_weight = 7;
2432     sampler_8x8[index].dw3.regular_weight = 2;
2433     sampler_8x8[index].dw3.non_edge_weight = 0;
2434     sampler_8x8[index].dw3.gain_factor = 40;
2435     sampler_8x8[index].dw4.steepness_boost = 0;
2436     sampler_8x8[index].dw4.steepness_threshold = 0;
2437     sampler_8x8[index].dw4.mr_boost = 0;
2438     sampler_8x8[index].dw4.mr_threshold = 5;
2439     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2440     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2441     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2442     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2443     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2444     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2445     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2446     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2447     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2448     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2449     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2450     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2451     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2452     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2453     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2454     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2455     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2456     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2457     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2458     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2459     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2460     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2461     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2462     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2463     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2464     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2465     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2466     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2467     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2468     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2469     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2470     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2471     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2472     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2473     sampler_8x8[index].dw13.limiter_boost = 0;
2474     sampler_8x8[index].dw13.minimum_limiter = 10;
2475     sampler_8x8[index].dw13.maximum_limiter = 11;
2476     sampler_8x8[index].dw14.clip_limiter = 130;
2477     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2478                       I915_GEM_DOMAIN_RENDER, 
2479                       0,
2480                       0,
2481                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2482                       pp_context->sampler_state_table.bo_8x8);
2483
2484     /* sample_8x8 UV index 2 */
2485     index = 2;
2486     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2487     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2488     sampler_8x8[index].dw0.ief_bypass = 1;
2489     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2490     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2491     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2492     sampler_8x8[index].dw2.global_noise_estimation = 22;
2493     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2494     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2495     sampler_8x8[index].dw3.strong_edge_weight = 7;
2496     sampler_8x8[index].dw3.regular_weight = 2;
2497     sampler_8x8[index].dw3.non_edge_weight = 0;
2498     sampler_8x8[index].dw3.gain_factor = 40;
2499     sampler_8x8[index].dw4.steepness_boost = 0;
2500     sampler_8x8[index].dw4.steepness_threshold = 0;
2501     sampler_8x8[index].dw4.mr_boost = 0;
2502     sampler_8x8[index].dw4.mr_threshold = 5;
2503     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2504     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2505     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2506     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2507     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2508     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2509     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2510     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2511     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2512     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2513     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2514     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2515     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2516     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2517     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2518     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2519     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2520     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2521     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2522     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2523     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2524     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2525     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2526     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2527     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2528     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2529     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2530     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2531     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2532     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2533     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2534     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2535     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2536     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2537     sampler_8x8[index].dw13.limiter_boost = 0;
2538     sampler_8x8[index].dw13.minimum_limiter = 10;
2539     sampler_8x8[index].dw13.maximum_limiter = 11;
2540     sampler_8x8[index].dw14.clip_limiter = 130;
2541     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2542                       I915_GEM_DOMAIN_RENDER, 
2543                       0,
2544                       0,
2545                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2546                       pp_context->sampler_state_table.bo_8x8);
2547
2548     dri_bo_unmap(pp_context->sampler_state_table.bo);
2549
2550     /* private function & data */
2551     pp_context->pp_x_steps = pp_avs_x_steps;
2552     pp_context->pp_y_steps = pp_avs_y_steps;
2553     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
2554
2555     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
2556     float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
2557     pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
2558     pp_avs_context->dest_y = dst_rect->y;
2559     pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2560     pp_avs_context->dest_h = ALIGN(dst_rect->height, 8);
2561     pp_avs_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
2562     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
2563     pp_avs_context->src_w = src_rect->width + src_left_edge_extend;
2564     pp_avs_context->src_h = src_rect->height;
2565
2566     pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
2567     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2568
2569     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
2570     pp_inline_parameter->grf5.block_count_x = 1;        /* M x 1 */
2571     pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
2572     pp_inline_parameter->grf6.video_step_delta = 0.0;
2573
2574     dst_surface->flags = src_surface->flags;
2575
2576     return VA_STATUS_SUCCESS;
2577 }
2578
2579 static VAStatus
2580 pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2581                             const struct i965_surface *src_surface,
2582                             const VARectangle *src_rect,
2583                             struct i965_surface *dst_surface,
2584                             const VARectangle *dst_rect,
2585                             void *filter_param)
2586 {
2587     return pp_nv12_avs_initialize(ctx, pp_context,
2588                                   src_surface,
2589                                   src_rect,
2590                                   dst_surface,
2591                                   dst_rect,
2592                                   filter_param,
2593                                   1);
2594 }
2595
2596 static VAStatus
2597 gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2598                              const struct i965_surface *src_surface,
2599                              const VARectangle *src_rect,
2600                              struct i965_surface *dst_surface,
2601                              const VARectangle *dst_rect,
2602                              void *filter_param)
2603 {
2604     return pp_nv12_avs_initialize(ctx, pp_context,
2605                                   src_surface,
2606                                   src_rect,
2607                                   dst_surface,
2608                                   dst_rect,
2609                                   filter_param,
2610                                   0);    
2611 }
2612
2613 static int
2614 gen7_pp_avs_x_steps(void *private_context)
2615 {
2616     struct pp_avs_context *pp_avs_context = private_context;
2617
2618     return pp_avs_context->dest_w / 16;
2619 }
2620
2621 static int
2622 gen7_pp_avs_y_steps(void *private_context)
2623 {
2624     struct pp_avs_context *pp_avs_context = private_context;
2625
2626     return pp_avs_context->dest_h / 16;
2627 }
2628
2629 static int
2630 gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2631 {
2632     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2633     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2634
2635     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2636     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
2637     pp_inline_parameter->grf7.constant_0 = 0xffffffff;
2638     pp_inline_parameter->grf7.sampler_load_main_video_x_scaling_step = 1.0 / pp_avs_context->src_w;
2639
2640     return 0;
2641 }
2642
2643 static void gen7_update_src_surface_uv_offset(VADriverContextP    ctx, 
2644                                               struct i965_post_processing_context *pp_context,
2645                                               const struct i965_surface *surface)
2646 {
2647     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2648     int fourcc = pp_get_surface_fourcc(ctx, surface);
2649     
2650     if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) {
2651         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
2652         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
2653         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
2654     } else if (fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
2655         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 1;
2656         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 0;
2657         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 2;
2658     }
2659 }
2660
2661 static VAStatus
2662 gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2663                            const struct i965_surface *src_surface,
2664                            const VARectangle *src_rect,
2665                            struct i965_surface *dst_surface,
2666                            const VARectangle *dst_rect,
2667                            void *filter_param)
2668 {
2669     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2670     struct i965_driver_data *i965 = i965_driver_data(ctx);
2671     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2672     struct gen7_sampler_8x8 *sampler_8x8;
2673     struct i965_sampler_8x8_state *sampler_8x8_state;
2674     int index, i;
2675     int width[3], height[3], pitch[3], offset[3];
2676     int src_width, src_height;
2677
2678     /* source surface */
2679     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
2680                                          width, height, pitch, offset);
2681     src_width = width[0];
2682     src_height = height[0];
2683
2684     /* destination surface */
2685     gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
2686                                          width, height, pitch, offset);
2687
2688     /* sampler 8x8 state */
2689     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2690     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2691     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2692     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2693     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2694
2695     for (i = 0; i < 17; i++) {
2696         float coff;
2697         coff = i;
2698         coff = coff / 16;
2699         /* for Y channel, currently ignore */
2700         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0;
2701         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0;
2702         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0;
2703         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = intel_format_convert(1 - coff, 1, 6,0);
2704         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = intel_format_convert(coff, 1, 6, 0);
2705         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0;
2706         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0;
2707         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0;
2708         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0;
2709         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0;
2710         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0;
2711         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
2712         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = intel_format_convert(coff, 1, 6, 0);
2713         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0;
2714         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0;
2715         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0;
2716         /* for U/V channel, 0.25 */
2717         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2718         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2719         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x0;
2720         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
2721         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = intel_format_convert(coff, 1, 6, 0);
2722         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0;
2723         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2724         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2725         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2726         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2727         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x0;
2728         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
2729         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = intel_format_convert(coff, 1, 6, 0);
2730         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x0;
2731         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2732         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2733     }
2734
2735     sampler_8x8_state->dw136.default_sharpness_level = 0;
2736     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
2737     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2738     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2739     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2740
2741     /* sampler 8x8 */
2742     dri_bo_map(pp_context->sampler_state_table.bo, True);
2743     assert(pp_context->sampler_state_table.bo->virtual);
2744     assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
2745     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2746
2747     /* sample_8x8 Y index 4 */
2748     index = 4;
2749     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2750     sampler_8x8[index].dw0.global_noise_estimation = 255;
2751     sampler_8x8[index].dw0.ief_bypass = 1;
2752
2753     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2754
2755     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2756     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2757     sampler_8x8[index].dw2.r5x_coefficient = 9;
2758     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2759     sampler_8x8[index].dw2.r5c_coefficient = 3;
2760
2761     sampler_8x8[index].dw3.r3x_coefficient = 27;
2762     sampler_8x8[index].dw3.r3c_coefficient = 5;
2763     sampler_8x8[index].dw3.gain_factor = 40;
2764     sampler_8x8[index].dw3.non_edge_weight = 1;
2765     sampler_8x8[index].dw3.regular_weight = 2;
2766     sampler_8x8[index].dw3.strong_edge_weight = 7;
2767     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2768
2769     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2770                       I915_GEM_DOMAIN_RENDER, 
2771                       0,
2772                       0,
2773                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2774                       pp_context->sampler_state_table.bo_8x8);
2775
2776     /* sample_8x8 UV index 8 */
2777     index = 8;
2778     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2779     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2780     sampler_8x8[index].dw0.global_noise_estimation = 255;
2781     sampler_8x8[index].dw0.ief_bypass = 1;
2782     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2783     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2784     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2785     sampler_8x8[index].dw2.r5x_coefficient = 9;
2786     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2787     sampler_8x8[index].dw2.r5c_coefficient = 3;
2788     sampler_8x8[index].dw3.r3x_coefficient = 27;
2789     sampler_8x8[index].dw3.r3c_coefficient = 5;
2790     sampler_8x8[index].dw3.gain_factor = 40;
2791     sampler_8x8[index].dw3.non_edge_weight = 1;
2792     sampler_8x8[index].dw3.regular_weight = 2;
2793     sampler_8x8[index].dw3.strong_edge_weight = 7;
2794     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2795
2796     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2797                       I915_GEM_DOMAIN_RENDER, 
2798                       0,
2799                       0,
2800                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2801                       pp_context->sampler_state_table.bo_8x8);
2802
2803     /* sampler_8x8 V, index 12 */
2804     index = 12;
2805     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2806     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2807     sampler_8x8[index].dw0.global_noise_estimation = 255;
2808     sampler_8x8[index].dw0.ief_bypass = 1;
2809     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2810     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2811     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2812     sampler_8x8[index].dw2.r5x_coefficient = 9;
2813     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2814     sampler_8x8[index].dw2.r5c_coefficient = 3;
2815     sampler_8x8[index].dw3.r3x_coefficient = 27;
2816     sampler_8x8[index].dw3.r3c_coefficient = 5;
2817     sampler_8x8[index].dw3.gain_factor = 40;
2818     sampler_8x8[index].dw3.non_edge_weight = 1;
2819     sampler_8x8[index].dw3.regular_weight = 2;
2820     sampler_8x8[index].dw3.strong_edge_weight = 7;
2821     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2822
2823     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2824                       I915_GEM_DOMAIN_RENDER, 
2825                       0,
2826                       0,
2827                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2828                       pp_context->sampler_state_table.bo_8x8);
2829
2830     dri_bo_unmap(pp_context->sampler_state_table.bo);
2831
2832     /* private function & data */
2833     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
2834     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
2835     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
2836
2837     pp_avs_context->dest_x = dst_rect->x;
2838     pp_avs_context->dest_y = dst_rect->y;
2839     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
2840     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
2841     pp_avs_context->src_w = src_rect->width;
2842     pp_avs_context->src_h = src_rect->height;
2843
2844     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
2845     dw = MAX(dw, pp_avs_context->dest_w);
2846
2847     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
2848     pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */
2849     if (IS_HASWELL(i965->intel.device_id))
2850         pp_static_parameter->grf2.avs_wa_enable = 0; /* HSW don't use the WA */
2851         
2852     pp_static_parameter->grf2.avs_wa_width = dw;
2853     pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * dw);
2854     pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * dw);
2855
2856     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
2857     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / pp_avs_context->dest_h;
2858     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = -(float)pp_avs_context->dest_y / pp_avs_context->dest_h;
2859     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = -(float)pp_avs_context->dest_x / dw;
2860
2861     gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
2862
2863     dst_surface->flags = src_surface->flags;
2864
2865     return VA_STATUS_SUCCESS;
2866 }
2867
2868
2869 static VAStatus
2870 gen7_pp_rgbx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2871                            const struct i965_surface *src_surface,
2872                            const VARectangle *src_rect,
2873                            struct i965_surface *dst_surface,
2874                            const VARectangle *dst_rect,
2875                            void *filter_param)
2876 {
2877     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2878     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2879     struct gen7_sampler_8x8 *sampler_8x8;
2880     struct i965_sampler_8x8_state *sampler_8x8_state;
2881     int index, i;
2882     int width[3], height[3], pitch[3], offset[3];
2883     int src_width, src_height;
2884
2885     /* source surface */
2886     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
2887                                          width, height, pitch, offset);
2888     src_width = width[0];
2889     src_height = height[0];
2890
2891     /* destination surface */
2892     gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
2893                                          width, height, pitch, offset);
2894
2895     /* sampler 8x8 state */
2896     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2897     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2898     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2899     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2900     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2901
2902     /* The sampler_state setting of RGBX surface will be different with
2903      * that for NV12/I420 surface. 
2904      */
2905     for (i = 0; i < 17; i++) {
2906         float coff;
2907         coff = i;
2908         coff = coff / 16;
2909         /* for Y channel, currently ignore */
2910         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0;
2911         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0;
2912         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0;
2913         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
2914         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = intel_format_convert(coff, 1, 6, 0);
2915         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0;
2916         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0;
2917         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0;
2918         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0;
2919         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0;
2920         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0;
2921         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
2922         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = intel_format_convert(coff, 1, 6, 0);
2923         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0;
2924         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0;
2925         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0;
2926         /* for U/V channel, 0.25 */
2927         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2928         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2929         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x00;
2930         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
2931         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = intel_format_convert(coff, 1, 6, 0);
2932         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x00;
2933         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2934         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2935         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2936         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2937         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x00;
2938         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
2939         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = intel_format_convert(coff, 1, 6, 0);
2940         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x00;
2941         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2942         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2943     }
2944
2945     sampler_8x8_state->dw136.default_sharpness_level = 0;
2946     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 0;
2947     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2948     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2949     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2950
2951     /* sampler 8x8 */
2952     dri_bo_map(pp_context->sampler_state_table.bo, True);
2953     assert(pp_context->sampler_state_table.bo->virtual);
2954     assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
2955     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2956
2957     /* sample_8x8 Y index 4 */
2958     index = 4;
2959     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2960     sampler_8x8[index].dw0.global_noise_estimation = 255;
2961     sampler_8x8[index].dw0.ief_bypass = 1;
2962
2963     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2964
2965     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2966     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2967     sampler_8x8[index].dw2.r5x_coefficient = 9;
2968     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2969     sampler_8x8[index].dw2.r5c_coefficient = 3;
2970
2971     sampler_8x8[index].dw3.r3x_coefficient = 27;
2972     sampler_8x8[index].dw3.r3c_coefficient = 5;
2973     sampler_8x8[index].dw3.gain_factor = 40;
2974     sampler_8x8[index].dw3.non_edge_weight = 1;
2975     sampler_8x8[index].dw3.regular_weight = 2;
2976     sampler_8x8[index].dw3.strong_edge_weight = 7;
2977     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2978
2979     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2980                       I915_GEM_DOMAIN_RENDER, 
2981                       0,
2982                       0,
2983                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2984                       pp_context->sampler_state_table.bo_8x8);
2985
2986     /* sample_8x8 UV index 8 */
2987     index = 8;
2988     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2989     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2990     sampler_8x8[index].dw0.global_noise_estimation = 255;
2991     sampler_8x8[index].dw0.ief_bypass = 1;
2992     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2993     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2994     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2995     sampler_8x8[index].dw2.r5x_coefficient = 9;
2996     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2997     sampler_8x8[index].dw2.r5c_coefficient = 3;
2998     sampler_8x8[index].dw3.r3x_coefficient = 27;
2999     sampler_8x8[index].dw3.r3c_coefficient = 5;
3000     sampler_8x8[index].dw3.gain_factor = 40;
3001     sampler_8x8[index].dw3.non_edge_weight = 1;
3002     sampler_8x8[index].dw3.regular_weight = 2;
3003     sampler_8x8[index].dw3.strong_edge_weight = 7;
3004     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3005
3006     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3007                       I915_GEM_DOMAIN_RENDER, 
3008                       0,
3009                       0,
3010                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3011                       pp_context->sampler_state_table.bo_8x8);
3012
3013     /* sampler_8x8 V, index 12 */
3014     index = 12;
3015     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3016     sampler_8x8[index].dw0.disable_8x8_filter = 0;
3017     sampler_8x8[index].dw0.global_noise_estimation = 255;
3018     sampler_8x8[index].dw0.ief_bypass = 1;
3019     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3020     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3021     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3022     sampler_8x8[index].dw2.r5x_coefficient = 9;
3023     sampler_8x8[index].dw2.r5cx_coefficient = 8;
3024     sampler_8x8[index].dw2.r5c_coefficient = 3;
3025     sampler_8x8[index].dw3.r3x_coefficient = 27;
3026     sampler_8x8[index].dw3.r3c_coefficient = 5;
3027     sampler_8x8[index].dw3.gain_factor = 40;
3028     sampler_8x8[index].dw3.non_edge_weight = 1;
3029     sampler_8x8[index].dw3.regular_weight = 2;
3030     sampler_8x8[index].dw3.strong_edge_weight = 7;
3031     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3032
3033     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3034                       I915_GEM_DOMAIN_RENDER, 
3035                       0,
3036                       0,
3037                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3038                       pp_context->sampler_state_table.bo_8x8);
3039
3040     dri_bo_unmap(pp_context->sampler_state_table.bo);
3041
3042     /* private function & data */
3043     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
3044     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
3045     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
3046
3047     pp_avs_context->dest_x = dst_rect->x;
3048     pp_avs_context->dest_y = dst_rect->y;
3049     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
3050     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
3051     pp_avs_context->src_w = src_rect->width;
3052     pp_avs_context->src_h = src_rect->height;
3053
3054     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
3055     dw = MAX(dw, pp_avs_context->dest_w);
3056
3057     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3058     pp_static_parameter->grf2.avs_wa_enable = 0; /* It is unnecessary to use WA for RGBX surface */
3059     pp_static_parameter->grf2.avs_wa_width = dw;
3060     pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * dw);
3061     pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * dw);
3062
3063     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
3064     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / pp_avs_context->dest_h;
3065     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = -(float)pp_avs_context->dest_y / pp_avs_context->dest_h;
3066     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = -(float)pp_avs_context->dest_x / dw;
3067     gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
3068
3069     dst_surface->flags = src_surface->flags;
3070
3071     return VA_STATUS_SUCCESS;
3072 }
3073
/*
 * Horizontal walker step count for the DNDI pass.
 *
 * The private context is not consulted: the result is always 1.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
3079
3080 static int
3081 pp_dndi_y_steps(void *private_context)
3082 {
3083     struct pp_dndi_context *pp_dndi_context = private_context;
3084
3085     return pp_dndi_context->dest_h / 4;
3086 }
3087
3088 static int
3089 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3090 {
3091     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3092
3093     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3094     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
3095
3096     return 0;
3097 }
3098
3099 static VAStatus
3100 pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3101                         const struct i965_surface *src_surface,
3102                         const VARectangle *src_rect,
3103                         struct i965_surface *dst_surface,
3104                         const VARectangle *dst_rect,
3105                         void *filter_param)
3106 {
3107     struct i965_driver_data *i965 = i965_driver_data(ctx);
3108     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
3109     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3110     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3111     struct object_surface *obj_surface;
3112     struct i965_sampler_dndi *sampler_dndi;
3113     int index;
3114     int w, h;
3115     int orig_w, orig_h;
3116     int dndi_top_first = 1;
3117
3118     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
3119         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
3120
3121     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
3122         dndi_top_first = 1;
3123     else
3124         dndi_top_first = 0;
3125
3126     /* surface */
3127     obj_surface = SURFACE(src_surface->id);
3128     orig_w = obj_surface->orig_width;
3129     orig_h = obj_surface->orig_height;
3130     w = obj_surface->width;
3131     h = obj_surface->height;
3132
3133     if (pp_context->stmm.bo == NULL) {
3134         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
3135                                            "STMM surface",
3136                                            w * h,
3137                                            4096);
3138         assert(pp_context->stmm.bo);
3139     }
3140
3141     /* source UV surface index 2 */
3142     i965_pp_set_surface_state(ctx, pp_context,
3143                               obj_surface->bo, w * h,
3144                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3145                               2, 0);
3146
3147     /* source YUV surface index 4 */
3148     i965_pp_set_surface2_state(ctx, pp_context,
3149                                obj_surface->bo, 0,
3150                                orig_w, orig_h, w,
3151                                0, h,
3152                                SURFACE_FORMAT_PLANAR_420_8, 1,
3153                                4);
3154
3155     /* source STMM surface index 20 */
3156     i965_pp_set_surface_state(ctx, pp_context,
3157                               pp_context->stmm.bo, 0,
3158                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3159                               20, 1);
3160
3161     /* destination surface */
3162     obj_surface = SURFACE(dst_surface->id);
3163     orig_w = obj_surface->orig_width;
3164     orig_h = obj_surface->orig_height;
3165     w = obj_surface->width;
3166     h = obj_surface->height;
3167
3168     /* destination Y surface index 7 */
3169     i965_pp_set_surface_state(ctx, pp_context,
3170                               obj_surface->bo, 0,
3171                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3172                               7, 1);
3173
3174     /* destination UV surface index 8 */
3175     i965_pp_set_surface_state(ctx, pp_context,
3176                               obj_surface->bo, w * h,
3177                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3178                               8, 1);
3179     /* sampler dndi */
3180     dri_bo_map(pp_context->sampler_state_table.bo, True);
3181     assert(pp_context->sampler_state_table.bo->virtual);
3182     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3183     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3184
3185     /* sample dndi index 1 */
3186     index = 0;
3187     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
3188     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
3189     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
3190     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
3191
3192     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3193     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 4;
3194     sampler_dndi[index].dw1.stmm_c2 = 1;
3195     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
3196     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
3197
3198     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
3199     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
3200     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
3201     sampler_dndi[index].dw2.good_neighbor_threshold = 4;                // 0-63
3202
3203     sampler_dndi[index].dw3.maximum_stmm = 128;
3204     sampler_dndi[index].dw3.multipler_for_vecm = 2;
3205     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3206     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3207     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3208
3209     sampler_dndi[index].dw4.sdi_delta = 8;
3210     sampler_dndi[index].dw4.sdi_threshold = 128;
3211     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3212     sampler_dndi[index].dw4.stmm_shift_up = 0;
3213     sampler_dndi[index].dw4.stmm_shift_down = 0;
3214     sampler_dndi[index].dw4.minimum_stmm = 0;
3215
3216     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 8;
3217     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 32;
3218     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 64;
3219     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 32;
3220
3221     sampler_dndi[index].dw6.dn_enable = 1;
3222     sampler_dndi[index].dw6.di_enable = 1;
3223     sampler_dndi[index].dw6.di_partial = 0;
3224     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3225     sampler_dndi[index].dw6.dndi_stream_id = 0;
3226     sampler_dndi[index].dw6.dndi_first_frame = 1;
3227     sampler_dndi[index].dw6.progressive_dn = 0;
3228     sampler_dndi[index].dw6.fmd_tear_threshold = 63;
3229     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
3230     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
3231
3232     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3233     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3234     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3235     sampler_dndi[index].dw7.column_width_minus1 = 0;
3236
3237     dri_bo_unmap(pp_context->sampler_state_table.bo);
3238
3239     /* private function & data */
3240     pp_context->pp_x_steps = pp_dndi_x_steps;
3241     pp_context->pp_y_steps = pp_dndi_y_steps;
3242     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
3243
3244     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
3245     pp_static_parameter->grf1.r1_6.di.top_field_first = dndi_top_first;
3246     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 0;
3247     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 0;
3248
3249     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
3250     pp_inline_parameter->grf5.number_blocks = w / 16;
3251     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3252     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3253
3254     pp_dndi_context->dest_w = w;
3255     pp_dndi_context->dest_h = h;
3256
3257     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3258
3259     return VA_STATUS_SUCCESS;
3260 }
3261
/*
 * Horizontal walker step count for the denoise pass.
 *
 * The private context is not consulted: the result is always 1.
 */
static int
pp_dn_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
3267
3268 static int
3269 pp_dn_y_steps(void *private_context)
3270 {
3271     struct pp_dn_context *pp_dn_context = private_context;
3272
3273     return pp_dn_context->dest_h / 8;
3274 }
3275
3276 static int
3277 pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3278 {
3279     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3280
3281     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3282     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
3283
3284     return 0;
3285 }
3286
3287 static VAStatus
3288 pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3289                       const struct i965_surface *src_surface,
3290                       const VARectangle *src_rect,
3291                       struct i965_surface *dst_surface,
3292                       const VARectangle *dst_rect,
3293                       void *filter_param)
3294 {
3295     struct i965_driver_data *i965 = i965_driver_data(ctx);
3296     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
3297     struct object_surface *obj_surface;
3298     struct i965_sampler_dndi *sampler_dndi;
3299     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3300     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3301     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3302     int index;
3303     int w, h;
3304     int orig_w, orig_h;
3305     int dn_strength = 15;
3306     int dndi_top_first = 1;
3307     int dn_progressive = 0;
3308
3309     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3310         dndi_top_first = 1;
3311         dn_progressive = 1;
3312     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3313         dndi_top_first = 1;
3314         dn_progressive = 0;
3315     } else {
3316         dndi_top_first = 0;
3317         dn_progressive = 0;
3318     }
3319
3320     if (dn_filter_param) {
3321         float value = dn_filter_param->value;
3322         
3323         if (value > 1.0)
3324             value = 1.0;
3325         
3326         if (value < 0.0)
3327             value = 0.0;
3328
3329         dn_strength = (int)(value * 31.0F);
3330     }
3331
3332     /* surface */
3333     obj_surface = SURFACE(src_surface->id);
3334     orig_w = obj_surface->orig_width;
3335     orig_h = obj_surface->orig_height;
3336     w = obj_surface->width;
3337     h = obj_surface->height;
3338
3339     if (pp_context->stmm.bo == NULL) {
3340         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
3341                                            "STMM surface",
3342                                            w * h,
3343                                            4096);
3344         assert(pp_context->stmm.bo);
3345     }
3346
3347     /* source UV surface index 2 */
3348     i965_pp_set_surface_state(ctx, pp_context,
3349                               obj_surface->bo, w * h,
3350                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3351                               2, 0);
3352
3353     /* source YUV surface index 4 */
3354     i965_pp_set_surface2_state(ctx, pp_context,
3355                                obj_surface->bo, 0,
3356                                orig_w, orig_h, w,
3357                                0, h,
3358                                SURFACE_FORMAT_PLANAR_420_8, 1,
3359                                4);
3360
3361     /* source STMM surface index 20 */
3362     i965_pp_set_surface_state(ctx, pp_context,
3363                               pp_context->stmm.bo, 0,
3364                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3365                               20, 1);
3366
3367     /* destination surface */
3368     obj_surface = SURFACE(dst_surface->id);
3369     orig_w = obj_surface->orig_width;
3370     orig_h = obj_surface->orig_height;
3371     w = obj_surface->width;
3372     h = obj_surface->height;
3373
3374     /* destination Y surface index 7 */
3375     i965_pp_set_surface_state(ctx, pp_context,
3376                               obj_surface->bo, 0,
3377                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3378                               7, 1);
3379
3380     /* destination UV surface index 8 */
3381     i965_pp_set_surface_state(ctx, pp_context,
3382                               obj_surface->bo, w * h,
3383                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3384                               8, 1);
3385     /* sampler dn */
3386     dri_bo_map(pp_context->sampler_state_table.bo, True);
3387     assert(pp_context->sampler_state_table.bo->virtual);
3388     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3389     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3390
3391     /* sample dndi index 1 */
3392     index = 0;
3393     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
3394     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
3395     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
3396     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
3397
3398     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3399     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
3400     sampler_dndi[index].dw1.stmm_c2 = 0;
3401     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
3402     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
3403
3404     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
3405     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
3406     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
3407     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
3408
3409     sampler_dndi[index].dw3.maximum_stmm = 128;
3410     sampler_dndi[index].dw3.multipler_for_vecm = 2;
3411     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3412     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3413     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3414
3415     sampler_dndi[index].dw4.sdi_delta = 8;
3416     sampler_dndi[index].dw4.sdi_threshold = 128;
3417     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3418     sampler_dndi[index].dw4.stmm_shift_up = 0;
3419     sampler_dndi[index].dw4.stmm_shift_down = 0;
3420     sampler_dndi[index].dw4.minimum_stmm = 0;
3421
3422     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
3423     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
3424     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3425     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3426
3427     sampler_dndi[index].dw6.dn_enable = 1;
3428     sampler_dndi[index].dw6.di_enable = 0;
3429     sampler_dndi[index].dw6.di_partial = 0;
3430     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3431     sampler_dndi[index].dw6.dndi_stream_id = 1;
3432     sampler_dndi[index].dw6.dndi_first_frame = 1;
3433     sampler_dndi[index].dw6.progressive_dn = dn_progressive;
3434     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
3435     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
3436     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
3437
3438     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
3439     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
3440     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3441     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3442
3443     dri_bo_unmap(pp_context->sampler_state_table.bo);
3444
3445     /* private function & data */
3446     pp_context->pp_x_steps = pp_dn_x_steps;
3447     pp_context->pp_y_steps = pp_dn_y_steps;
3448     pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
3449
3450     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
3451     pp_static_parameter->grf1.r1_6.di.top_field_first = 0;
3452     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 64;
3453     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 192;
3454
3455     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
3456     pp_inline_parameter->grf5.number_blocks = w / 16;
3457     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3458     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3459
3460     pp_dn_context->dest_w = w;
3461     pp_dn_context->dest_h = h;
3462
3463     dst_surface->flags = src_surface->flags;
3464     
3465     return VA_STATUS_SUCCESS;
3466 }
3467
3468 static int
3469 gen7_pp_dndi_x_steps(void *private_context)
3470 {
3471     struct pp_dndi_context *pp_dndi_context = private_context;
3472
3473     return pp_dndi_context->dest_w / 16;
3474 }
3475
3476 static int
3477 gen7_pp_dndi_y_steps(void *private_context)
3478 {
3479     struct pp_dndi_context *pp_dndi_context = private_context;
3480
3481     return pp_dndi_context->dest_h / 4;
3482 }
3483
3484 static int
3485 gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3486 {
3487     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3488
3489     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16;
3490     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 4;
3491
3492     return 0;
3493 }
3494
3495 static VAStatus
3496 gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3497                              const struct i965_surface *src_surface,
3498                              const VARectangle *src_rect,
3499                              struct i965_surface *dst_surface,
3500                              const VARectangle *dst_rect,
3501                              void *filter_param)
3502 {
3503     struct i965_driver_data *i965 = i965_driver_data(ctx);
3504     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
3505     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3506     struct object_surface *obj_surface;
3507     struct gen7_sampler_dndi *sampler_dndi;
3508     int index;
3509     int w, h;
3510     int orig_w, orig_h;
3511     int dndi_top_first = 1;
3512
3513     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
3514         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
3515
3516     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
3517         dndi_top_first = 1;
3518     else
3519         dndi_top_first = 0;
3520
3521     /* surface */
3522     obj_surface = SURFACE(src_surface->id);
3523     orig_w = obj_surface->orig_width;
3524     orig_h = obj_surface->orig_height;
3525     w = obj_surface->width;
3526     h = obj_surface->height;
3527
3528     if (pp_context->stmm.bo == NULL) {
3529         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
3530                                            "STMM surface",
3531                                            w * h,
3532                                            4096);
3533         assert(pp_context->stmm.bo);
3534     }
3535
3536     /* source UV surface index 1 */
3537     gen7_pp_set_surface_state(ctx, pp_context,
3538                               obj_surface->bo, w * h,
3539                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3540                               1, 0);
3541
3542     /* source YUV surface index 3 */
3543     gen7_pp_set_surface2_state(ctx, pp_context,
3544                                obj_surface->bo, 0,
3545                                orig_w, orig_h, w,
3546                                0, h,
3547                                SURFACE_FORMAT_PLANAR_420_8, 1,
3548                                3);
3549
3550     /* source (temporal reference) YUV surface index 4 */
3551     gen7_pp_set_surface2_state(ctx, pp_context,
3552                                obj_surface->bo, 0,
3553                                orig_w, orig_h, w,
3554                                0, h,
3555                                SURFACE_FORMAT_PLANAR_420_8, 1,
3556                                4);
3557
3558     /* STMM / History Statistics input surface, index 5 */
3559     gen7_pp_set_surface_state(ctx, pp_context,
3560                               pp_context->stmm.bo, 0,
3561                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3562                               5, 1);
3563
3564     /* destination surface */
3565     obj_surface = SURFACE(dst_surface->id);
3566     orig_w = obj_surface->orig_width;
3567     orig_h = obj_surface->orig_height;
3568     w = obj_surface->width;
3569     h = obj_surface->height;
3570
3571     /* destination(Previous frame) Y surface index 27 */
3572     gen7_pp_set_surface_state(ctx, pp_context,
3573                               obj_surface->bo, 0,
3574                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3575                               27, 1);
3576
3577     /* destination(Previous frame) UV surface index 28 */
3578     gen7_pp_set_surface_state(ctx, pp_context,
3579                               obj_surface->bo, w * h,
3580                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3581                               28, 1);
3582
3583     /* destination(Current frame) Y surface index 30 */
3584     gen7_pp_set_surface_state(ctx, pp_context,
3585                               obj_surface->bo, 0,
3586                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3587                               30, 1);
3588
3589     /* destination(Current frame) UV surface index 31 */
3590     gen7_pp_set_surface_state(ctx, pp_context,
3591                               obj_surface->bo, w * h,
3592                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3593                               31, 1);
3594
3595     /* STMM output surface, index 33 */
3596     gen7_pp_set_surface_state(ctx, pp_context,
3597                               pp_context->stmm.bo, 0,
3598                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3599                               33, 1);
3600
3601
3602     /* sampler dndi */
3603     dri_bo_map(pp_context->sampler_state_table.bo, True);
3604     assert(pp_context->sampler_state_table.bo->virtual);
3605     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3606     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3607
3608     /* sample dndi index 0 */
3609     index = 0;
3610     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
3611     sampler_dndi[index].dw0.dnmh_delt = 8;
3612     sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
3613     sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
3614     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
3615     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
3616
3617     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3618     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
3619     sampler_dndi[index].dw1.stmm_c2 = 0;
3620     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
3621     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
3622
3623     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
3624     sampler_dndi[index].dw2.bne_edge_th = 1;
3625     sampler_dndi[index].dw2.smooth_mv_th = 0;
3626     sampler_dndi[index].dw2.sad_tight_th = 5;
3627     sampler_dndi[index].dw2.cat_slope_minus1 = 9;
3628     sampler_dndi[index].dw2.good_neighbor_th = 4;
3629
3630     sampler_dndi[index].dw3.maximum_stmm = 128;
3631     sampler_dndi[index].dw3.multipler_for_vecm = 2;
3632     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3633     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3634     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3635
3636     sampler_dndi[index].dw4.sdi_delta = 8;
3637     sampler_dndi[index].dw4.sdi_threshold = 128;
3638     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3639     sampler_dndi[index].dw4.stmm_shift_up = 0;
3640     sampler_dndi[index].dw4.stmm_shift_down = 0;
3641     sampler_dndi[index].dw4.minimum_stmm = 0;
3642
3643     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
3644     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
3645     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3646     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3647
3648     sampler_dndi[index].dw6.dn_enable = 0;
3649     sampler_dndi[index].dw6.di_enable = 1;
3650     sampler_dndi[index].dw6.di_partial = 0;
3651     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3652     sampler_dndi[index].dw6.dndi_stream_id = 1;
3653     sampler_dndi[index].dw6.dndi_first_frame = 1;
3654     sampler_dndi[index].dw6.progressive_dn = 0;
3655     sampler_dndi[index].dw6.mcdi_enable = 0;
3656     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
3657     sampler_dndi[index].dw6.cat_th1 = 0;
3658     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
3659     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
3660
3661     sampler_dndi[index].dw7.sad_tha = 5;
3662     sampler_dndi[index].dw7.sad_thb = 10;
3663     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3664     sampler_dndi[index].dw7.mc_pixel_consistency_th = 25;
3665     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3666     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3667     sampler_dndi[index].dw7.neighborpixel_th = 10;
3668     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3669
3670     dri_bo_unmap(pp_context->sampler_state_table.bo);
3671
3672     /* private function & data */
3673     pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
3674     pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
3675     pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
3676
3677     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3678     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3679     pp_static_parameter->grf1.di_top_field_first = 0;
3680     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3681
3682     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3683     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3684     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3685
3686     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3687     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3688
3689     pp_dndi_context->dest_w = w;
3690     pp_dndi_context->dest_h = h;
3691
3692     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3693
3694     return VA_STATUS_SUCCESS;
3695 }
3696
3697 static int
3698 gen7_pp_dn_x_steps(void *private_context)
3699 {
3700     struct pp_dn_context *pp_dn_context = private_context;
3701
3702     return pp_dn_context->dest_w / 16;
3703 }
3704
3705 static int
3706 gen7_pp_dn_y_steps(void *private_context)
3707 {
3708     struct pp_dn_context *pp_dn_context = private_context;
3709
3710     return pp_dn_context->dest_h / 4;
3711 }
3712
3713 static int
3714 gen7_pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3715 {
3716     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3717
3718     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3719     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
3720
3721     return 0;
3722 }
3723
3724 static VAStatus
3725 gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3726                            const struct i965_surface *src_surface,
3727                            const VARectangle *src_rect,
3728                            struct i965_surface *dst_surface,
3729                            const VARectangle *dst_rect,
3730                            void *filter_param)
3731 {
3732     struct i965_driver_data *i965 = i965_driver_data(ctx);
3733     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
3734     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3735     struct object_surface *obj_surface;
3736     struct gen7_sampler_dndi *sampler_dn;
3737     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3738     int index;
3739     int w, h;
3740     int orig_w, orig_h;
3741     int dn_strength = 15;
3742     int dndi_top_first = 1;
3743     int dn_progressive = 0;
3744
3745     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3746         dndi_top_first = 1;
3747         dn_progressive = 1;
3748     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3749         dndi_top_first = 1;
3750         dn_progressive = 0;
3751     } else {
3752         dndi_top_first = 0;
3753         dn_progressive = 0;
3754     }
3755
3756     if (dn_filter_param) {
3757         float value = dn_filter_param->value;
3758         
3759         if (value > 1.0)
3760             value = 1.0;
3761         
3762         if (value < 0.0)
3763             value = 0.0;
3764
3765         dn_strength = (int)(value * 31.0F);
3766     }
3767
3768     /* surface */
3769     obj_surface = SURFACE(src_surface->id);
3770     orig_w = obj_surface->orig_width;
3771     orig_h = obj_surface->orig_height;
3772     w = obj_surface->width;
3773     h = obj_surface->height;
3774
3775     if (pp_context->stmm.bo == NULL) {
3776         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
3777                                            "STMM surface",
3778                                            w * h,
3779                                            4096);
3780         assert(pp_context->stmm.bo);
3781     }
3782
3783     /* source UV surface index 1 */
3784     gen7_pp_set_surface_state(ctx, pp_context,
3785                               obj_surface->bo, w * h,
3786                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3787                               1, 0);
3788
3789     /* source YUV surface index 3 */
3790     gen7_pp_set_surface2_state(ctx, pp_context,
3791                                obj_surface->bo, 0,
3792                                orig_w, orig_h, w,
3793                                0, h,
3794                                SURFACE_FORMAT_PLANAR_420_8, 1,
3795                                3);
3796
3797     /* source (temporal reference) YUV surface index 4 */
3798     gen7_pp_set_surface2_state(ctx, pp_context,
3799                                obj_surface->bo, 0,
3800                                orig_w, orig_h, w,
3801                                0, h,
3802                                SURFACE_FORMAT_PLANAR_420_8, 1,
3803                                4);
3804
3805     /* STMM / History Statistics input surface, index 5 */
3806     gen7_pp_set_surface_state(ctx, pp_context,
3807                               pp_context->stmm.bo, 0,
3808                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3809                               5, 1);
3810
3811     /* destination surface */
3812     obj_surface = SURFACE(dst_surface->id);
3813     orig_w = obj_surface->orig_width;
3814     orig_h = obj_surface->orig_height;
3815     w = obj_surface->width;
3816     h = obj_surface->height;
3817
3818     /* destination Y surface index 24 */
3819     gen7_pp_set_surface_state(ctx, pp_context,
3820                               obj_surface->bo, 0,
3821                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3822                               24, 1);
3823
3824     /* destination UV surface index 25 */
3825     gen7_pp_set_surface_state(ctx, pp_context,
3826                               obj_surface->bo, w * h,
3827                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3828                               25, 1);
3829
3830     /* sampler dn */
3831     dri_bo_map(pp_context->sampler_state_table.bo, True);
3832     assert(pp_context->sampler_state_table.bo->virtual);
3833     assert(sizeof(*sampler_dn) == sizeof(int) * 8);
3834     sampler_dn = pp_context->sampler_state_table.bo->virtual;
3835
3836     /* sample dn index 1 */
3837     index = 0;
3838     sampler_dn[index].dw0.denoise_asd_threshold = 0;
3839     sampler_dn[index].dw0.dnmh_delt = 8;
3840     sampler_dn[index].dw0.vdi_walker_y_stride = 0;
3841     sampler_dn[index].dw0.vdi_walker_frame_sharing_enable = 0;
3842     sampler_dn[index].dw0.denoise_maximum_history = 128;      // 128-240
3843     sampler_dn[index].dw0.denoise_stad_threshold = 0;
3844
3845     sampler_dn[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3846     sampler_dn[index].dw1.denoise_moving_pixel_threshold = 0;
3847     sampler_dn[index].dw1.stmm_c2 = 0;
3848     sampler_dn[index].dw1.low_temporal_difference_threshold = 8;
3849     sampler_dn[index].dw1.temporal_difference_threshold = 16;
3850
3851     sampler_dn[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
3852     sampler_dn[index].dw2.bne_edge_th = 1;
3853     sampler_dn[index].dw2.smooth_mv_th = 0;
3854     sampler_dn[index].dw2.sad_tight_th = 5;
3855     sampler_dn[index].dw2.cat_slope_minus1 = 9;
3856     sampler_dn[index].dw2.good_neighbor_th = 4;
3857
3858     sampler_dn[index].dw3.maximum_stmm = 128;
3859     sampler_dn[index].dw3.multipler_for_vecm = 2;
3860     sampler_dn[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3861     sampler_dn[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3862     sampler_dn[index].dw3.stmm_blending_constant_select = 0;
3863
3864     sampler_dn[index].dw4.sdi_delta = 8;
3865     sampler_dn[index].dw4.sdi_threshold = 128;
3866     sampler_dn[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3867     sampler_dn[index].dw4.stmm_shift_up = 0;
3868     sampler_dn[index].dw4.stmm_shift_down = 0;
3869     sampler_dn[index].dw4.minimum_stmm = 0;
3870
3871     sampler_dn[index].dw5.fmd_temporal_difference_threshold = 0;
3872     sampler_dn[index].dw5.sdi_fallback_mode_2_constant = 0;
3873     sampler_dn[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3874     sampler_dn[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3875
3876     sampler_dn[index].dw6.dn_enable = 1;
3877     sampler_dn[index].dw6.di_enable = 0;
3878     sampler_dn[index].dw6.di_partial = 0;
3879     sampler_dn[index].dw6.dndi_top_first = dndi_top_first;
3880     sampler_dn[index].dw6.dndi_stream_id = 1;
3881     sampler_dn[index].dw6.dndi_first_frame = 1;
3882     sampler_dn[index].dw6.progressive_dn = dn_progressive;
3883     sampler_dn[index].dw6.mcdi_enable = 0;
3884     sampler_dn[index].dw6.fmd_tear_threshold = 32;
3885     sampler_dn[index].dw6.cat_th1 = 0;
3886     sampler_dn[index].dw6.fmd2_vertical_difference_threshold = 32;
3887     sampler_dn[index].dw6.fmd1_vertical_difference_threshold = 32;
3888
3889     sampler_dn[index].dw7.sad_tha = 5;
3890     sampler_dn[index].dw7.sad_thb = 10;
3891     sampler_dn[index].dw7.fmd_for_1st_field_of_current_frame = 2;
3892     sampler_dn[index].dw7.mc_pixel_consistency_th = 25;
3893     sampler_dn[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
3894     sampler_dn[index].dw7.vdi_walker_enable = 0;
3895     sampler_dn[index].dw7.neighborpixel_th = 10;
3896     sampler_dn[index].dw7.column_width_minus1 = w / 16;
3897
3898     dri_bo_unmap(pp_context->sampler_state_table.bo);
3899
3900     /* private function & data */
3901     pp_context->pp_x_steps = gen7_pp_dn_x_steps;
3902     pp_context->pp_y_steps = gen7_pp_dn_y_steps;
3903     pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;
3904
3905     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3906     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3907     pp_static_parameter->grf1.di_top_field_first = 0;
3908     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3909
3910     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3911     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3912     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3913
3914     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3915     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3916
3917     pp_dn_context->dest_w = w;
3918     pp_dn_context->dest_h = h;
3919
3920     dst_surface->flags = src_surface->flags;
3921
3922     return VA_STATUS_SUCCESS;
3923 }
3924
3925 static VAStatus
3926 ironlake_pp_initialize(
3927     VADriverContextP   ctx,
3928     struct i965_post_processing_context *pp_context,
3929     const struct i965_surface *src_surface,
3930     const VARectangle *src_rect,
3931     struct i965_surface *dst_surface,
3932     const VARectangle *dst_rect,
3933     int                pp_index,
3934     void *filter_param
3935 )
3936 {
3937     VAStatus va_status;
3938     struct i965_driver_data *i965 = i965_driver_data(ctx);
3939     struct pp_module *pp_module;
3940     dri_bo *bo;
3941     int static_param_size, inline_param_size;
3942
3943     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3944     bo = dri_bo_alloc(i965->intel.bufmgr,
3945                       "surface state & binding table",
3946                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
3947                       4096);
3948     assert(bo);
3949     pp_context->surface_state_binding_table.bo = bo;
3950
3951     dri_bo_unreference(pp_context->curbe.bo);
3952     bo = dri_bo_alloc(i965->intel.bufmgr,
3953                       "constant buffer",
3954                       4096, 
3955                       4096);
3956     assert(bo);
3957     pp_context->curbe.bo = bo;
3958
3959     dri_bo_unreference(pp_context->idrt.bo);
3960     bo = dri_bo_alloc(i965->intel.bufmgr, 
3961                       "interface discriptor", 
3962                       sizeof(struct i965_interface_descriptor), 
3963                       4096);
3964     assert(bo);
3965     pp_context->idrt.bo = bo;
3966     pp_context->idrt.num_interface_descriptors = 0;
3967
3968     dri_bo_unreference(pp_context->sampler_state_table.bo);
3969     bo = dri_bo_alloc(i965->intel.bufmgr, 
3970                       "sampler state table", 
3971                       4096,
3972                       4096);
3973     assert(bo);
3974     dri_bo_map(bo, True);
3975     memset(bo->virtual, 0, bo->size);
3976     dri_bo_unmap(bo);
3977     pp_context->sampler_state_table.bo = bo;
3978
3979     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3980     bo = dri_bo_alloc(i965->intel.bufmgr, 
3981                       "sampler 8x8 state ",
3982                       4096,
3983                       4096);
3984     assert(bo);
3985     pp_context->sampler_state_table.bo_8x8 = bo;
3986
3987     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3988     bo = dri_bo_alloc(i965->intel.bufmgr, 
3989                       "sampler 8x8 state ",
3990                       4096,
3991                       4096);
3992     assert(bo);
3993     pp_context->sampler_state_table.bo_8x8_uv = bo;
3994
3995     dri_bo_unreference(pp_context->vfe_state.bo);
3996     bo = dri_bo_alloc(i965->intel.bufmgr, 
3997                       "vfe state", 
3998                       sizeof(struct i965_vfe_state), 
3999                       4096);
4000     assert(bo);
4001     pp_context->vfe_state.bo = bo;
4002
4003     static_param_size = sizeof(struct pp_static_parameter);
4004     inline_param_size = sizeof(struct pp_inline_parameter);
4005
4006     memset(pp_context->pp_static_parameter, 0, static_param_size);
4007     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
4008     
4009     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
4010     pp_context->current_pp = pp_index;
4011     pp_module = &pp_context->pp_modules[pp_index];
4012     
4013     if (pp_module->initialize)
4014         va_status = pp_module->initialize(ctx, pp_context,
4015                                           src_surface,
4016                                           src_rect,
4017                                           dst_surface,
4018                                           dst_rect,
4019                                           filter_param);
4020     else
4021         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4022
4023     return va_status;
4024 }
4025
4026 static VAStatus
4027 ironlake_post_processing(
4028     VADriverContextP   ctx,
4029     struct i965_post_processing_context *pp_context,
4030     const struct i965_surface *src_surface,
4031     const VARectangle *src_rect,
4032     struct i965_surface *dst_surface,
4033     const VARectangle *dst_rect,
4034     int                pp_index,
4035     void *filter_param
4036 )
4037 {
4038     VAStatus va_status;
4039
4040     va_status = ironlake_pp_initialize(ctx, pp_context,
4041                                        src_surface,
4042                                        src_rect,
4043                                        dst_surface,
4044                                        dst_rect,
4045                                        pp_index,
4046                                        filter_param);
4047
4048     if (va_status == VA_STATUS_SUCCESS) {
4049         ironlake_pp_states_setup(ctx, pp_context);
4050         ironlake_pp_pipeline_setup(ctx, pp_context);
4051     }
4052
4053     return va_status;
4054 }
4055
4056 static VAStatus
4057 gen6_pp_initialize(
4058     VADriverContextP   ctx,
4059     struct i965_post_processing_context *pp_context,
4060     const struct i965_surface *src_surface,
4061     const VARectangle *src_rect,
4062     struct i965_surface *dst_surface,
4063     const VARectangle *dst_rect,
4064     int                pp_index,
4065     void *filter_param
4066 )
4067 {
4068     VAStatus va_status;
4069     struct i965_driver_data *i965 = i965_driver_data(ctx);
4070     struct pp_module *pp_module;
4071     dri_bo *bo;
4072     int static_param_size, inline_param_size;
4073
4074     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4075     bo = dri_bo_alloc(i965->intel.bufmgr,
4076                       "surface state & binding table",
4077                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
4078                       4096);
4079     assert(bo);
4080     pp_context->surface_state_binding_table.bo = bo;
4081
4082     dri_bo_unreference(pp_context->curbe.bo);
4083     bo = dri_bo_alloc(i965->intel.bufmgr,
4084                       "constant buffer",
4085                       4096, 
4086                       4096);
4087     assert(bo);
4088     pp_context->curbe.bo = bo;
4089
4090     dri_bo_unreference(pp_context->idrt.bo);
4091     bo = dri_bo_alloc(i965->intel.bufmgr, 
4092                       "interface discriptor", 
4093                       sizeof(struct gen6_interface_descriptor_data), 
4094                       4096);
4095     assert(bo);
4096     pp_context->idrt.bo = bo;
4097     pp_context->idrt.num_interface_descriptors = 0;
4098
4099     dri_bo_unreference(pp_context->sampler_state_table.bo);
4100     bo = dri_bo_alloc(i965->intel.bufmgr, 
4101                       "sampler state table", 
4102                       4096,
4103                       4096);
4104     assert(bo);
4105     dri_bo_map(bo, True);
4106     memset(bo->virtual, 0, bo->size);
4107     dri_bo_unmap(bo);
4108     pp_context->sampler_state_table.bo = bo;
4109
4110     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4111     bo = dri_bo_alloc(i965->intel.bufmgr, 
4112                       "sampler 8x8 state ",
4113                       4096,
4114                       4096);
4115     assert(bo);
4116     pp_context->sampler_state_table.bo_8x8 = bo;
4117
4118     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4119     bo = dri_bo_alloc(i965->intel.bufmgr, 
4120                       "sampler 8x8 state ",
4121                       4096,
4122                       4096);
4123     assert(bo);
4124     pp_context->sampler_state_table.bo_8x8_uv = bo;
4125
4126     dri_bo_unreference(pp_context->vfe_state.bo);
4127     bo = dri_bo_alloc(i965->intel.bufmgr, 
4128                       "vfe state", 
4129                       sizeof(struct i965_vfe_state), 
4130                       4096);
4131     assert(bo);
4132     pp_context->vfe_state.bo = bo;
4133     
4134     if (IS_GEN7(i965->intel.device_id)) {
4135         static_param_size = sizeof(struct gen7_pp_static_parameter);
4136         inline_param_size = sizeof(struct gen7_pp_inline_parameter);
4137     } else {
4138         static_param_size = sizeof(struct pp_static_parameter);
4139         inline_param_size = sizeof(struct pp_inline_parameter);
4140     }
4141
4142     memset(pp_context->pp_static_parameter, 0, static_param_size);
4143     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
4144
4145     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
4146     pp_context->current_pp = pp_index;
4147     pp_module = &pp_context->pp_modules[pp_index];
4148     
4149     if (pp_module->initialize)
4150         va_status = pp_module->initialize(ctx, pp_context,
4151                                           src_surface,
4152                                           src_rect,
4153                                           dst_surface,
4154                                           dst_rect,
4155                                           filter_param);
4156     else
4157         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4158
4159     calculate_boundary_block_mask(pp_context, dst_rect);
4160     
4161     return va_status;
4162 }
4163
4164 static void
4165 gen6_pp_interface_descriptor_table(VADriverContextP   ctx,
4166                                    struct i965_post_processing_context *pp_context)
4167 {
4168     struct i965_driver_data *i965 = i965_driver_data(ctx);
4169     struct gen6_interface_descriptor_data *desc;
4170     dri_bo *bo;
4171     int pp_index = pp_context->current_pp;
4172
4173     bo = pp_context->idrt.bo;
4174     dri_bo_map(bo, True);
4175     assert(bo->virtual);
4176     desc = bo->virtual;
4177     memset(desc, 0, sizeof(*desc));
4178     desc->desc0.kernel_start_pointer = 
4179         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
4180     desc->desc1.single_program_flow = 1;
4181     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
4182     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
4183     desc->desc2.sampler_state_pointer = 
4184         pp_context->sampler_state_table.bo->offset >> 5;
4185     desc->desc3.binding_table_entry_count = 0;
4186     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
4187     desc->desc4.constant_urb_entry_read_offset = 0;
4188
4189     if (IS_GEN7(i965->intel.device_id))
4190         desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */
4191     else
4192         desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
4193
4194     dri_bo_emit_reloc(bo,
4195                       I915_GEM_DOMAIN_INSTRUCTION, 0,
4196                       0,
4197                       offsetof(struct gen6_interface_descriptor_data, desc0),
4198                       pp_context->pp_modules[pp_index].kernel.bo);
4199
4200     dri_bo_emit_reloc(bo,
4201                       I915_GEM_DOMAIN_INSTRUCTION, 0,
4202                       desc->desc2.sampler_count << 2,
4203                       offsetof(struct gen6_interface_descriptor_data, desc2),
4204                       pp_context->sampler_state_table.bo);
4205
4206     dri_bo_unmap(bo);
4207     pp_context->idrt.num_interface_descriptors++;
4208 }
4209
4210 static void
4211 gen6_pp_upload_constants(VADriverContextP ctx,
4212                          struct i965_post_processing_context *pp_context)
4213 {
4214     struct i965_driver_data *i965 = i965_driver_data(ctx);
4215     unsigned char *constant_buffer;
4216     int param_size;
4217
4218     assert(sizeof(struct pp_static_parameter) == 128);
4219     assert(sizeof(struct gen7_pp_static_parameter) == 192);
4220
4221     if (IS_GEN7(i965->intel.device_id))
4222         param_size = sizeof(struct gen7_pp_static_parameter);
4223     else
4224         param_size = sizeof(struct pp_static_parameter);
4225
4226     dri_bo_map(pp_context->curbe.bo, 1);
4227     assert(pp_context->curbe.bo->virtual);
4228     constant_buffer = pp_context->curbe.bo->virtual;
4229     memcpy(constant_buffer, pp_context->pp_static_parameter, param_size);
4230     dri_bo_unmap(pp_context->curbe.bo);
4231 }
4232
4233 static void
4234 gen6_pp_states_setup(VADriverContextP ctx,
4235                      struct i965_post_processing_context *pp_context)
4236 {
4237     gen6_pp_interface_descriptor_table(ctx, pp_context);
4238     gen6_pp_upload_constants(ctx, pp_context);
4239 }
4240
4241 static void
4242 gen6_pp_pipeline_select(VADriverContextP ctx,
4243                         struct i965_post_processing_context *pp_context)
4244 {
4245     struct intel_batchbuffer *batch = pp_context->batch;
4246
4247     BEGIN_BATCH(batch, 1);
4248     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
4249     ADVANCE_BATCH(batch);
4250 }
4251
4252 static void
4253 gen6_pp_state_base_address(VADriverContextP ctx,
4254                            struct i965_post_processing_context *pp_context)
4255 {
4256     struct intel_batchbuffer *batch = pp_context->batch;
4257
4258     BEGIN_BATCH(batch, 10);
4259     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
4260     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4261     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
4262     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4263     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4264     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4265     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4266     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4267     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4268     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4269     ADVANCE_BATCH(batch);
4270 }
4271
4272 static void
4273 gen6_pp_vfe_state(VADriverContextP ctx,
4274                   struct i965_post_processing_context *pp_context)
4275 {
4276     struct intel_batchbuffer *batch = pp_context->batch;
4277
4278     BEGIN_BATCH(batch, 8);
4279     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
4280     OUT_BATCH(batch, 0);
4281     OUT_BATCH(batch,
4282               (pp_context->urb.num_vfe_entries - 1) << 16 |
4283               pp_context->urb.num_vfe_entries << 8);
4284     OUT_BATCH(batch, 0);
4285     OUT_BATCH(batch,
4286               (pp_context->urb.size_vfe_entry * 2) << 16 |  /* URB Entry Allocation Size, in 256 bits unit */
4287               (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2)); /* CURBE Allocation Size, in 256 bits unit */
4288     OUT_BATCH(batch, 0);
4289     OUT_BATCH(batch, 0);
4290     OUT_BATCH(batch, 0);
4291     ADVANCE_BATCH(batch);
4292 }
4293
4294 static void
4295 gen6_pp_curbe_load(VADriverContextP ctx,
4296                    struct i965_post_processing_context *pp_context)
4297 {
4298     struct intel_batchbuffer *batch = pp_context->batch;
4299
4300     assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32 <= pp_context->curbe.bo->size);
4301
4302     BEGIN_BATCH(batch, 4);
4303     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
4304     OUT_BATCH(batch, 0);
4305     OUT_BATCH(batch,
4306               pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32);
4307     OUT_RELOC(batch, 
4308               pp_context->curbe.bo,
4309               I915_GEM_DOMAIN_INSTRUCTION, 0,
4310               0);
4311     ADVANCE_BATCH(batch);
4312 }
4313
4314 static void
4315 gen6_interface_descriptor_load(VADriverContextP ctx,
4316                                struct i965_post_processing_context *pp_context)
4317 {
4318     struct intel_batchbuffer *batch = pp_context->batch;
4319
4320     BEGIN_BATCH(batch, 4);
4321     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
4322     OUT_BATCH(batch, 0);
4323     OUT_BATCH(batch,
4324               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
4325     OUT_RELOC(batch, 
4326               pp_context->idrt.bo,
4327               I915_GEM_DOMAIN_INSTRUCTION, 0,
4328               0);
4329     ADVANCE_BATCH(batch);
4330 }
4331
4332 static void update_block_mask_parameter(struct i965_post_processing_context *pp_context, int x, int y, int x_steps, int y_steps) 
4333 {
4334     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
4335
4336     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
4337     pp_inline_parameter->grf6.block_vertical_mask_bottom = pp_context->block_vertical_mask_bottom;
4338     // for the first block, it always on the left edge. the second block will reload horizontal_mask from grf6.block_horizontal_mask_middle
4339     pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_left;
4340     pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
4341     pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_right;
4342
4343     /* 1 x N */
4344     if (x_steps == 1) {
4345         if (y == y_steps-1) {
4346             pp_inline_parameter->grf5.block_vertical_mask = pp_context->block_vertical_mask_bottom;
4347         }
4348         else {
4349             pp_inline_parameter->grf6.block_vertical_mask_bottom = 0xff;
4350         }
4351     }
4352
4353     /* M x 1 */
4354     if (y_steps == 1) {
4355         if (x == 0) { // all blocks in this group are on the left edge
4356             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_left;
4357             pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_left; 
4358         }
4359         else if (x == x_steps-1) {
4360             pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_right;
4361             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_right;
4362         }
4363         else {
4364             pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
4365             pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
4366             pp_inline_parameter->grf6.block_horizontal_mask_right = 0xffff;
4367         }
4368     }
4369
4370 }
4371
4372 static void
4373 gen6_pp_object_walker(VADriverContextP ctx,
4374                       struct i965_post_processing_context *pp_context)
4375 {
4376     struct i965_driver_data *i965 = i965_driver_data(ctx);
4377     struct intel_batchbuffer *batch = pp_context->batch;
4378     int x, x_steps, y, y_steps;
4379     int param_size, command_length_in_dws;
4380     dri_bo *command_buffer;
4381     unsigned int *command_ptr;
4382
4383     if (IS_GEN7(i965->intel.device_id))
4384         param_size = sizeof(struct gen7_pp_inline_parameter);
4385     else
4386         param_size = sizeof(struct pp_inline_parameter);
4387
4388     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
4389     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
4390     command_length_in_dws = 6 + (param_size >> 2);
4391     command_buffer = dri_bo_alloc(i965->intel.bufmgr,
4392                                   "command objects buffer",
4393                                   command_length_in_dws * 4 * x_steps * y_steps + 8,
4394                                   4096);
4395
4396     dri_bo_map(command_buffer, 1);
4397     command_ptr = command_buffer->virtual;
4398
4399     for (y = 0; y < y_steps; y++) {
4400         for (x = 0; x < x_steps; x++) {
4401             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
4402                 // some common block parameter update goes here, apply to all pp functions
4403                 if (IS_GEN6(i965->intel.device_id))
4404                     update_block_mask_parameter (pp_context, x, y, x_steps, y_steps);
4405                 
4406                 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
4407                 *command_ptr++ = 0;
4408                 *command_ptr++ = 0;
4409                 *command_ptr++ = 0;
4410                 *command_ptr++ = 0;
4411                 *command_ptr++ = 0;
4412                 memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
4413                 command_ptr += (param_size >> 2);
4414             }
4415         }
4416     }
4417
4418     if (command_length_in_dws * x_steps * y_steps % 2 == 0)
4419         *command_ptr++ = 0;
4420
4421     *command_ptr = MI_BATCH_BUFFER_END;
4422
4423     dri_bo_unmap(command_buffer);
4424
4425     BEGIN_BATCH(batch, 2);
4426     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
4427     OUT_RELOC(batch, command_buffer, 
4428               I915_GEM_DOMAIN_COMMAND, 0, 
4429               0);
4430     ADVANCE_BATCH(batch);
4431     
4432     dri_bo_unreference(command_buffer);
4433
4434     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
4435      * will cause control to pass back to ring buffer 
4436      */
4437     intel_batchbuffer_end_atomic(batch);
4438     intel_batchbuffer_flush(batch);
4439     intel_batchbuffer_start_atomic(batch, 0x1000);
4440 }
4441
4442 static void
4443 gen6_pp_pipeline_setup(VADriverContextP ctx,
4444                        struct i965_post_processing_context *pp_context)
4445 {
4446     struct intel_batchbuffer *batch = pp_context->batch;
4447
4448     intel_batchbuffer_start_atomic(batch, 0x1000);
4449     intel_batchbuffer_emit_mi_flush(batch);
4450     gen6_pp_pipeline_select(ctx, pp_context);
4451     gen6_pp_state_base_address(ctx, pp_context);
4452     gen6_pp_vfe_state(ctx, pp_context);
4453     gen6_pp_curbe_load(ctx, pp_context);
4454     gen6_interface_descriptor_load(ctx, pp_context);
4455     gen6_pp_object_walker(ctx, pp_context);
4456     intel_batchbuffer_end_atomic(batch);
4457 }
4458
4459 static VAStatus
4460 gen6_post_processing(
4461     VADriverContextP   ctx,
4462     struct i965_post_processing_context *pp_context,
4463     const struct i965_surface *src_surface,
4464     const VARectangle *src_rect,
4465     struct i965_surface *dst_surface,
4466     const VARectangle *dst_rect,
4467     int                pp_index,
4468     void * filter_param
4469 )
4470 {
4471     VAStatus va_status;
4472     
4473     va_status = gen6_pp_initialize(ctx, pp_context,
4474                                    src_surface,
4475                                    src_rect,
4476                                    dst_surface,
4477                                    dst_rect,
4478                                    pp_index,
4479                                    filter_param);
4480
4481     if (va_status == VA_STATUS_SUCCESS) {
4482         gen6_pp_states_setup(ctx, pp_context);
4483         gen6_pp_pipeline_setup(ctx, pp_context);
4484     }
4485
4486     return va_status;
4487 }
4488
4489 static VAStatus
4490 i965_post_processing_internal(
4491     VADriverContextP   ctx,
4492     struct i965_post_processing_context *pp_context,
4493     const struct i965_surface *src_surface,
4494     const VARectangle *src_rect,
4495     struct i965_surface *dst_surface,
4496     const VARectangle *dst_rect,
4497     int                pp_index,
4498     void *filter_param
4499 )
4500 {
4501     struct i965_driver_data *i965 = i965_driver_data(ctx);
4502     VAStatus va_status;
4503
4504     if (IS_GEN6(i965->intel.device_id) ||
4505         IS_GEN7(i965->intel.device_id))
4506         va_status = gen6_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
4507     else
4508         va_status = ironlake_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
4509     
4510     return va_status;
4511 }
4512
4513 VAStatus 
4514 i965_DestroySurfaces(VADriverContextP ctx,
4515                      VASurfaceID *surface_list,
4516                      int num_surfaces);
4517 VAStatus 
4518 i965_CreateSurfaces(VADriverContextP ctx,
4519                     int width,
4520                     int height,
4521                     int format,
4522                     int num_surfaces,
4523                     VASurfaceID *surfaces);
4524
/*
 * Split a packed 32-bit ARGB value (alpha in bits 31..24, red in 23..16,
 * green in 15..8, blue in 7..0) and convert it to Y/U/V plus alpha.
 *
 * The conversion uses integer coefficients scaled by 1000, with an
 * offset of 16 on luma and 128 on both chroma components (presumably
 * BT.601 limited range -- confirm).  Division truncates toward zero.
 *
 * Outputs are written in the order y, v, u, a.
 */
static void
rgb_to_yuv(unsigned int argb,
           unsigned char *y,
           unsigned char *u,
           unsigned char *v,
           unsigned char *a)
{
    const int red   = (argb >> 16) & 0xff;
    const int green = (argb >> 8) & 0xff;
    const int blue  = argb & 0xff;

    /* weighted sums, still scaled by 1000 */
    const int luma_x1000 = 257 * red + 504 * green + 98 * blue;
    const int cr_x1000   = 439 * red - 368 * green - 71 * blue;
    const int cb_x1000   = -148 * red - 291 * green + 439 * blue;

    *y = luma_x1000 / 1000 + 16;
    *v = cr_x1000 / 1000 + 128;
    *u = cb_x1000 / 1000 + 128;
    *a = (argb >> 24) & 0xff;
}
4541
4542 static void 
4543 i965_vpp_clear_surface(VADriverContextP ctx,
4544                        struct i965_post_processing_context *pp_context,
4545                        VASurfaceID surface,
4546                        unsigned int color)
4547 {
4548     struct i965_driver_data *i965 = i965_driver_data(ctx);
4549     struct intel_batchbuffer *batch = pp_context->batch;
4550     struct object_surface *obj_surface = SURFACE(surface);
4551     unsigned int blt_cmd, br13;
4552     unsigned int tiling = 0, swizzle = 0;
4553     int pitch;
4554     unsigned char y, u, v, a = 0;
4555
4556     /* Currently only support NV12 surface */
4557     if (!obj_surface || obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
4558         return;
4559
4560     rgb_to_yuv(color, &y, &u, &v, &a);
4561
4562     if (a == 0)
4563         return;
4564
4565     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
4566     blt_cmd = XY_COLOR_BLT_CMD;
4567     pitch = obj_surface->width;
4568
4569     if (tiling != I915_TILING_NONE) {
4570         blt_cmd |= XY_COLOR_BLT_DST_TILED;
4571         pitch >>= 2;
4572     }
4573
4574     br13 = 0xf0 << 16;
4575     br13 |= BR13_8;
4576     br13 |= pitch;
4577
4578     if (IS_GEN6(i965->intel.device_id) ||
4579         IS_GEN7(i965->intel.device_id)) {
4580         intel_batchbuffer_start_atomic_blt(batch, 48);
4581         BEGIN_BLT_BATCH(batch, 12);
4582     } else {
4583         intel_batchbuffer_start_atomic(batch, 48);
4584         BEGIN_BATCH(batch, 12);
4585     }
4586
4587     OUT_BATCH(batch, blt_cmd);
4588     OUT_BATCH(batch, br13);
4589     OUT_BATCH(batch,
4590               0 << 16 |
4591               0);
4592     OUT_BATCH(batch,
4593               obj_surface->height << 16 |
4594               obj_surface->width);
4595     OUT_RELOC(batch, obj_surface->bo, 
4596               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4597               0);
4598     OUT_BATCH(batch, y);
4599
4600     br13 = 0xf0 << 16;
4601     br13 |= BR13_565;
4602     br13 |= pitch;
4603
4604     OUT_BATCH(batch, blt_cmd);
4605     OUT_BATCH(batch, br13);
4606     OUT_BATCH(batch,
4607               0 << 16 |
4608               0);
4609     OUT_BATCH(batch,
4610               obj_surface->height / 2 << 16 |
4611               obj_surface->width / 2);
4612     OUT_RELOC(batch, obj_surface->bo, 
4613               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4614               obj_surface->width * obj_surface->y_cb_offset);
4615     OUT_BATCH(batch, v << 8 | u);
4616
4617     ADVANCE_BATCH(batch);
4618     intel_batchbuffer_end_atomic(batch);
4619 }
4620
4621 VAStatus
4622 i965_scaling_processing(
4623     VADriverContextP   ctx,
4624     VASurfaceID        src_surface_id,
4625     const VARectangle *src_rect,
4626     VASurfaceID        dst_surface_id,
4627     const VARectangle *dst_rect,
4628     unsigned int       flags)
4629 {
4630     VAStatus va_status = VA_STATUS_SUCCESS;
4631     struct i965_driver_data *i965 = i965_driver_data(ctx);
4632     struct object_surface *src_surface_obj = SURFACE(src_surface_id);
4633     struct object_surface *dst_surface_obj = SURFACE(dst_surface_id);
4634  
4635     assert(src_surface_obj->fourcc == VA_FOURCC('N', 'V', '1', '2'));
4636     assert(dst_surface_obj->fourcc == VA_FOURCC('N', 'V', '1', '2'));
4637
4638     if (HAS_PP(i965) && (flags & I965_PP_FLAG_AVS)) {
4639         struct i965_surface src_surface;
4640         struct i965_surface dst_surface;
4641
4642          _i965LockMutex(&i965->pp_mutex);
4643
4644          src_surface.id = src_surface_id;
4645          src_surface.type = I965_SURFACE_TYPE_SURFACE;
4646          src_surface.flags = I965_SURFACE_FLAG_FRAME;
4647          dst_surface.id = dst_surface_id;
4648          dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4649          dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4650
4651          va_status = i965_post_processing_internal(ctx, i965->pp_context,
4652                                                    &src_surface,
4653                                                    src_rect,
4654                                                    &dst_surface,
4655                                                    dst_rect,
4656                                                    PP_NV12_AVS,
4657                                                    NULL);
4658
4659          _i965UnlockMutex(&i965->pp_mutex);
4660     }
4661
4662     return va_status;
4663 }
4664
4665 VASurfaceID
4666 i965_post_processing(
4667     VADriverContextP   ctx,
4668     VASurfaceID        surface,
4669     const VARectangle *src_rect,
4670     const VARectangle *dst_rect,
4671     unsigned int       flags,
4672     int               *has_done_scaling  
4673 )
4674 {
4675     struct i965_driver_data *i965 = i965_driver_data(ctx);
4676     VASurfaceID in_surface_id = surface;
4677     VASurfaceID out_surface_id = VA_INVALID_ID;
4678     
4679     *has_done_scaling = 0;
4680
4681     if (HAS_PP(i965)) {
4682         struct object_surface *obj_surface;
4683         VAStatus status;
4684         struct i965_surface src_surface;
4685         struct i965_surface dst_surface;
4686
4687         obj_surface = SURFACE(in_surface_id);
4688
4689         /* Currently only support post processing for NV12 surface */
4690         if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
4691             return out_surface_id;
4692
4693         _i965LockMutex(&i965->pp_mutex);
4694
4695         if (flags & I965_PP_FLAG_MCDI) {
4696             status = i965_CreateSurfaces(ctx,
4697                                          obj_surface->orig_width,
4698                                          obj_surface->orig_height,
4699                                          VA_RT_FORMAT_YUV420,
4700                                          1,
4701                                          &out_surface_id);
4702             assert(status == VA_STATUS_SUCCESS);
4703             obj_surface = SURFACE(out_surface_id);
4704             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4705             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
4706             src_surface.id = in_surface_id;
4707             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4708             src_surface.flags = (flags & I965_PP_FLAG_TOP_FIELD) ? 
4709                 I965_SURFACE_FLAG_TOP_FIELD_FIRST : I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST;
4710             dst_surface.id = out_surface_id;
4711             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4712             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4713
4714             i965_post_processing_internal(ctx, i965->pp_context,
4715                                           &src_surface,
4716                                           src_rect,
4717                                           &dst_surface,
4718                                           dst_rect,
4719                                           PP_NV12_DNDI,
4720                                           NULL);
4721         }
4722
4723         if (flags & I965_PP_FLAG_AVS) {
4724             struct i965_render_state *render_state = &i965->render_state;
4725             struct intel_region *dest_region = render_state->draw_region;
4726
4727             if (out_surface_id != VA_INVALID_ID)
4728                 in_surface_id = out_surface_id;
4729
4730             status = i965_CreateSurfaces(ctx,
4731                                          dest_region->width,
4732                                          dest_region->height,
4733                                          VA_RT_FORMAT_YUV420,
4734                                          1,
4735                                          &out_surface_id);
4736             assert(status == VA_STATUS_SUCCESS);
4737             obj_surface = SURFACE(out_surface_id);
4738             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4739             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
4740             src_surface.id = in_surface_id;
4741             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4742             src_surface.flags = I965_SURFACE_FLAG_FRAME;
4743             dst_surface.id = out_surface_id;
4744             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4745             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4746
4747             i965_post_processing_internal(ctx, i965->pp_context,
4748                                           &src_surface,
4749                                           src_rect,
4750                                           &dst_surface,
4751                                           dst_rect,
4752                                           PP_NV12_AVS,
4753                                           NULL);
4754
4755             if (in_surface_id != surface)
4756                 i965_DestroySurfaces(ctx, &in_surface_id, 1);
4757                 
4758             *has_done_scaling = 1;
4759         }
4760
4761         _i965UnlockMutex(&i965->pp_mutex);
4762     }
4763
4764     return out_surface_id;
4765 }       
4766
4767 static VAStatus
4768 i965_image_pl1_rgbx_processing(VADriverContextP ctx,
4769                           const struct i965_surface *src_surface,
4770                           const VARectangle *src_rect,
4771                           struct i965_surface *dst_surface,
4772                           const VARectangle *dst_rect)
4773 {
4774     struct i965_driver_data *i965 = i965_driver_data(ctx);
4775     struct i965_post_processing_context *pp_context = i965->pp_context;
4776     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4777
4778     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4779         i965_post_processing_internal(ctx, i965->pp_context,
4780                                       src_surface,
4781                                       src_rect,
4782                                       dst_surface,
4783                                       dst_rect,
4784                                       PP_RGBX_LOAD_SAVE_NV12,
4785                                       NULL);
4786     } else {
4787         assert(0);
4788         return VA_STATUS_ERROR_UNKNOWN;
4789     }
4790
4791     intel_batchbuffer_flush(pp_context->batch);
4792
4793     return VA_STATUS_SUCCESS;
4794 }
4795
4796 static VAStatus
4797 i965_image_pl3_processing(VADriverContextP ctx,
4798                           const struct i965_surface *src_surface,
4799                           const VARectangle *src_rect,
4800                           struct i965_surface *dst_surface,
4801                           const VARectangle *dst_rect)
4802 {
4803     struct i965_driver_data *i965 = i965_driver_data(ctx);
4804     struct i965_post_processing_context *pp_context = i965->pp_context;
4805     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4806     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
4807
4808     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4809         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4810                                                  src_surface,
4811                                                  src_rect,
4812                                                  dst_surface,
4813                                                  dst_rect,
4814                                                  PP_PL3_LOAD_SAVE_N12,
4815                                                  NULL);
4816     } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || 
4817                fourcc == VA_FOURCC('I', 'M', 'C', '3') || 
4818                fourcc == VA_FOURCC('Y', 'V', '1', '2') || 
4819                fourcc == VA_FOURCC('I', '4', '2', '0')) {
4820         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4821                                                  src_surface,
4822                                                  src_rect,
4823                                                  dst_surface,
4824                                                  dst_rect,
4825                                                  PP_PL3_LOAD_SAVE_PL3,
4826                                                  NULL);
4827     } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') ||
4828                fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
4829         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4830                                                  src_surface,
4831                                                  src_rect,
4832                                                  dst_surface,
4833                                                  dst_rect,
4834                                                  PP_PL3_LOAD_SAVE_PA,
4835                                                  NULL);
4836     }
4837     else {
4838         assert(0);
4839     }
4840
4841     intel_batchbuffer_flush(pp_context->batch);
4842
4843     return vaStatus;
4844 }
4845
4846 static VAStatus
4847 i965_image_pl2_processing(VADriverContextP ctx,
4848                           const struct i965_surface *src_surface,
4849                           const VARectangle *src_rect,
4850                           struct i965_surface *dst_surface,
4851                           const VARectangle *dst_rect)
4852 {
4853     struct i965_driver_data *i965 = i965_driver_data(ctx);
4854     struct i965_post_processing_context *pp_context = i965->pp_context;
4855     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4856     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
4857
4858     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4859         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4860                                                  src_surface,
4861                                                  src_rect,
4862                                                  dst_surface,
4863                                                  dst_rect,
4864                                                  PP_NV12_LOAD_SAVE_N12,
4865                                                  NULL);
4866     } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || 
4867                fourcc == VA_FOURCC('I', 'M', 'C', '3') || 
4868                fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
4869                fourcc == VA_FOURCC('I', '4', '2', '0') ) {
4870         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4871                                                  src_surface,
4872                                                  src_rect,
4873                                                  dst_surface,
4874                                                  dst_rect,
4875                                                  PP_NV12_LOAD_SAVE_PL3,
4876                                                  NULL);
4877     } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') ||
4878                fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
4879         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4880                                                  src_surface,
4881                                                  src_rect,
4882                                                  dst_surface,
4883                                                  dst_rect,
4884                                                  PP_NV12_LOAD_SAVE_PA,
4885                                                      NULL);
4886     } else if (fourcc == VA_FOURCC('B', 'G', 'R', 'X') || 
4887                fourcc == VA_FOURCC('B', 'G', 'R', 'A') ||
4888                fourcc == VA_FOURCC('R', 'G', 'B', 'X') ||
4889                fourcc == VA_FOURCC('R', 'G', 'B', 'A') ) {
4890         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4891                                       src_surface,
4892                                       src_rect,
4893                                       dst_surface,
4894                                       dst_rect,
4895                                       PP_NV12_LOAD_SAVE_RGBX,
4896                                       NULL);
4897     } else {
4898         assert(0);
4899         return VA_STATUS_ERROR_UNKNOWN;
4900     }
4901
4902     intel_batchbuffer_flush(pp_context->batch);
4903
4904     return vaStatus;
4905 }
4906
4907 static VAStatus
4908 i965_image_pl1_processing(VADriverContextP ctx,
4909                           const struct i965_surface *src_surface,
4910                           const VARectangle *src_rect,
4911                           struct i965_surface *dst_surface,
4912                           const VARectangle *dst_rect)
4913 {
4914     struct i965_driver_data *i965 = i965_driver_data(ctx);
4915     struct i965_post_processing_context *pp_context = i965->pp_context;
4916     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4917
4918     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4919         i965_post_processing_internal(ctx, i965->pp_context,
4920                                       src_surface,
4921                                       src_rect,
4922                                       dst_surface,
4923                                       dst_rect,
4924                                       PP_PA_LOAD_SAVE_NV12,
4925                                       NULL);
4926     }
4927     else if (fourcc == VA_FOURCC_YV12) {
4928         i965_post_processing_internal(ctx, i965->pp_context,
4929                                       src_surface,
4930                                       src_rect,
4931                                       dst_surface,
4932                                       dst_rect,
4933                                       PP_PA_LOAD_SAVE_PL3,
4934                                       NULL);
4935
4936     }
4937     else {
4938         return VA_STATUS_ERROR_UNKNOWN;
4939     }
4940
4941     intel_batchbuffer_flush(pp_context->batch);
4942
4943     return VA_STATUS_SUCCESS;
4944 }
4945
4946 VAStatus
4947 i965_image_processing(VADriverContextP ctx,
4948                       const struct i965_surface *src_surface,
4949                       const VARectangle *src_rect,
4950                       struct i965_surface *dst_surface,
4951                       const VARectangle *dst_rect)
4952 {
4953     struct i965_driver_data *i965 = i965_driver_data(ctx);
4954     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
4955
4956     if (HAS_PP(i965)) {
4957         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
4958
4959         _i965LockMutex(&i965->pp_mutex);
4960
4961         switch (fourcc) {
4962         case VA_FOURCC('Y', 'V', '1', '2'):
4963         case VA_FOURCC('I', '4', '2', '0'):
4964         case VA_FOURCC('I', 'M', 'C', '1'):
4965         case VA_FOURCC('I', 'M', 'C', '3'):
4966             status = i965_image_pl3_processing(ctx,
4967                                                src_surface,
4968                                                src_rect,
4969                                                dst_surface,
4970                                                dst_rect);
4971             break;
4972
4973         case  VA_FOURCC('N', 'V', '1', '2'):
4974             status = i965_image_pl2_processing(ctx,
4975                                                src_surface,
4976                                                src_rect,
4977                                                dst_surface,
4978                                                dst_rect);
4979             break;
4980         case  VA_FOURCC('Y', 'U', 'Y', '2'):
4981         case VA_FOURCC('U', 'Y', 'V', 'Y'):
4982             status = i965_image_pl1_processing(ctx,
4983                                                src_surface,
4984                                                src_rect,
4985                                                dst_surface,
4986                                                dst_rect);
4987             break;
4988         case VA_FOURCC('B', 'G', 'R', 'A'):
4989         case VA_FOURCC('B', 'G', 'R', 'X'):
4990         case VA_FOURCC('R', 'G', 'B', 'A'):
4991         case VA_FOURCC('R', 'G', 'B', 'X'):
4992             status = i965_image_pl1_rgbx_processing(ctx,
4993                                                src_surface,
4994                                                src_rect,
4995                                                dst_surface,
4996                                                dst_rect);
4997             break;
4998         default:
4999             status = VA_STATUS_ERROR_UNIMPLEMENTED;
5000             break;
5001         }
5002         
5003         _i965UnlockMutex(&i965->pp_mutex);
5004     }
5005
5006     return status;
5007 }       
5008
5009 static void
5010 i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
5011 {
5012     int i;
5013
5014     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
5015     pp_context->surface_state_binding_table.bo = NULL;
5016
5017     dri_bo_unreference(pp_context->curbe.bo);
5018     pp_context->curbe.bo = NULL;
5019
5020     dri_bo_unreference(pp_context->sampler_state_table.bo);
5021     pp_context->sampler_state_table.bo = NULL;
5022
5023     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
5024     pp_context->sampler_state_table.bo_8x8 = NULL;
5025
5026     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
5027     pp_context->sampler_state_table.bo_8x8_uv = NULL;
5028
5029     dri_bo_unreference(pp_context->idrt.bo);
5030     pp_context->idrt.bo = NULL;
5031     pp_context->idrt.num_interface_descriptors = 0;
5032
5033     dri_bo_unreference(pp_context->vfe_state.bo);
5034     pp_context->vfe_state.bo = NULL;
5035
5036     dri_bo_unreference(pp_context->stmm.bo);
5037     pp_context->stmm.bo = NULL;
5038
5039     for (i = 0; i < NUM_PP_MODULES; i++) {
5040         struct pp_module *pp_module = &pp_context->pp_modules[i];
5041
5042         dri_bo_unreference(pp_module->kernel.bo);
5043         pp_module->kernel.bo = NULL;
5044     }
5045
5046     free(pp_context->pp_static_parameter);
5047     free(pp_context->pp_inline_parameter);
5048     pp_context->pp_static_parameter = NULL;
5049     pp_context->pp_inline_parameter = NULL;
5050 }
5051
5052 Bool
5053 i965_post_processing_terminate(VADriverContextP ctx)
5054 {
5055     struct i965_driver_data *i965 = i965_driver_data(ctx);
5056     struct i965_post_processing_context *pp_context = i965->pp_context;
5057
5058     if (pp_context) {
5059         i965_post_processing_context_finalize(pp_context);
5060         free(pp_context);
5061     }
5062
5063     i965->pp_context = NULL;
5064
5065     return True;
5066 }
5067
5068 static void
5069 i965_post_processing_context_init(VADriverContextP ctx,
5070                                   struct i965_post_processing_context *pp_context,
5071                                   struct intel_batchbuffer *batch)
5072 {
5073     struct i965_driver_data *i965 = i965_driver_data(ctx);
5074     int i;
5075
5076     pp_context->urb.size = URB_SIZE((&i965->intel));
5077     pp_context->urb.num_vfe_entries = 32;
5078     pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
5079     pp_context->urb.num_cs_entries = 1;
5080     
5081     if (IS_GEN7(i965->intel.device_id))
5082         pp_context->urb.size_cs_entry = 4;      /* in 512 bits unit */
5083     else
5084         pp_context->urb.size_cs_entry = 2;
5085
5086     pp_context->urb.vfe_start = 0;
5087     pp_context->urb.cs_start = pp_context->urb.vfe_start + 
5088         pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
5089     assert(pp_context->urb.cs_start + 
5090            pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
5091
5092     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
5093     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
5094     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
5095     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen75));
5096
5097     if (IS_HASWELL(i965->intel.device_id))
5098         memcpy(pp_context->pp_modules, pp_modules_gen75, sizeof(pp_context->pp_modules));
5099     else if (IS_GEN7(i965->intel.device_id))
5100         memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
5101     else if (IS_GEN6(i965->intel.device_id))
5102         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
5103     else if (IS_IRONLAKE(i965->intel.device_id))
5104         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
5105
5106     for (i = 0; i < NUM_PP_MODULES; i++) {
5107         struct pp_module *pp_module = &pp_context->pp_modules[i];
5108         dri_bo_unreference(pp_module->kernel.bo);
5109         if (pp_module->kernel.bin && pp_module->kernel.size) {
5110             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
5111                                                 pp_module->kernel.name,
5112                                                 pp_module->kernel.size,
5113                                                 4096);
5114             assert(pp_module->kernel.bo);
5115             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
5116         } else {
5117             pp_module->kernel.bo = NULL;
5118         }
5119     }
5120
5121     /* static & inline parameters */
5122     if (IS_GEN7(i965->intel.device_id)) {
5123         pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
5124         pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
5125     } else {
5126         pp_context->pp_static_parameter = calloc(sizeof(struct pp_static_parameter), 1);
5127         pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
5128     }
5129
5130     pp_context->batch = batch;
5131 }
5132
5133 Bool
5134 i965_post_processing_init(VADriverContextP ctx)
5135 {
5136     struct i965_driver_data *i965 = i965_driver_data(ctx);
5137     struct i965_post_processing_context *pp_context = i965->pp_context;
5138
5139     if (HAS_PP(i965)) {
5140         if (pp_context == NULL) {
5141             pp_context = calloc(1, sizeof(*pp_context));
5142             i965_post_processing_context_init(ctx, pp_context, i965->batch);
5143             i965->pp_context = pp_context;
5144         }
5145     }
5146
5147     return True;
5148 }
5149
5150 static const int procfilter_to_pp_flag[VAProcFilterCount] = {
5151     PP_NULL,    /* VAProcFilterNone */
5152     PP_NV12_DN, /* VAProcFilterNoiseReduction */
5153     PP_NULL,    /* VAProcFilterDeblocking */
5154     PP_NV12_DNDI, /* VAProcFilterDeinterlacing */
5155     PP_NULL,    /* VAProcFilterSharpening */
5156     PP_NULL,    /* VAProcFilterColorBalance */
5157     PP_NULL,    /* VAProcFilterColorStandard */
5158     PP_NULL,    /* VAProcFilterFrameRateConversion */
5159 };
5160
5161 static const int proc_frame_to_pp_frame[3] = {
5162     I965_SURFACE_FLAG_FRAME,
5163     I965_SURFACE_FLAG_TOP_FIELD_FIRST,
5164     I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST
5165 };
5166
5167 void 
5168 i965_proc_picture(VADriverContextP ctx, 
5169                   VAProfile profile, 
5170                   union codec_state *codec_state,
5171                   struct hw_context *hw_context)
5172 {
5173     struct i965_driver_data *i965 = i965_driver_data(ctx);
5174     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
5175     struct proc_state *proc_state = &codec_state->proc;
5176     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
5177     struct object_surface *obj_surface;
5178     struct i965_surface src_surface, dst_surface;
5179     VARectangle src_rect, dst_rect;
5180     VAStatus status;
5181     int i;
5182     VASurfaceID tmp_surfaces[VAProcFilterCount + 4];
5183     int num_tmp_surfaces = 0;
5184     unsigned int tiling = 0, swizzle = 0;
5185     int in_width, in_height;
5186
5187     assert(pipeline_param->surface != VA_INVALID_ID);
5188     assert(proc_state->current_render_target != VA_INVALID_ID);
5189
5190     obj_surface = SURFACE(pipeline_param->surface);
5191     in_width = obj_surface->orig_width;
5192     in_height = obj_surface->orig_height;
5193     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
5194
5195     src_surface.id = pipeline_param->surface;
5196     src_surface.type = I965_SURFACE_TYPE_SURFACE;
5197     src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
5198
5199     VASurfaceID out_surface_id = VA_INVALID_ID;
5200     if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2')) {
5201         src_surface.id = pipeline_param->surface;
5202         src_surface.type = I965_SURFACE_TYPE_SURFACE;
5203         src_surface.flags = I965_SURFACE_FLAG_FRAME;
5204         src_rect.x = 0;
5205         src_rect.y = 0;
5206         src_rect.width = in_width;
5207         src_rect.height = in_height;
5208
5209         status = i965_CreateSurfaces(ctx,
5210                                      in_width,
5211                                      in_height,
5212                                      VA_RT_FORMAT_YUV420,
5213                                      1,
5214                                      &out_surface_id);
5215         assert(status == VA_STATUS_SUCCESS);
5216         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
5217         obj_surface = SURFACE(out_surface_id);
5218         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
5219
5220         dst_surface.id = out_surface_id;
5221         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5222         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
5223         dst_rect.x = 0;
5224         dst_rect.y = 0;
5225         dst_rect.width = in_width;
5226         dst_rect.height = in_height;
5227
5228         status = i965_image_processing(ctx,
5229                                        &src_surface,
5230                                        &src_rect,
5231                                        &dst_surface,
5232                                        &dst_rect);
5233         assert(status == VA_STATUS_SUCCESS);
5234
5235         src_surface.id = out_surface_id;
5236         src_surface.type = I965_SURFACE_TYPE_SURFACE;
5237         src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
5238     }
5239
5240     if (pipeline_param->surface_region) {
5241         src_rect.x = pipeline_param->surface_region->x;
5242         src_rect.y = pipeline_param->surface_region->y;
5243         src_rect.width = pipeline_param->surface_region->width;
5244         src_rect.height = pipeline_param->surface_region->height;
5245     } else {
5246         src_rect.x = 0;
5247         src_rect.y = 0;
5248         src_rect.width = in_width;
5249         src_rect.height = in_height;
5250     }
5251
5252     if (pipeline_param->output_region) {
5253         dst_rect.x = pipeline_param->output_region->x;
5254         dst_rect.y = pipeline_param->output_region->y;
5255         dst_rect.width = pipeline_param->output_region->width;
5256         dst_rect.height = pipeline_param->output_region->height;
5257     } else {
5258         dst_rect.x = 0;
5259         dst_rect.y = 0;
5260         dst_rect.width = in_width;
5261         dst_rect.height = in_height;
5262     }
5263
5264     for (i = 0; i < pipeline_param->num_filters; i++) {
5265         struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]);
5266         VAProcFilterParameterBufferBase *filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer;
5267         VAProcFilterType filter_type = filter_param->type;
5268         out_surface_id = VA_INVALID_ID;
5269         int kernel_index = procfilter_to_pp_flag[filter_type];
5270
5271         if (kernel_index != PP_NULL &&
5272             proc_context->pp_context.pp_modules[kernel_index].kernel.bo != NULL) {
5273             status = i965_CreateSurfaces(ctx,
5274                                          in_width,
5275                                          in_height,
5276                                          VA_RT_FORMAT_YUV420,
5277                                          1,
5278                                          &out_surface_id);
5279             assert(status == VA_STATUS_SUCCESS);
5280             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
5281             obj_surface = SURFACE(out_surface_id);
5282             i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
5283             dst_surface.id = out_surface_id;
5284             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5285             status = i965_post_processing_internal(ctx, &proc_context->pp_context,
5286                                                    &src_surface,
5287                                                    &src_rect,
5288                                                    &dst_surface,
5289                                                    &src_rect,
5290                                                    kernel_index,
5291                                                    filter_param);
5292
5293             if (status == VA_STATUS_SUCCESS) {
5294                 src_surface.id = dst_surface.id;
5295                 src_surface.type = dst_surface.type;
5296                 src_surface.flags = dst_surface.flags;
5297             }
5298         }
5299     }
5300
5301     obj_surface = SURFACE(proc_state->current_render_target);
5302     int csc_needed = 0;
5303     if (obj_surface->fourcc && obj_surface->fourcc !=  VA_FOURCC('N','V','1','2')){
5304         csc_needed = 1;
5305         out_surface_id = VA_INVALID_ID;
5306         status = i965_CreateSurfaces(ctx,
5307                                      obj_surface->orig_width,
5308                                      obj_surface->orig_height,
5309                                      VA_RT_FORMAT_YUV420, 
5310                                      1,
5311                                      &out_surface_id);
5312         assert(status == VA_STATUS_SUCCESS);
5313         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
5314         struct object_surface *csc_surface = SURFACE(out_surface_id);
5315         i965_check_alloc_surface_bo(ctx, csc_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
5316         dst_surface.id = out_surface_id;
5317     } else {
5318         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
5319         dst_surface.id = proc_state->current_render_target;
5320     }
5321
5322     dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5323     i965_vpp_clear_surface(ctx, &proc_context->pp_context, proc_state->current_render_target, pipeline_param->output_background_color); 
5324
5325     // load/save doesn't support different origin offset for src and dst surface
5326     if (src_rect.width == dst_rect.width &&
5327         src_rect.height == dst_rect.height &&
5328         src_rect.x == dst_rect.x &&
5329         src_rect.y == dst_rect.y) {
5330         i965_post_processing_internal(ctx, &proc_context->pp_context,
5331                                       &src_surface,
5332                                       &src_rect,
5333                                       &dst_surface,
5334                                       &dst_rect,
5335                                       PP_NV12_LOAD_SAVE_N12,
5336                                       NULL);
5337     } else {
5338
5339         i965_post_processing_internal(ctx, &proc_context->pp_context,
5340                                       &src_surface,
5341                                       &src_rect,
5342                                       &dst_surface,
5343                                       &dst_rect,
5344                                       (pipeline_param->filter_flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC ?
5345                                       PP_NV12_AVS : PP_NV12_SCALING,
5346                                       NULL);
5347     }
5348
5349     if (csc_needed) {
5350         src_surface.id = dst_surface.id;
5351         src_surface.type = dst_surface.type;
5352         src_surface.flags = dst_surface.flags;
5353         dst_surface.id = proc_state->current_render_target;
5354         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5355         i965_image_processing(ctx, &src_surface, &dst_rect, &dst_surface, &dst_rect);
5356     }
5357     
5358     if (num_tmp_surfaces)
5359         i965_DestroySurfaces(ctx,
5360                              tmp_surfaces,
5361                              num_tmp_surfaces);
5362
5363     intel_batchbuffer_flush(hw_context->batch);
5364 }
5365
5366 static void
5367 i965_proc_context_destroy(void *hw_context)
5368 {
5369     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
5370
5371     i965_post_processing_context_finalize(&proc_context->pp_context);
5372     intel_batchbuffer_free(proc_context->base.batch);
5373     free(proc_context);
5374 }
5375
5376 struct hw_context *
5377 i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config)
5378 {
5379     struct intel_driver_data *intel = intel_driver_data(ctx);
5380     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
5381
5382     proc_context->base.destroy = i965_proc_context_destroy;
5383     proc_context->base.run = i965_proc_picture;
5384     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
5385     i965_post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
5386
5387     return (struct hw_context *)proc_context;
5388 }