Update the binding table index on IVB
[profile/ivi/vaapi-intel-driver.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_post_processing.h"
40 #include "i965_render.h"
41 #include "intel_media.h"
42
/*
 * HAS_PP(ctx): non-zero when the device id stored in ctx->intel.device_id
 * is matched by IS_IRONLAKE, IS_GEN6 or IS_GEN7.
 */
#define HAS_PP(ctx)                             \
    (IS_IRONLAKE((ctx)->intel.device_id) ||     \
     IS_GEN6((ctx)->intel.device_id)     ||     \
     IS_GEN7((ctx)->intel.device_id))
46
/*
 * Surface-state slot layout.
 *
 * Each *_PADDED_SIZE_n_* is the size of one surface-state structure passed
 * through ALIGN(..., 32).  SURFACE_STATE_PADDED_SIZE is the largest of the
 * four, so one slot size fits both the i965 and the gen7 structures.
 */
#define SURFACE_STATE_PADDED_SIZE_0_I965        ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_I965        ALIGN(sizeof(struct i965_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_I965          MAX(SURFACE_STATE_PADDED_SIZE_0_I965, SURFACE_STATE_PADDED_SIZE_1_I965)

#define SURFACE_STATE_PADDED_SIZE_0_GEN7        ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_GEN7        ALIGN(sizeof(struct gen7_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN7          MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)

#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)

/*
 * Byte offset of slot `index`.  The argument is parenthesized so that an
 * expression such as SURFACE_STATE_OFFSET(base + 1) multiplies the whole
 * expression by the slot size rather than only its first operand.
 */
#define SURFACE_STATE_OFFSET(index)             ((SURFACE_STATE_PADDED_SIZE) * (index))

/* Offset of slot number MAX_PP_SURFACES, used as the binding table offset. */
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_PP_SURFACES)
58
/*
 * Block geometry constants.
 * NOTE(review): presumably the block size / x-offset alignment the GPU
 * kernels operate on -- confirm against the shader sources.
 */
#define GPU_ASM_BLOCK_WIDTH             16
#define GPU_ASM_BLOCK_HEIGHT            8
#define GPU_ASM_X_OFFSET_ALIGNMENT      4
62
63 static const uint32_t pp_null_gen5[][4] = {
64 #include "shaders/post_processing/gen5_6/null.g4b.gen5"
65 };
66
67 static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
68 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g4b.gen5"
69 };
70
71 static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
72 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g4b.gen5"
73 };
74
75 static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
76 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g4b.gen5"
77 };
78
79 static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
80 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g4b.gen5"
81 };
82
83 static const uint32_t pp_nv12_scaling_gen5[][4] = {
84 #include "shaders/post_processing/gen5_6/nv12_scaling_nv12.g4b.gen5"
85 };
86
87 static const uint32_t pp_nv12_avs_gen5[][4] = {
88 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g4b.gen5"
89 };
90
91 static const uint32_t pp_nv12_dndi_gen5[][4] = {
92 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g4b.gen5"
93 };
94
95 static const uint32_t pp_nv12_dn_gen5[][4] = {
96 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g4b.gen5"
97 };
98
99 static const uint32_t pp_nv12_load_save_pa_gen5[][4] = {
100 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g4b.gen5"
101 };
102
103 static const uint32_t pp_pl3_load_save_pa_gen5[][4] = {
104 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g4b.gen5"
105 };
106
107 static const uint32_t pp_pa_load_save_nv12_gen5[][4] = {
108 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g4b.gen5"
109 };
110
111 static const uint32_t pp_pa_load_save_pl3_gen5[][4] = {
112 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g4b.gen5"
113 };
114
115 static const uint32_t pp_rgbx_load_save_nv12_gen5[][4] = {
116 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g4b.gen5"
117 };
118
119 static const uint32_t pp_nv12_load_save_rgbx_gen5[][4] = {
120 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g4b.gen5"
121 };
122
123 static VAStatus pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
124                                    const struct i965_surface *src_surface,
125                                    const VARectangle *src_rect,
126                                    struct i965_surface *dst_surface,
127                                    const VARectangle *dst_rect,
128                                    void *filter_param);
129 static VAStatus pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
130                                             const struct i965_surface *src_surface,
131                                             const VARectangle *src_rect,
132                                             struct i965_surface *dst_surface,
133                                             const VARectangle *dst_rect,
134                                             void *filter_param);
135 static VAStatus pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
136                                            const struct i965_surface *src_surface,
137                                            const VARectangle *src_rect,
138                                            struct i965_surface *dst_surface,
139                                            const VARectangle *dst_rect,
140                                            void *filter_param);
141 static VAStatus gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
142                                              const struct i965_surface *src_surface,
143                                              const VARectangle *src_rect,
144                                              struct i965_surface *dst_surface,
145                                              const VARectangle *dst_rect,
146                                              void *filter_param);
147 static VAStatus pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
148                                                 const struct i965_surface *src_surface,
149                                                 const VARectangle *src_rect,
150                                                 struct i965_surface *dst_surface,
151                                                 const VARectangle *dst_rect,
152                                                 void *filter_param);
153 static VAStatus pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
154                                         const struct i965_surface *src_surface,
155                                         const VARectangle *src_rect,
156                                         struct i965_surface *dst_surface,
157                                         const VARectangle *dst_rect,
158                                         void *filter_param);
159 static VAStatus pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
160                                       const struct i965_surface *src_surface,
161                                       const VARectangle *src_rect,
162                                       struct i965_surface *dst_surface,
163                                       const VARectangle *dst_rect,
164                                       void *filter_param);
165
166 static struct pp_module pp_modules_gen5[] = {
167     {
168         {
169             "NULL module (for testing)",
170             PP_NULL,
171             pp_null_gen5,
172             sizeof(pp_null_gen5),
173             NULL,
174         },
175
176         pp_null_initialize,
177     },
178
179     {
180         {
181             "NV12_NV12",
182             PP_NV12_LOAD_SAVE_N12,
183             pp_nv12_load_save_nv12_gen5,
184             sizeof(pp_nv12_load_save_nv12_gen5),
185             NULL,
186         },
187
188         pp_plx_load_save_plx_initialize,
189     },
190
191     {
192         {
193             "NV12_PL3",
194             PP_NV12_LOAD_SAVE_PL3,
195             pp_nv12_load_save_pl3_gen5,
196             sizeof(pp_nv12_load_save_pl3_gen5),
197             NULL,
198         },
199
200         pp_plx_load_save_plx_initialize,
201     },
202
203     {
204         {
205             "PL3_NV12",
206             PP_PL3_LOAD_SAVE_N12,
207             pp_pl3_load_save_nv12_gen5,
208             sizeof(pp_pl3_load_save_nv12_gen5),
209             NULL,
210         },
211
212         pp_plx_load_save_plx_initialize,
213     },
214
215     {
216         {
217             "PL3_PL3",
218             PP_PL3_LOAD_SAVE_N12,
219             pp_pl3_load_save_pl3_gen5,
220             sizeof(pp_pl3_load_save_pl3_gen5),
221             NULL,
222         },
223
224         pp_plx_load_save_plx_initialize
225     },
226
227     {
228         {
229             "NV12 Scaling module",
230             PP_NV12_SCALING,
231             pp_nv12_scaling_gen5,
232             sizeof(pp_nv12_scaling_gen5),
233             NULL,
234         },
235
236         pp_nv12_scaling_initialize,
237     },
238
239     {
240         {
241             "NV12 AVS module",
242             PP_NV12_AVS,
243             pp_nv12_avs_gen5,
244             sizeof(pp_nv12_avs_gen5),
245             NULL,
246         },
247
248         pp_nv12_avs_initialize_nlas,
249     },
250
251     {
252         {
253             "NV12 DNDI module",
254             PP_NV12_DNDI,
255             pp_nv12_dndi_gen5,
256             sizeof(pp_nv12_dndi_gen5),
257             NULL,
258         },
259
260         pp_nv12_dndi_initialize,
261     },
262
263     {
264         {
265             "NV12 DN module",
266             PP_NV12_DN,
267             pp_nv12_dn_gen5,
268             sizeof(pp_nv12_dn_gen5),
269             NULL,
270         },
271
272         pp_nv12_dn_initialize,
273     },
274
275     {
276         {
277             "NV12_PA module",
278             PP_NV12_LOAD_SAVE_PA,
279             pp_nv12_load_save_pa_gen5,
280             sizeof(pp_nv12_load_save_pa_gen5),
281             NULL,
282         },
283     
284         pp_plx_load_save_plx_initialize,
285     },
286
287     {
288         {
289             "PL3_PA module",
290             PP_PL3_LOAD_SAVE_PA,
291             pp_pl3_load_save_pa_gen5,
292             sizeof(pp_pl3_load_save_pa_gen5),
293             NULL,
294         },
295     
296         pp_plx_load_save_plx_initialize,
297     },
298
299     {
300         {
301             "PA_NV12 module",
302             PP_PA_LOAD_SAVE_NV12,
303             pp_pa_load_save_nv12_gen5,
304             sizeof(pp_pa_load_save_nv12_gen5),
305             NULL,
306         },
307     
308         pp_plx_load_save_plx_initialize,
309     },
310
311     {
312         {
313             "PA_PL3 module",
314             PP_PA_LOAD_SAVE_PL3,
315             pp_pa_load_save_pl3_gen5,
316             sizeof(pp_pa_load_save_pl3_gen5),
317             NULL,
318         },
319     
320         pp_plx_load_save_plx_initialize,
321     },
322
323     {
324         {
325             "RGBX_NV12 module",
326             PP_RGBX_LOAD_SAVE_NV12,
327             pp_rgbx_load_save_nv12_gen5,
328             sizeof(pp_rgbx_load_save_nv12_gen5),
329             NULL,
330         },
331     
332         pp_plx_load_save_plx_initialize,
333     },
334             
335     {
336         {
337             "NV12_RGBX module",
338             PP_NV12_LOAD_SAVE_RGBX,
339             pp_nv12_load_save_rgbx_gen5,
340             sizeof(pp_nv12_load_save_rgbx_gen5),
341             NULL,
342         },
343     
344         pp_plx_load_save_plx_initialize,
345     },
346                     
347 };
348
349 static const uint32_t pp_null_gen6[][4] = {
350 #include "shaders/post_processing/gen5_6/null.g6b"
351 };
352
353 static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
354 #include "shaders/post_processing/gen5_6/nv12_load_save_nv12.g6b"
355 };
356
357 static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
358 #include "shaders/post_processing/gen5_6/nv12_load_save_pl3.g6b"
359 };
360
361 static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
362 #include "shaders/post_processing/gen5_6/pl3_load_save_nv12.g6b"
363 };
364
365 static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
366 #include "shaders/post_processing/gen5_6/pl3_load_save_pl3.g6b"
367 };
368
369 static const uint32_t pp_nv12_scaling_gen6[][4] = {
370 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
371 };
372
373 static const uint32_t pp_nv12_avs_gen6[][4] = {
374 #include "shaders/post_processing/gen5_6/nv12_avs_nv12.g6b"
375 };
376
377 static const uint32_t pp_nv12_dndi_gen6[][4] = {
378 #include "shaders/post_processing/gen5_6/nv12_dndi_nv12.g6b"
379 };
380
381 static const uint32_t pp_nv12_dn_gen6[][4] = {
382 #include "shaders/post_processing/gen5_6/nv12_dn_nv12.g6b"
383 };
384
385 static const uint32_t pp_nv12_load_save_pa_gen6[][4] = {
386 #include "shaders/post_processing/gen5_6/nv12_load_save_pa.g6b"
387 };
388
389 static const uint32_t pp_pl3_load_save_pa_gen6[][4] = {
390 #include "shaders/post_processing/gen5_6/pl3_load_save_pa.g6b"
391 };
392
393 static const uint32_t pp_pa_load_save_nv12_gen6[][4] = {
394 #include "shaders/post_processing/gen5_6/pa_load_save_nv12.g6b"
395 };
396
397 static const uint32_t pp_pa_load_save_pl3_gen6[][4] = {
398 #include "shaders/post_processing/gen5_6/pa_load_save_pl3.g6b"
399 };
400
401 static const uint32_t pp_rgbx_load_save_nv12_gen6[][4] = {
402 #include "shaders/post_processing/gen5_6/rgbx_load_save_nv12.g6b"
403 };
404
405 static const uint32_t pp_nv12_load_save_rgbx_gen6[][4] = {
406 #include "shaders/post_processing/gen5_6/nv12_load_save_rgbx.g6b"
407 };
408
409 static struct pp_module pp_modules_gen6[] = {
410     {
411         {
412             "NULL module (for testing)",
413             PP_NULL,
414             pp_null_gen6,
415             sizeof(pp_null_gen6),
416             NULL,
417         },
418
419         pp_null_initialize,
420     },
421
422     {
423         {
424             "NV12_NV12",
425             PP_NV12_LOAD_SAVE_N12,
426             pp_nv12_load_save_nv12_gen6,
427             sizeof(pp_nv12_load_save_nv12_gen6),
428             NULL,
429         },
430
431         pp_plx_load_save_plx_initialize,
432     },
433
434     {
435         {
436             "NV12_PL3",
437             PP_NV12_LOAD_SAVE_PL3,
438             pp_nv12_load_save_pl3_gen6,
439             sizeof(pp_nv12_load_save_pl3_gen6),
440             NULL,
441         },
442         
443         pp_plx_load_save_plx_initialize,
444     },
445
446     {
447         {
448             "PL3_NV12",
449             PP_PL3_LOAD_SAVE_N12,
450             pp_pl3_load_save_nv12_gen6,
451             sizeof(pp_pl3_load_save_nv12_gen6),
452             NULL,
453         },
454
455         pp_plx_load_save_plx_initialize,
456     },
457
458     {
459         {
460             "PL3_PL3",
461             PP_PL3_LOAD_SAVE_N12,
462             pp_pl3_load_save_pl3_gen6,
463             sizeof(pp_pl3_load_save_pl3_gen6),
464             NULL,
465         },
466
467         pp_plx_load_save_plx_initialize,
468     },
469
470     {
471         {
472             "NV12 Scaling module",
473             PP_NV12_SCALING,
474             pp_nv12_scaling_gen6,
475             sizeof(pp_nv12_scaling_gen6),
476             NULL,
477         },
478
479         gen6_nv12_scaling_initialize,
480     },
481
482     {
483         {
484             "NV12 AVS module",
485             PP_NV12_AVS,
486             pp_nv12_avs_gen6,
487             sizeof(pp_nv12_avs_gen6),
488             NULL,
489         },
490
491         pp_nv12_avs_initialize_nlas,
492     },
493
494     {
495         {
496             "NV12 DNDI module",
497             PP_NV12_DNDI,
498             pp_nv12_dndi_gen6,
499             sizeof(pp_nv12_dndi_gen6),
500             NULL,
501         },
502
503         pp_nv12_dndi_initialize,
504     },
505
506     {
507         {
508             "NV12 DN module",
509             PP_NV12_DN,
510             pp_nv12_dn_gen6,
511             sizeof(pp_nv12_dn_gen6),
512             NULL,
513         },
514
515         pp_nv12_dn_initialize,
516     },
517     {
518         {
519             "NV12_PA module",
520             PP_NV12_LOAD_SAVE_PA,
521             pp_nv12_load_save_pa_gen6,
522             sizeof(pp_nv12_load_save_pa_gen6),
523             NULL,
524         },
525     
526         pp_plx_load_save_plx_initialize,
527     },
528     
529     {
530         {
531             "PL3_PA module",
532             PP_PL3_LOAD_SAVE_PA,
533             pp_pl3_load_save_pa_gen6,
534             sizeof(pp_pl3_load_save_pa_gen6),
535             NULL,
536         },
537     
538         pp_plx_load_save_plx_initialize,
539     },
540     
541     {
542         {
543             "PA_NV12 module",
544             PP_PA_LOAD_SAVE_NV12,
545             pp_pa_load_save_nv12_gen6,
546             sizeof(pp_pa_load_save_nv12_gen6),
547             NULL,
548         },
549     
550         pp_plx_load_save_plx_initialize,
551     },
552
553     {
554         {
555             "PA_PL3 module",
556             PP_PA_LOAD_SAVE_PL3,
557             pp_pa_load_save_pl3_gen6,
558             sizeof(pp_pa_load_save_pl3_gen6),
559             NULL,
560         },
561     
562         pp_plx_load_save_plx_initialize,
563     },
564     
565     {
566         {
567             "RGBX_NV12 module",
568             PP_RGBX_LOAD_SAVE_NV12,
569             pp_rgbx_load_save_nv12_gen6,
570             sizeof(pp_rgbx_load_save_nv12_gen6),
571             NULL,
572         },
573     
574         pp_plx_load_save_plx_initialize,
575     },
576
577     {
578         {
579             "NV12_RGBX module",
580             PP_NV12_LOAD_SAVE_RGBX,
581             pp_nv12_load_save_rgbx_gen6,
582             sizeof(pp_nv12_load_save_rgbx_gen6),
583             NULL,
584         },
585     
586         pp_plx_load_save_plx_initialize,
587     },
588 };
589
590 static const uint32_t pp_null_gen7[][4] = {
591 };
592
593 static const uint32_t pp_nv12_load_save_nv12_gen7[][4] = {
594 #include "shaders/post_processing/gen7/pl2_to_pl2.g7b"
595 };
596
597 static const uint32_t pp_nv12_load_save_pl3_gen7[][4] = {
598 #include "shaders/post_processing/gen7/pl2_to_pl3.g7b"
599 };
600
601 static const uint32_t pp_pl3_load_save_nv12_gen7[][4] = {
602 #include "shaders/post_processing/gen7/pl3_to_pl2.g7b"
603 };
604
605 static const uint32_t pp_pl3_load_save_pl3_gen7[][4] = {
606 #include "shaders/post_processing/gen7/pl3_to_pl3.g7b"
607 };
608
609 static const uint32_t pp_nv12_scaling_gen7[][4] = {
610 #include "shaders/post_processing/gen7/avs.g7b"
611 };
612
613 static const uint32_t pp_nv12_avs_gen7[][4] = {
614 #include "shaders/post_processing/gen7/avs.g7b"
615 };
616
617 static const uint32_t pp_nv12_dndi_gen7[][4] = {
618 #include "shaders/post_processing/gen7/dndi.g7b"
619 };
620
621 static const uint32_t pp_nv12_dn_gen7[][4] = {
622 #include "shaders/post_processing/gen7/nv12_dn_nv12.g7b"
623 };
624 static const uint32_t pp_nv12_load_save_pa_gen7[][4] = {
625 #include "shaders/post_processing/gen7/pl2_to_pa.g7b"
626 };
627 static const uint32_t pp_pl3_load_save_pa_gen7[][4] = {
628 #include "shaders/post_processing/gen7/pl3_to_pa.g7b"
629 };
630 static const uint32_t pp_pa_load_save_nv12_gen7[][4] = {
631 #include "shaders/post_processing/gen7/pa_to_pl2.g7b"
632 };
633 static const uint32_t pp_pa_load_save_pl3_gen7[][4] = {
634 #include "shaders/post_processing/gen7/pa_to_pl3.g7b"
635 };
636 static const uint32_t pp_rgbx_load_save_nv12_gen7[][4] = {
637 #include "shaders/post_processing/gen7/rgbx_to_nv12.g7b"
638 };
639 static const uint32_t pp_nv12_load_save_rgbx_gen7[][4] = {
640 #include "shaders/post_processing/gen7/pl2_to_rgbx.g7b"
641 };
642
643 static VAStatus gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
644                                            const struct i965_surface *src_surface,
645                                            const VARectangle *src_rect,
646                                            struct i965_surface *dst_surface,
647                                            const VARectangle *dst_rect,
648                                            void *filter_param);
649 static VAStatus gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
650                                              const struct i965_surface *src_surface,
651                                              const VARectangle *src_rect,
652                                              struct i965_surface *dst_surface,
653                                              const VARectangle *dst_rect,
654                                              void *filter_param);
655 static VAStatus gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
656                                            const struct i965_surface *src_surface,
657                                            const VARectangle *src_rect,
658                                            struct i965_surface *dst_surface,
659                                            const VARectangle *dst_rect,
660                                            void *filter_param);
661
662 static VAStatus gen7_pp_rgbx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
663                                            const struct i965_surface *src_surface,
664                                            const VARectangle *src_rect,
665                                            struct i965_surface *dst_surface,
666                                            const VARectangle *dst_rect,
667                                            void *filter_param);
668
669 static struct pp_module pp_modules_gen7[] = {
670     {
671         {
672             "NULL module (for testing)",
673             PP_NULL,
674             pp_null_gen7,
675             sizeof(pp_null_gen7),
676             NULL,
677         },
678
679         pp_null_initialize,
680     },
681
682     {
683         {
684             "NV12_NV12",
685             PP_NV12_LOAD_SAVE_N12,
686             pp_nv12_load_save_nv12_gen7,
687             sizeof(pp_nv12_load_save_nv12_gen7),
688             NULL,
689         },
690
691         gen7_pp_plx_avs_initialize,
692     },
693
694     {
695         {
696             "NV12_PL3",
697             PP_NV12_LOAD_SAVE_PL3,
698             pp_nv12_load_save_pl3_gen7,
699             sizeof(pp_nv12_load_save_pl3_gen7),
700             NULL,
701         },
702         
703         gen7_pp_plx_avs_initialize,
704     },
705
706     {
707         {
708             "PL3_NV12",
709             PP_PL3_LOAD_SAVE_N12,
710             pp_pl3_load_save_nv12_gen7,
711             sizeof(pp_pl3_load_save_nv12_gen7),
712             NULL,
713         },
714
715         gen7_pp_plx_avs_initialize,
716     },
717
718     {
719         {
720             "PL3_PL3",
721             PP_PL3_LOAD_SAVE_N12,
722             pp_pl3_load_save_pl3_gen7,
723             sizeof(pp_pl3_load_save_pl3_gen7),
724             NULL,
725         },
726
727         gen7_pp_plx_avs_initialize,
728     },
729
730     {
731         {
732             "NV12 Scaling module",
733             PP_NV12_SCALING,
734             pp_nv12_scaling_gen7,
735             sizeof(pp_nv12_scaling_gen7),
736             NULL,
737         },
738
739         gen7_pp_plx_avs_initialize,
740     },
741
742     {
743         {
744             "NV12 AVS module",
745             PP_NV12_AVS,
746             pp_nv12_avs_gen7,
747             sizeof(pp_nv12_avs_gen7),
748             NULL,
749         },
750
751         gen7_pp_plx_avs_initialize,
752     },
753
754     {
755         {
756             "NV12 DNDI module",
757             PP_NV12_DNDI,
758             pp_nv12_dndi_gen7,
759             sizeof(pp_nv12_dndi_gen7),
760             NULL,
761         },
762
763         gen7_pp_nv12_dndi_initialize,
764     },
765
766     {
767         {
768             "NV12 DN module",
769             PP_NV12_DN,
770             pp_nv12_dn_gen7,
771             sizeof(pp_nv12_dn_gen7),
772             NULL,
773         },
774
775         gen7_pp_nv12_dn_initialize,
776     },
777     {
778         {
779             "NV12_PA module",
780             PP_NV12_LOAD_SAVE_PA,
781             pp_nv12_load_save_pa_gen7,
782             sizeof(pp_nv12_load_save_pa_gen7),
783             NULL,
784         },
785     
786         gen7_pp_plx_avs_initialize,
787     },
788
789     {
790         {
791             "PL3_PA module",
792             PP_PL3_LOAD_SAVE_PA,
793             pp_pl3_load_save_pa_gen7,
794             sizeof(pp_pl3_load_save_pa_gen7),
795             NULL,
796         },
797     
798         gen7_pp_plx_avs_initialize,
799     },
800
801     {
802         {
803             "PA_NV12 module",
804             PP_PA_LOAD_SAVE_NV12,
805             pp_pa_load_save_nv12_gen7,
806             sizeof(pp_pa_load_save_nv12_gen7),
807             NULL,
808         },
809     
810         gen7_pp_plx_avs_initialize,
811     },
812
813     {
814         {
815             "PA_PL3 module",
816             PP_PA_LOAD_SAVE_PL3,
817             pp_pa_load_save_pl3_gen7,
818             sizeof(pp_pa_load_save_pl3_gen7),
819             NULL,
820         },
821     
822         gen7_pp_plx_avs_initialize,
823     },
824     
825     {
826         {
827             "RGBX_NV12 module",
828             PP_RGBX_LOAD_SAVE_NV12,
829             pp_rgbx_load_save_nv12_gen7,
830             sizeof(pp_rgbx_load_save_nv12_gen7),
831             NULL,
832         },
833     
834         gen7_pp_rgbx_avs_initialize,
835     },
836
837     {
838         {
839             "NV12_RGBX module",
840             PP_NV12_LOAD_SAVE_RGBX,
841             pp_nv12_load_save_rgbx_gen7,
842             sizeof(pp_nv12_load_save_rgbx_gen7),
843             NULL,
844         },
845     
846         gen7_pp_plx_avs_initialize,
847     },
848             
849 };
850
851 static const uint32_t pp_null_gen75[][4] = {
852 };
853
854 static const uint32_t pp_nv12_load_save_nv12_gen75[][4] = {
855 #include "shaders/post_processing/gen7/pl2_to_pl2.g75b"
856 };
857
858 static const uint32_t pp_nv12_load_save_pl3_gen75[][4] = {
859 #include "shaders/post_processing/gen7/pl2_to_pl3.g75b"
860 };
861
862 static const uint32_t pp_pl3_load_save_nv12_gen75[][4] = {
863 #include "shaders/post_processing/gen7/pl3_to_pl2.g75b"
864 };
865
866 static const uint32_t pp_pl3_load_save_pl3_gen75[][4] = {
867 #include "shaders/post_processing/gen7/pl3_to_pl3.g75b"
868 };
869
870 static const uint32_t pp_nv12_scaling_gen75[][4] = {
871 #include "shaders/post_processing/gen7/avs.g75b"
872 };
873
874 static const uint32_t pp_nv12_avs_gen75[][4] = {
875 #include "shaders/post_processing/gen7/avs.g75b"
876 };
877
878 static const uint32_t pp_nv12_dndi_gen75[][4] = {
879 // #include "shaders/post_processing/gen7/dndi.g75b"
880 };
881
882 static const uint32_t pp_nv12_dn_gen75[][4] = {
883 // #include "shaders/post_processing/gen7/nv12_dn_nv12.g75b"
884 };
885 static const uint32_t pp_nv12_load_save_pa_gen75[][4] = {
886 #include "shaders/post_processing/gen7/pl2_to_pa.g75b"
887 };
888 static const uint32_t pp_pl3_load_save_pa_gen75[][4] = {
889 #include "shaders/post_processing/gen7/pl3_to_pa.g75b"
890 };
891 static const uint32_t pp_pa_load_save_nv12_gen75[][4] = {
892 #include "shaders/post_processing/gen7/pa_to_pl2.g75b"
893 };
894 static const uint32_t pp_pa_load_save_pl3_gen75[][4] = {
895 #include "shaders/post_processing/gen7/pa_to_pl3.g75b"
896 };
897 static const uint32_t pp_rgbx_load_save_nv12_gen75[][4] = {
898 #include "shaders/post_processing/gen7/rgbx_to_nv12.g75b"
899 };
900 static const uint32_t pp_nv12_load_save_rgbx_gen75[][4] = {
901 #include "shaders/post_processing/gen7/pl2_to_rgbx.g75b"
902 };
903
904 static struct pp_module pp_modules_gen75[] = {
905     {
906         {
907             "NULL module (for testing)",
908             PP_NULL,
909             pp_null_gen75,
910             sizeof(pp_null_gen75),
911             NULL,
912         },
913
914         pp_null_initialize,
915     },
916
917     {
918         {
919             "NV12_NV12",
920             PP_NV12_LOAD_SAVE_N12,
921             pp_nv12_load_save_nv12_gen75,
922             sizeof(pp_nv12_load_save_nv12_gen75),
923             NULL,
924         },
925
926         gen7_pp_plx_avs_initialize,
927     },
928
929     {
930         {
931             "NV12_PL3",
932             PP_NV12_LOAD_SAVE_PL3,
933             pp_nv12_load_save_pl3_gen75,
934             sizeof(pp_nv12_load_save_pl3_gen75),
935             NULL,
936         },
937         
938         gen7_pp_plx_avs_initialize,
939     },
940
941     {
942         {
943             "PL3_NV12",
944             PP_PL3_LOAD_SAVE_N12,
945             pp_pl3_load_save_nv12_gen75,
946             sizeof(pp_pl3_load_save_nv12_gen75),
947             NULL,
948         },
949
950         gen7_pp_plx_avs_initialize,
951     },
952
953     {
954         {
955             "PL3_PL3",
956             PP_PL3_LOAD_SAVE_N12,
957             pp_pl3_load_save_pl3_gen75,
958             sizeof(pp_pl3_load_save_pl3_gen75),
959             NULL,
960         },
961
962         gen7_pp_plx_avs_initialize,
963     },
964
965     {
966         {
967             "NV12 Scaling module",
968             PP_NV12_SCALING,
969             pp_nv12_scaling_gen75,
970             sizeof(pp_nv12_scaling_gen75),
971             NULL,
972         },
973
974         gen7_pp_plx_avs_initialize,
975     },
976
977     {
978         {
979             "NV12 AVS module",
980             PP_NV12_AVS,
981             pp_nv12_avs_gen75,
982             sizeof(pp_nv12_avs_gen75),
983             NULL,
984         },
985
986         gen7_pp_plx_avs_initialize,
987     },
988
989     {
990         {
991             "NV12 DNDI module",
992             PP_NV12_DNDI,
993             pp_nv12_dndi_gen75,
994             sizeof(pp_nv12_dndi_gen75),
995             NULL,
996         },
997
998         gen7_pp_nv12_dndi_initialize,
999     },
1000
1001     {
1002         {
1003             "NV12 DN module",
1004             PP_NV12_DN,
1005             pp_nv12_dn_gen75,
1006             sizeof(pp_nv12_dn_gen75),
1007             NULL,
1008         },
1009
1010         gen7_pp_nv12_dn_initialize,
1011     },
1012     {
1013         {
1014             "NV12_PA module",
1015             PP_NV12_LOAD_SAVE_PA,
1016             pp_nv12_load_save_pa_gen75,
1017             sizeof(pp_nv12_load_save_pa_gen75),
1018             NULL,
1019         },
1020     
1021         gen7_pp_plx_avs_initialize,
1022     },
1023
1024     {
1025         {
1026             "PL3_PA module",
1027             PP_PL3_LOAD_SAVE_PA,
1028             pp_pl3_load_save_pa_gen75,
1029             sizeof(pp_pl3_load_save_pa_gen75),
1030             NULL,
1031         },
1032     
1033         gen7_pp_plx_avs_initialize,
1034     },
1035
1036     {
1037         {
1038             "PA_NV12 module",
1039             PP_PA_LOAD_SAVE_NV12,
1040             pp_pa_load_save_nv12_gen75,
1041             sizeof(pp_pa_load_save_nv12_gen75),
1042             NULL,
1043         },
1044     
1045         gen7_pp_plx_avs_initialize,
1046     },
1047
1048     {
1049         {
1050             "PA_PL3 module",
1051             PP_PA_LOAD_SAVE_PL3,
1052             pp_pa_load_save_pl3_gen75,
1053             sizeof(pp_pa_load_save_pl3_gen75),
1054             NULL,
1055         },
1056     
1057         gen7_pp_plx_avs_initialize,
1058     },
1059     
1060     {
1061         {
1062             "RGBX_NV12 module",
1063             PP_RGBX_LOAD_SAVE_NV12,
1064             pp_rgbx_load_save_nv12_gen75,
1065             sizeof(pp_rgbx_load_save_nv12_gen75),
1066             NULL,
1067         },
1068     
1069         gen7_pp_rgbx_avs_initialize,
1070     },
1071
1072     {
1073         {
1074             "NV12_RGBX module",
1075             PP_NV12_LOAD_SAVE_RGBX,
1076             pp_nv12_load_save_rgbx_gen75,
1077             sizeof(pp_nv12_load_save_rgbx_gen75),
1078             NULL,
1079         },
1080     
1081         gen7_pp_plx_avs_initialize,
1082     },
1083             
1084 };
1085
1086 static int
1087 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
1088 {
1089     struct i965_driver_data *i965 = i965_driver_data(ctx);
1090     int fourcc;
1091
1092     if (surface->type == I965_SURFACE_TYPE_IMAGE) {
1093         struct object_image *obj_image = IMAGE(surface->id);
1094         fourcc = obj_image->image.format.fourcc;
1095     } else {
1096         struct object_surface *obj_surface = SURFACE(surface->id);
1097         fourcc = obj_surface->fourcc;
1098     }
1099
1100     return fourcc;
1101 }
1102
1103 static void
1104 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
1105 {
1106     switch (tiling) {
1107     case I915_TILING_NONE:
1108         ss->ss3.tiled_surface = 0;
1109         ss->ss3.tile_walk = 0;
1110         break;
1111     case I915_TILING_X:
1112         ss->ss3.tiled_surface = 1;
1113         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
1114         break;
1115     case I915_TILING_Y:
1116         ss->ss3.tiled_surface = 1;
1117         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
1118         break;
1119     }
1120 }
1121
1122 static void
1123 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
1124 {
1125     switch (tiling) {
1126     case I915_TILING_NONE:
1127         ss->ss2.tiled_surface = 0;
1128         ss->ss2.tile_walk = 0;
1129         break;
1130     case I915_TILING_X:
1131         ss->ss2.tiled_surface = 1;
1132         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1133         break;
1134     case I915_TILING_Y:
1135         ss->ss2.tiled_surface = 1;
1136         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1137         break;
1138     }
1139 }
1140
1141 static void
1142 gen7_pp_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
1143 {
1144     switch (tiling) {
1145     case I915_TILING_NONE:
1146         ss->ss0.tiled_surface = 0;
1147         ss->ss0.tile_walk = 0;
1148         break;
1149     case I915_TILING_X:
1150         ss->ss0.tiled_surface = 1;
1151         ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
1152         break;
1153     case I915_TILING_Y:
1154         ss->ss0.tiled_surface = 1;
1155         ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
1156         break;
1157     }
1158 }
1159
1160 static void
1161 gen7_pp_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
1162 {
1163     switch (tiling) {
1164     case I915_TILING_NONE:
1165         ss->ss2.tiled_surface = 0;
1166         ss->ss2.tile_walk = 0;
1167         break;
1168     case I915_TILING_X:
1169         ss->ss2.tiled_surface = 1;
1170         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1171         break;
1172     case I915_TILING_Y:
1173         ss->ss2.tiled_surface = 1;
1174         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1175         break;
1176     }
1177 }
1178
1179 static void
1180 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
1181 {
1182     struct i965_interface_descriptor *desc;
1183     dri_bo *bo;
1184     int pp_index = pp_context->current_pp;
1185
1186     bo = pp_context->idrt.bo;
1187     dri_bo_map(bo, 1);
1188     assert(bo->virtual);
1189     desc = bo->virtual;
1190     memset(desc, 0, sizeof(*desc));
1191     desc->desc0.grf_reg_blocks = 10;
1192     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
1193     desc->desc1.const_urb_entry_read_offset = 0;
1194     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
1195     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
1196     desc->desc2.sampler_count = 0;
1197     desc->desc3.binding_table_entry_count = 0;
1198     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
1199
1200     dri_bo_emit_reloc(bo,
1201                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1202                       desc->desc0.grf_reg_blocks,
1203                       offsetof(struct i965_interface_descriptor, desc0),
1204                       pp_context->pp_modules[pp_index].kernel.bo);
1205
1206     dri_bo_emit_reloc(bo,
1207                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1208                       desc->desc2.sampler_count << 2,
1209                       offsetof(struct i965_interface_descriptor, desc2),
1210                       pp_context->sampler_state_table.bo);
1211
1212     dri_bo_unmap(bo);
1213     pp_context->idrt.num_interface_descriptors++;
1214 }
1215
1216 static void
1217 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
1218 {
1219     struct i965_vfe_state *vfe_state;
1220     dri_bo *bo;
1221
1222     bo = pp_context->vfe_state.bo;
1223     dri_bo_map(bo, 1);
1224     assert(bo->virtual);
1225     vfe_state = bo->virtual;
1226     memset(vfe_state, 0, sizeof(*vfe_state));
1227     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
1228     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
1229     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
1230     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
1231     vfe_state->vfe1.children_present = 0;
1232     vfe_state->vfe2.interface_descriptor_base = 
1233         pp_context->idrt.bo->offset >> 4; /* reloc */
1234     dri_bo_emit_reloc(bo,
1235                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1236                       0,
1237                       offsetof(struct i965_vfe_state, vfe2),
1238                       pp_context->idrt.bo);
1239     dri_bo_unmap(bo);
1240 }
1241
1242 static void
1243 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
1244 {
1245     unsigned char *constant_buffer;
1246     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1247
1248     assert(sizeof(*pp_static_parameter) == 128);
1249     dri_bo_map(pp_context->curbe.bo, 1);
1250     assert(pp_context->curbe.bo->virtual);
1251     constant_buffer = pp_context->curbe.bo->virtual;
1252     memcpy(constant_buffer, pp_static_parameter, sizeof(*pp_static_parameter));
1253     dri_bo_unmap(pp_context->curbe.bo);
1254 }
1255
1256 static void
1257 ironlake_pp_states_setup(VADriverContextP ctx,
1258                          struct i965_post_processing_context *pp_context)
1259 {
1260     ironlake_pp_interface_descriptor_table(pp_context);
1261     ironlake_pp_vfe_state(pp_context);
1262     ironlake_pp_upload_constants(pp_context);
1263 }
1264
1265 static void
1266 ironlake_pp_pipeline_select(VADriverContextP ctx,
1267                             struct i965_post_processing_context *pp_context)
1268 {
1269     struct intel_batchbuffer *batch = pp_context->batch;
1270
1271     BEGIN_BATCH(batch, 1);
1272     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
1273     ADVANCE_BATCH(batch);
1274 }
1275
1276 static void
1277 ironlake_pp_urb_layout(VADriverContextP ctx,
1278                        struct i965_post_processing_context *pp_context)
1279 {
1280     struct intel_batchbuffer *batch = pp_context->batch;
1281     unsigned int vfe_fence, cs_fence;
1282
1283     vfe_fence = pp_context->urb.cs_start;
1284     cs_fence = pp_context->urb.size;
1285
1286     BEGIN_BATCH(batch, 3);
1287     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
1288     OUT_BATCH(batch, 0);
1289     OUT_BATCH(batch, 
1290               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
1291               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
1292     ADVANCE_BATCH(batch);
1293 }
1294
1295 static void
1296 ironlake_pp_state_base_address(VADriverContextP ctx,
1297                                struct i965_post_processing_context *pp_context)
1298 {
1299     struct intel_batchbuffer *batch = pp_context->batch;
1300
1301     BEGIN_BATCH(batch, 8);
1302     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
1303     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1304     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
1305     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1306     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1307     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1308     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1309     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1310     ADVANCE_BATCH(batch);
1311 }
1312
1313 static void
1314 ironlake_pp_state_pointers(VADriverContextP ctx,
1315                            struct i965_post_processing_context *pp_context)
1316 {
1317     struct intel_batchbuffer *batch = pp_context->batch;
1318
1319     BEGIN_BATCH(batch, 3);
1320     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
1321     OUT_BATCH(batch, 0);
1322     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1323     ADVANCE_BATCH(batch);
1324 }
1325
1326 static void 
1327 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
1328                           struct i965_post_processing_context *pp_context)
1329 {
1330     struct intel_batchbuffer *batch = pp_context->batch;
1331
1332     BEGIN_BATCH(batch, 2);
1333     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
1334     OUT_BATCH(batch,
1335               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
1336               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
1337     ADVANCE_BATCH(batch);
1338 }
1339
1340 static void
1341 ironlake_pp_constant_buffer(VADriverContextP ctx,
1342                             struct i965_post_processing_context *pp_context)
1343 {
1344     struct intel_batchbuffer *batch = pp_context->batch;
1345
1346     BEGIN_BATCH(batch, 2);
1347     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
1348     OUT_RELOC(batch, pp_context->curbe.bo,
1349               I915_GEM_DOMAIN_INSTRUCTION, 0,
1350               pp_context->urb.size_cs_entry - 1);
1351     ADVANCE_BATCH(batch);    
1352 }
1353
1354 static void
1355 ironlake_pp_object_walker(VADriverContextP ctx,
1356                           struct i965_post_processing_context *pp_context)
1357 {
1358     struct intel_batchbuffer *batch = pp_context->batch;
1359     int x, x_steps, y, y_steps;
1360     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1361
1362     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
1363     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
1364
1365     for (y = 0; y < y_steps; y++) {
1366         for (x = 0; x < x_steps; x++) {
1367             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
1368                 BEGIN_BATCH(batch, 20);
1369                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
1370                 OUT_BATCH(batch, 0);
1371                 OUT_BATCH(batch, 0); /* no indirect data */
1372                 OUT_BATCH(batch, 0);
1373
1374                 /* inline data grf 5-6 */
1375                 assert(sizeof(*pp_inline_parameter) == 64);
1376                 intel_batchbuffer_data(batch, pp_inline_parameter, sizeof(*pp_inline_parameter));
1377
1378                 ADVANCE_BATCH(batch);
1379             }
1380         }
1381     }
1382 }
1383
1384 static void
1385 ironlake_pp_pipeline_setup(VADriverContextP ctx,
1386                            struct i965_post_processing_context *pp_context)
1387 {
1388     struct intel_batchbuffer *batch = pp_context->batch;
1389
1390     intel_batchbuffer_start_atomic(batch, 0x1000);
1391     intel_batchbuffer_emit_mi_flush(batch);
1392     ironlake_pp_pipeline_select(ctx, pp_context);
1393     ironlake_pp_state_base_address(ctx, pp_context);
1394     ironlake_pp_state_pointers(ctx, pp_context);
1395     ironlake_pp_urb_layout(ctx, pp_context);
1396     ironlake_pp_cs_urb_layout(ctx, pp_context);
1397     ironlake_pp_constant_buffer(ctx, pp_context);
1398     ironlake_pp_object_walker(ctx, pp_context);
1399     intel_batchbuffer_end_atomic(batch);
1400 }
1401
1402 // update u/v offset when the surface format are packed yuv
1403 static void i965_update_src_surface_static_parameter(
1404     VADriverContextP    ctx, 
1405     struct i965_post_processing_context *pp_context,
1406     const struct i965_surface *surface)
1407 {
1408     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1409     int fourcc = pp_get_surface_fourcc(ctx, surface);
1410
1411     switch (fourcc) {
1412     case VA_FOURCC('Y', 'U', 'Y', '2'):
1413         pp_static_parameter->grf1.source_packed_u_offset = 1;
1414         pp_static_parameter->grf1.source_packed_v_offset = 3;
1415         break;
1416     case VA_FOURCC('U', 'Y', 'V', 'Y'):
1417         pp_static_parameter->grf1.source_packed_y_offset = 1;
1418         pp_static_parameter->grf1.source_packed_v_offset = 2;
1419         break;
1420     case VA_FOURCC('B', 'G', 'R', 'X'):
1421     case VA_FOURCC('B', 'G', 'R', 'A'):
1422         pp_static_parameter->grf1.source_rgb_layout = 0;
1423         break;
1424     case VA_FOURCC('R', 'G', 'B', 'X'):
1425     case VA_FOURCC('R', 'G', 'B', 'A'):
1426         pp_static_parameter->grf1.source_rgb_layout = 1;
1427         break;
1428     default:
1429         break;
1430     }
1431     
1432 }
1433
1434 static void i965_update_dst_surface_static_parameter(
1435     VADriverContextP    ctx, 
1436     struct i965_post_processing_context *pp_context,
1437     const struct i965_surface *surface)
1438 {
1439     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1440     int fourcc = pp_get_surface_fourcc(ctx, surface);
1441
1442     switch (fourcc) {
1443     case VA_FOURCC('Y', 'U', 'Y', '2'):
1444         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_u_offset = 1;
1445         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 3;
1446         break;
1447     case VA_FOURCC('U', 'Y', 'V', 'Y'):
1448         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_y_offset = 1;
1449         pp_static_parameter->grf1.r1_2.load_and_save.destination_packed_v_offset = 2;
1450         break;
1451     case VA_FOURCC('B', 'G', 'R', 'X'):
1452     case VA_FOURCC('B', 'G', 'R', 'A'):
1453         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 0;
1454         break;
1455     case VA_FOURCC('R', 'G', 'B', 'X'):
1456     case VA_FOURCC('R', 'G', 'B', 'A'):
1457         pp_static_parameter->grf1.r1_2.csc.destination_rgb_layout = 1;
1458         break;
1459     default:
1460         break;
1461     }
1462     
1463 }
1464
1465 static void
1466 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1467                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1468                           int width, int height, int pitch, int format, 
1469                           int index, int is_target)
1470 {
1471     struct i965_surface_state *ss;
1472     dri_bo *ss_bo;
1473     unsigned int tiling;
1474     unsigned int swizzle;
1475
1476     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1477     ss_bo = pp_context->surface_state_binding_table.bo;
1478     assert(ss_bo);
1479
1480     dri_bo_map(ss_bo, True);
1481     assert(ss_bo->virtual);
1482     ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1483     memset(ss, 0, sizeof(*ss));
1484     ss->ss0.surface_type = I965_SURFACE_2D;
1485     ss->ss0.surface_format = format;
1486     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1487     ss->ss2.width = width - 1;
1488     ss->ss2.height = height - 1;
1489     ss->ss3.pitch = pitch - 1;
1490     pp_set_surface_tiling(ss, tiling);
1491     dri_bo_emit_reloc(ss_bo,
1492                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1493                       surf_bo_offset,
1494                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
1495                       surf_bo);
1496     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1497     dri_bo_unmap(ss_bo);
1498 }
1499
1500 static void
1501 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1502                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1503                            int width, int height, int wpitch,
1504                            int xoffset, int yoffset,
1505                            int format, int interleave_chroma,
1506                            int index)
1507 {
1508     struct i965_surface_state2 *ss2;
1509     dri_bo *ss2_bo;
1510     unsigned int tiling;
1511     unsigned int swizzle;
1512
1513     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1514     ss2_bo = pp_context->surface_state_binding_table.bo;
1515     assert(ss2_bo);
1516
1517     dri_bo_map(ss2_bo, True);
1518     assert(ss2_bo->virtual);
1519     ss2 = (struct i965_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1520     memset(ss2, 0, sizeof(*ss2));
1521     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1522     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1523     ss2->ss1.width = width - 1;
1524     ss2->ss1.height = height - 1;
1525     ss2->ss2.pitch = wpitch - 1;
1526     ss2->ss2.interleave_chroma = interleave_chroma;
1527     ss2->ss2.surface_format = format;
1528     ss2->ss3.x_offset_for_cb = xoffset;
1529     ss2->ss3.y_offset_for_cb = yoffset;
1530     pp_set_surface2_tiling(ss2, tiling);
1531     dri_bo_emit_reloc(ss2_bo,
1532                       I915_GEM_DOMAIN_RENDER, 0,
1533                       surf_bo_offset,
1534                       SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state2, ss0),
1535                       surf_bo);
1536     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1537     dri_bo_unmap(ss2_bo);
1538 }
1539
1540 static void
1541 gen7_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1542                           dri_bo *surf_bo, unsigned long surf_bo_offset,
1543                           int width, int height, int pitch, int format, 
1544                           int index, int is_target)
1545 {
1546     struct i965_driver_data * const i965 = i965_driver_data(ctx);  
1547     struct gen7_surface_state *ss;
1548     dri_bo *ss_bo;
1549     unsigned int tiling;
1550     unsigned int swizzle;
1551
1552     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1553     ss_bo = pp_context->surface_state_binding_table.bo;
1554     assert(ss_bo);
1555
1556     dri_bo_map(ss_bo, True);
1557     assert(ss_bo->virtual);
1558     ss = (struct gen7_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
1559     memset(ss, 0, sizeof(*ss));
1560     ss->ss0.surface_type = I965_SURFACE_2D;
1561     ss->ss0.surface_format = format;
1562     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
1563     ss->ss2.width = width - 1;
1564     ss->ss2.height = height - 1;
1565     ss->ss3.pitch = pitch - 1;
1566     gen7_pp_set_surface_tiling(ss, tiling);
1567     if (IS_HASWELL(i965->intel.device_id))
1568         gen7_render_set_surface_scs(ss);
1569     dri_bo_emit_reloc(ss_bo,
1570                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
1571                       surf_bo_offset,
1572                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
1573                       surf_bo);
1574     ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1575     dri_bo_unmap(ss_bo);
1576 }
1577
1578 static void
1579 gen7_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1580                            dri_bo *surf_bo, unsigned long surf_bo_offset,
1581                            int width, int height, int wpitch,
1582                            int xoffset, int yoffset,
1583                            int format, int interleave_chroma,
1584                            int index)
1585 {
1586     struct gen7_surface_state2 *ss2;
1587     dri_bo *ss2_bo;
1588     unsigned int tiling;
1589     unsigned int swizzle;
1590
1591     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
1592     ss2_bo = pp_context->surface_state_binding_table.bo;
1593     assert(ss2_bo);
1594
1595     dri_bo_map(ss2_bo, True);
1596     assert(ss2_bo->virtual);
1597     ss2 = (struct gen7_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index));
1598     memset(ss2, 0, sizeof(*ss2));
1599     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
1600     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
1601     ss2->ss1.width = width - 1;
1602     ss2->ss1.height = height - 1;
1603     ss2->ss2.pitch = wpitch - 1;
1604     ss2->ss2.interleave_chroma = interleave_chroma;
1605     ss2->ss2.surface_format = format;
1606     ss2->ss3.x_offset_for_cb = xoffset;
1607     ss2->ss3.y_offset_for_cb = yoffset;
1608     gen7_pp_set_surface2_tiling(ss2, tiling);
1609     dri_bo_emit_reloc(ss2_bo,
1610                       I915_GEM_DOMAIN_RENDER, 0,
1611                       surf_bo_offset,
1612                       SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
1613                       surf_bo);
1614     ((unsigned int *)((char *)ss2_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
1615     dri_bo_unmap(ss2_bo);
1616 }
1617
1618 static void 
1619 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1620                                 const struct i965_surface *surface, 
1621                                 int base_index, int is_target,
1622                                 int *width, int *height, int *pitch, int *offset)
1623 {
1624     struct i965_driver_data *i965 = i965_driver_data(ctx);
1625     struct object_surface *obj_surface;
1626     struct object_image *obj_image;
1627     dri_bo *bo;
1628     int fourcc = pp_get_surface_fourcc(ctx, surface);
1629     const int Y = 0;
1630     const int U = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 2 : 1;
1631     const int V = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 1 : 2;
1632     const int UV = 1;
1633     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1634     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')); 
1635     int full_packed_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || 
1636                               fourcc == VA_FOURCC('R', 'G', 'B', 'X') || 
1637                               fourcc == VA_FOURCC('B', 'G', 'R', 'A') || 
1638                               fourcc == VA_FOURCC('B', 'G', 'R', 'X'));
1639     int scale_factor_of_1st_plane_width_in_byte = 1;
1640                               
1641     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1642         obj_surface = SURFACE(surface->id);
1643         bo = obj_surface->bo;
1644         width[0] = obj_surface->orig_width;
1645         height[0] = obj_surface->orig_height;
1646         pitch[0] = obj_surface->width;
1647         offset[0] = 0;
1648
1649         if (full_packed_format) {
1650             scale_factor_of_1st_plane_width_in_byte = 4; 
1651             pitch[0] = obj_surface->width * 4;
1652         }
1653         else if (packed_yuv ) {
1654             scale_factor_of_1st_plane_width_in_byte =  2; 
1655             pitch[0] = obj_surface->width * 2;
1656         }
1657         else if (interleaved_uv) {
1658             width[1] = obj_surface->orig_width;
1659             height[1] = obj_surface->orig_height / 2;
1660             pitch[1] = obj_surface->width;
1661             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1662         } else {
1663             width[1] = obj_surface->orig_width / 2;
1664             height[1] = obj_surface->orig_height / 2;
1665             pitch[1] = obj_surface->width / 2;
1666             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
1667             width[2] = obj_surface->orig_width / 2;
1668             height[2] = obj_surface->orig_height / 2;
1669             pitch[2] = obj_surface->width / 2;
1670             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
1671         }
1672     } else {
1673         obj_image = IMAGE(surface->id);
1674         bo = obj_image->bo;
1675         width[0] = obj_image->image.width;
1676         height[0] = obj_image->image.height;
1677         pitch[0] = obj_image->image.pitches[0];
1678         offset[0] = obj_image->image.offsets[0];
1679
1680         if (full_packed_format) {
1681             scale_factor_of_1st_plane_width_in_byte = 4;
1682         }
1683         else if (packed_yuv ) {
1684             scale_factor_of_1st_plane_width_in_byte = 2;
1685         }
1686         else if (interleaved_uv) {
1687             width[1] = obj_image->image.width;
1688             height[1] = obj_image->image.height / 2;
1689             pitch[1] = obj_image->image.pitches[1];
1690             offset[1] = obj_image->image.offsets[1];
1691         } else {
1692             width[1] = obj_image->image.width / 2;
1693             height[1] = obj_image->image.height / 2;
1694             pitch[1] = obj_image->image.pitches[1];
1695             offset[1] = obj_image->image.offsets[1];
1696             width[2] = obj_image->image.width / 2;
1697             height[2] = obj_image->image.height / 2;
1698             pitch[2] = obj_image->image.pitches[2];
1699             offset[2] = obj_image->image.offsets[2];
1700         }
1701     }
1702
1703     /* Y surface */
1704     i965_pp_set_surface_state(ctx, pp_context,
1705                               bo, offset[Y],
1706                               width[Y] *scale_factor_of_1st_plane_width_in_byte / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
1707                               base_index, is_target);
1708
1709     if (!packed_yuv && !full_packed_format) {
1710         if (interleaved_uv) {
1711             i965_pp_set_surface_state(ctx, pp_context,
1712                                       bo, offset[UV],
1713                                       width[UV] / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
1714                                       base_index + 1, is_target);
1715         } else {
1716             /* U surface */
1717             i965_pp_set_surface_state(ctx, pp_context,
1718                                       bo, offset[U],
1719                                       width[U] / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
1720                                       base_index + 1, is_target);
1721
1722             /* V surface */
1723             i965_pp_set_surface_state(ctx, pp_context,
1724                                       bo, offset[V],
1725                                       width[V] / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
1726                                       base_index + 2, is_target);
1727         }
1728     }
1729
1730 }
1731
1732 static void 
1733 gen7_pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1734                                      const struct i965_surface *surface, 
1735                                      int base_index, int is_target,
1736                                      int *width, int *height, int *pitch, int *offset)
1737 {
1738     struct i965_driver_data *i965 = i965_driver_data(ctx);
1739     struct object_surface *obj_surface;
1740     struct object_image *obj_image;
1741     dri_bo *bo;
1742     int fourcc = pp_get_surface_fourcc(ctx, surface);
1743     const int U = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1744                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 2 : 1;
1745     const int V = (fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
1746                    fourcc == VA_FOURCC('I', 'M', 'C', '1')) ? 1 : 2;
1747     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
1748     int packed_yuv = (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') || fourcc == VA_FOURCC('U', 'Y', 'V', 'Y'));
1749     int rgbx_format = (fourcc == VA_FOURCC('R', 'G', 'B', 'A') || 
1750                               fourcc == VA_FOURCC('R', 'G', 'B', 'X') || 
1751                               fourcc == VA_FOURCC('B', 'G', 'R', 'A') || 
1752                               fourcc == VA_FOURCC('B', 'G', 'R', 'X'));
1753
1754     if (surface->type == I965_SURFACE_TYPE_SURFACE) {
1755         obj_surface = SURFACE(surface->id);
1756         bo = obj_surface->bo;
1757         width[0] = obj_surface->orig_width;
1758         height[0] = obj_surface->orig_height;
1759         pitch[0] = obj_surface->width;
1760         offset[0] = 0;
1761
1762         if (packed_yuv) {
1763             if (is_target)
1764                 width[0] = obj_surface->orig_width * 2; /* surface format is R8, so double the width */
1765             else
1766                 width[0] = obj_surface->orig_width;     /* surface foramt is YCBCR, width is specified in units of pixels */
1767
1768             pitch[0] = obj_surface->width * 2;
1769         } else if (rgbx_format) {
1770             if (is_target)
1771                 width[0] = obj_surface->orig_width * 4; /* surface format is R8, so quad the width */
1772         }
1773
1774         width[1] = obj_surface->cb_cr_width;
1775         height[1] = obj_surface->cb_cr_height;
1776         pitch[1] = obj_surface->cb_cr_pitch;
1777         offset[1] = obj_surface->y_cb_offset * obj_surface->width;
1778
1779         width[2] = obj_surface->cb_cr_width;
1780         height[2] = obj_surface->cb_cr_height;
1781         pitch[2] = obj_surface->cb_cr_pitch;
1782         offset[2] = obj_surface->y_cr_offset * obj_surface->width;
1783     } else {
1784         obj_image = IMAGE(surface->id);
1785         bo = obj_image->bo;
1786         width[0] = obj_image->image.width;
1787         height[0] = obj_image->image.height;
1788         pitch[0] = obj_image->image.pitches[0];
1789         offset[0] = obj_image->image.offsets[0];
1790
1791         if (rgbx_format) {
1792             if (is_target)
1793                 width[0] = obj_image->image.width * 4; /* surface format is R8, so quad the width */
1794         } else if (packed_yuv) {
1795             if (is_target)
1796                 width[0] = obj_image->image.width * 2;  /* surface format is R8, so double the width */
1797             else
1798                 width[0] = obj_image->image.width;      /* surface foramt is YCBCR, width is specified in units of pixels */
1799         } else if (interleaved_uv) {
1800             width[1] = obj_image->image.width / 2;
1801             height[1] = obj_image->image.height / 2;
1802             pitch[1] = obj_image->image.pitches[1];
1803             offset[1] = obj_image->image.offsets[1];
1804         } else {
1805             width[1] = obj_image->image.width / 2;
1806             height[1] = obj_image->image.height / 2;
1807             pitch[1] = obj_image->image.pitches[U];
1808             offset[1] = obj_image->image.offsets[U];
1809             width[2] = obj_image->image.width / 2;
1810             height[2] = obj_image->image.height / 2;
1811             pitch[2] = obj_image->image.pitches[V];
1812             offset[2] = obj_image->image.offsets[V];
1813         }
1814     }
1815
1816     if (is_target) {
1817         gen7_pp_set_surface_state(ctx, pp_context,
1818                                   bo, 0,
1819                                   width[0] / 4, height[0], pitch[0],
1820                                   I965_SURFACEFORMAT_R8_UINT,
1821                                   base_index, 1);
1822         if (rgbx_format) {
1823                 struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1824                 /* the format is MSB: X-B-G-R */
1825                 pp_static_parameter->grf2.save_avs_rgb_swap = 0;
1826                 if ((fourcc == VA_FOURCC('B', 'G', 'R', 'A')) || 
1827                               (fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) {
1828                         /* It is stored as MSB: X-R-G-B */
1829                         pp_static_parameter->grf2.save_avs_rgb_swap = 1;
1830                 }
1831         }
1832         if (!packed_yuv && !rgbx_format) {
1833             if (interleaved_uv) {
1834                 gen7_pp_set_surface_state(ctx, pp_context,
1835                                           bo, offset[1],
1836                                           width[1] / 2, height[1], pitch[1],
1837                                           I965_SURFACEFORMAT_R8G8_SINT,
1838                                           base_index + 1, 1);
1839             } else {
1840                 gen7_pp_set_surface_state(ctx, pp_context,
1841                                           bo, offset[1],
1842                                           width[1] / 4, height[1], pitch[1],
1843                                           I965_SURFACEFORMAT_R8_SINT,
1844                                           base_index + 1, 1);
1845                 gen7_pp_set_surface_state(ctx, pp_context,
1846                                           bo, offset[2],
1847                                           width[2] / 4, height[2], pitch[2],
1848                                           I965_SURFACEFORMAT_R8_SINT,
1849                                           base_index + 2, 1);
1850             }
1851         }
1852     } else {
1853         int format0 = SURFACE_FORMAT_Y8_UNORM;
1854
1855         switch (fourcc) {
1856         case VA_FOURCC('Y', 'U', 'Y', '2'):
1857             format0 = SURFACE_FORMAT_YCRCB_NORMAL;
1858             break;
1859
1860         case VA_FOURCC('U', 'Y', 'V', 'Y'):
1861             format0 = SURFACE_FORMAT_YCRCB_SWAPY;
1862             break;
1863
1864         default:
1865             break;
1866         }
1867         if (rgbx_format) {
1868             struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
1869             /* Only R8G8B8A8_UNORM is supported for BGRX or RGBX */
1870             format0 = SURFACE_FORMAT_R8G8B8A8_UNORM;
1871             pp_static_parameter->grf2.src_avs_rgb_swap = 0;
1872             if ((fourcc == VA_FOURCC('B', 'G', 'R', 'A')) || 
1873                               (fourcc == VA_FOURCC('B', 'G', 'R', 'X'))) {
1874                 pp_static_parameter->grf2.src_avs_rgb_swap = 1;
1875             }
1876         }
1877         gen7_pp_set_surface2_state(ctx, pp_context,
1878                                    bo, offset[0],
1879                                    width[0], height[0], pitch[0],
1880                                    0, 0,
1881                                    format0, 0,
1882                                    base_index);
1883
1884         if (!packed_yuv && !rgbx_format) {
1885             if (interleaved_uv) {
1886                 gen7_pp_set_surface2_state(ctx, pp_context,
1887                                            bo, offset[1],
1888                                            width[1], height[1], pitch[1],
1889                                            0, 0,
1890                                            SURFACE_FORMAT_R8B8_UNORM, 0,
1891                                            base_index + 1);
1892             } else {
1893                 gen7_pp_set_surface2_state(ctx, pp_context,
1894                                            bo, offset[1],
1895                                            width[1], height[1], pitch[1],
1896                                            0, 0,
1897                                            SURFACE_FORMAT_R8_UNORM, 0,
1898                                            base_index + 1);
1899                 gen7_pp_set_surface2_state(ctx, pp_context,
1900                                            bo, offset[2],
1901                                            width[2], height[2], pitch[2],
1902                                            0, 0,
1903                                            SURFACE_FORMAT_R8_UNORM, 0,
1904                                            base_index + 2);
1905             }
1906         }
1907     }
1908 }
1909
/*
 * Horizontal step count for the "null" post-processing module.
 * The private context is not consulted; the answer is always 1.
 */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1915
/*
 * Vertical step count for the "null" post-processing module.
 * The private context is not consulted; the answer is always 1.
 */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1921
/*
 * Per-block parameter hook for the "null" post-processing module.
 * Nothing is written for block (x, y); the hook just reports 0.
 */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;
    (void)x;
    (void)y;

    return 0;
}
1927
1928 static VAStatus
1929 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1930                    const struct i965_surface *src_surface,
1931                    const VARectangle *src_rect,
1932                    struct i965_surface *dst_surface,
1933                    const VARectangle *dst_rect,
1934                    void *filter_param)
1935 {
1936     /* private function & data */
1937     pp_context->pp_x_steps = pp_null_x_steps;
1938     pp_context->pp_y_steps = pp_null_y_steps;
1939     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
1940
1941     dst_surface->flags = src_surface->flags;
1942
1943     return VA_STATUS_SUCCESS;
1944 }
1945
/*
 * Horizontal step count for the load/save module.
 * The private context is not consulted; the answer is always 1.
 */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
1951
1952 static int
1953 pp_load_save_y_steps(void *private_context)
1954 {
1955     struct pp_load_save_context *pp_load_save_context = private_context;
1956
1957     return pp_load_save_context->dest_h / 8;
1958 }
1959
1960 static int
1961 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1962 {
1963     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
1964     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
1965
1966     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_load_save_context->dest_x;
1967     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_load_save_context->dest_y;
1968
1969     return 0;
1970 }
1971
1972 static void calculate_boundary_block_mask(struct i965_post_processing_context *pp_context, const VARectangle *dst_rect)
1973 {
1974     int i;
1975     /* x offset of dest surface must be dword aligned.
1976      * so we have to extend dst surface on left edge, and mask out pixels not interested
1977      */
1978     if (dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT) {
1979         pp_context->block_horizontal_mask_left = 0;
1980         for (i=dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; i<GPU_ASM_BLOCK_WIDTH; i++)
1981         {
1982             pp_context->block_horizontal_mask_left |= 1<<i;
1983         }
1984     }
1985     else {
1986         pp_context->block_horizontal_mask_left = 0xffff;
1987     }
1988     
1989     int dst_width_adjust = dst_rect->width + dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT; 
1990     if (dst_width_adjust%GPU_ASM_BLOCK_WIDTH){
1991         pp_context->block_horizontal_mask_right = (1 << (dst_width_adjust%GPU_ASM_BLOCK_WIDTH)) - 1;
1992     }
1993     else {
1994         pp_context->block_horizontal_mask_right = 0xffff;
1995     }
1996     
1997     if (dst_rect->height%GPU_ASM_BLOCK_HEIGHT){
1998         pp_context->block_vertical_mask_bottom = (1 << (dst_rect->height%GPU_ASM_BLOCK_HEIGHT)) - 1;
1999     }
2000     else {
2001         pp_context->block_vertical_mask_bottom = 0xff;
2002     }
2003
2004 }
2005 static VAStatus
2006 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2007                                 const struct i965_surface *src_surface,
2008                                 const VARectangle *src_rect,
2009                                 struct i965_surface *dst_surface,
2010                                 const VARectangle *dst_rect,
2011                                 void *filter_param)
2012 {
2013     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
2014     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2015     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2016     int width[3], height[3], pitch[3], offset[3];
2017     const int Y = 0;
2018
2019     /* source surface */
2020     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
2021                                     width, height, pitch, offset);
2022
2023     /* destination surface */
2024     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
2025                                     width, height, pitch, offset);
2026
2027     /* private function & data */
2028     pp_context->pp_x_steps = pp_load_save_x_steps;
2029     pp_context->pp_y_steps = pp_load_save_y_steps;
2030     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
2031
2032     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;;
2033     pp_load_save_context->dest_x = dst_rect->x - dst_left_edge_extend;
2034     pp_load_save_context->dest_y = dst_rect->y;
2035     pp_load_save_context->dest_h = ALIGN(dst_rect->height, 8);
2036     pp_load_save_context->dest_w = ALIGN(dst_rect->width+dst_left_edge_extend, 16);
2037
2038     pp_inline_parameter->grf5.block_count_x = pp_load_save_context->dest_w / 16;   /* 1 x N */
2039     pp_inline_parameter->grf5.number_blocks = pp_load_save_context->dest_w / 16;
2040
2041     pp_static_parameter->grf3.horizontal_origin_offset = src_rect->x;
2042     pp_static_parameter->grf3.vertical_origin_offset = src_rect->y;
2043
2044     // update u/v offset for packed yuv
2045     i965_update_src_surface_static_parameter (ctx, pp_context, src_surface);
2046     i965_update_dst_surface_static_parameter (ctx, pp_context, dst_surface);
2047
2048     dst_surface->flags = src_surface->flags;
2049
2050     return VA_STATUS_SUCCESS;
2051 }
2052
/*
 * Horizontal step count for the scaling module.
 * The private context is not consulted; the answer is always 1.
 */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2058
2059 static int
2060 pp_scaling_y_steps(void *private_context)
2061 {
2062     struct pp_scaling_context *pp_scaling_context = private_context;
2063
2064     return pp_scaling_context->dest_h / 8;
2065 }
2066
2067 static int
2068 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2069 {
2070     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
2071     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2072     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2073     float src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2074     float src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2075
2076     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
2077     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
2078     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
2079     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
2080     
2081     return 0;
2082 }
2083
2084 static VAStatus
2085 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2086                            const struct i965_surface *src_surface,
2087                            const VARectangle *src_rect,
2088                            struct i965_surface *dst_surface,
2089                            const VARectangle *dst_rect,
2090                            void *filter_param)
2091 {
2092     struct i965_driver_data *i965 = i965_driver_data(ctx);
2093     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
2094     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2095     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2096     struct object_surface *obj_surface;
2097     struct i965_sampler_state *sampler_state;
2098     int in_w, in_h, in_wpitch, in_hpitch;
2099     int out_w, out_h, out_wpitch, out_hpitch;
2100
2101     /* source surface */
2102     obj_surface = SURFACE(src_surface->id);
2103     in_w = obj_surface->orig_width;
2104     in_h = obj_surface->orig_height;
2105     in_wpitch = obj_surface->width;
2106     in_hpitch = obj_surface->height;
2107
2108     /* source Y surface index 1 */
2109     i965_pp_set_surface_state(ctx, pp_context,
2110                               obj_surface->bo, 0,
2111                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2112                               1, 0);
2113
2114     /* source UV surface index 2 */
2115     i965_pp_set_surface_state(ctx, pp_context,
2116                               obj_surface->bo, in_wpitch * in_hpitch,
2117                               in_w / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2118                               2, 0);
2119
2120     /* destination surface */
2121     obj_surface = SURFACE(dst_surface->id);
2122     out_w = obj_surface->orig_width;
2123     out_h = obj_surface->orig_height;
2124     out_wpitch = obj_surface->width;
2125     out_hpitch = obj_surface->height;
2126
2127     /* destination Y surface index 7 */
2128     i965_pp_set_surface_state(ctx, pp_context,
2129                               obj_surface->bo, 0,
2130                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2131                               7, 1);
2132
2133     /* destination UV surface index 8 */
2134     i965_pp_set_surface_state(ctx, pp_context,
2135                               obj_surface->bo, out_wpitch * out_hpitch,
2136                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2137                               8, 1);
2138
2139     /* sampler state */
2140     dri_bo_map(pp_context->sampler_state_table.bo, True);
2141     assert(pp_context->sampler_state_table.bo->virtual);
2142     sampler_state = pp_context->sampler_state_table.bo->virtual;
2143
2144     /* SIMD16 Y index 1 */
2145     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
2146     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2147     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2148     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2149     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2150
2151     /* SIMD16 UV index 2 */
2152     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
2153     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
2154     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2155     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2156     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2157
2158     dri_bo_unmap(pp_context->sampler_state_table.bo);
2159
2160     /* private function & data */
2161     pp_context->pp_x_steps = pp_scaling_x_steps;
2162     pp_context->pp_y_steps = pp_scaling_y_steps;
2163     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
2164
2165     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
2166     float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
2167     pp_scaling_context->dest_x = dst_rect->x - dst_left_edge_extend;
2168     pp_scaling_context->dest_y = dst_rect->y;
2169     pp_scaling_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2170     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 8);
2171     pp_scaling_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
2172     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h;
2173
2174     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2175
2176     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
2177     pp_inline_parameter->grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
2178     pp_inline_parameter->grf5.number_blocks = pp_scaling_context->dest_w / 16;
2179
2180     dst_surface->flags = src_surface->flags;
2181
2182     return VA_STATUS_SUCCESS;
2183 }
2184
2185 static int
2186 pp_avs_x_steps(void *private_context)
2187 {
2188     struct pp_avs_context *pp_avs_context = private_context;
2189
2190     return pp_avs_context->dest_w / 16;
2191 }
2192
/*
 * Vertical step count for the AVS module.
 * The private context is not consulted; the answer is always 1.
 */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
2198
2199 static int
2200 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2201 {
2202     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2203     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2204     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2205     float src_x_steping, src_y_steping, video_step_delta;
2206     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
2207
2208     if (pp_static_parameter->grf4.r4_2.avs.nlas == 0) {
2209         src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2210         pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_avs_context->src_normalized_x;
2211     } else if (tmp_w >= pp_avs_context->dest_w) {
2212         pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
2213         pp_inline_parameter->grf6.video_step_delta = 0;
2214         
2215         if (x == 0) {
2216             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
2217                 pp_avs_context->src_normalized_x;
2218         } else {
2219             src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2220             video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2221             pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2222                 16 * 15 * video_step_delta / 2;
2223         }
2224     } else {
2225         int n0, n1, n2, nls_left, nls_right;
2226         int factor_a = 5, factor_b = 4;
2227         float f;
2228
2229         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
2230         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
2231         n2 = tmp_w / (16 * factor_a);
2232         nls_left = n0 + n2;
2233         nls_right = n1 + n2;
2234         f = (float) n2 * 16 / tmp_w;
2235         
2236         if (n0 < 5) {
2237             pp_inline_parameter->grf6.video_step_delta = 0.0;
2238
2239             if (x == 0) {
2240                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
2241                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
2242             } else {
2243                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2244                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2245                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2246                     16 * 15 * video_step_delta / 2;
2247             }
2248         } else {
2249             if (x < nls_left) {
2250                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
2251                 float a = f / (nls_left * 16 * factor_b);
2252                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
2253                 
2254                 pp_inline_parameter->grf6.video_step_delta = b;
2255
2256                 if (x == 0) {
2257                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
2258                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a;
2259                 } else {
2260                     src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2261                     video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2262                     pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2263                         16 * 15 * video_step_delta / 2;
2264                     pp_inline_parameter->grf5.normalized_video_x_scaling_step += 16 * b;
2265                 }
2266             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
2267                 /* scale the center linearly */
2268                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2269                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2270                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2271                     16 * 15 * video_step_delta / 2;
2272                 pp_inline_parameter->grf6.video_step_delta = 0.0;
2273                 pp_inline_parameter->grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
2274             } else {
2275                 float a = f / (nls_right * 16 * factor_b);
2276                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
2277
2278                 src_x_steping = pp_inline_parameter->grf5.normalized_video_x_scaling_step;
2279                 video_step_delta = pp_inline_parameter->grf6.video_step_delta;
2280                 pp_inline_parameter->grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
2281                     16 * 15 * video_step_delta / 2;
2282                 pp_inline_parameter->grf6.video_step_delta = -b;
2283
2284                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
2285                     pp_inline_parameter->grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
2286                 else
2287                     pp_inline_parameter->grf5.normalized_video_x_scaling_step -= b * 16;
2288             }
2289         }
2290     }
2291
2292     src_y_steping = pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step;
2293     pp_inline_parameter->grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
2294     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2295     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
2296
2297     return 0;
2298 }
2299
2300 static VAStatus
2301 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2302                        const struct i965_surface *src_surface,
2303                        const VARectangle *src_rect,
2304                        struct i965_surface *dst_surface,
2305                        const VARectangle *dst_rect,
2306                        void *filter_param,
2307                        int nlas)
2308 {
2309     struct i965_driver_data *i965 = i965_driver_data(ctx);
2310     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2311     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2312     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2313     struct object_surface *obj_surface;
2314     struct i965_sampler_8x8 *sampler_8x8;
2315     struct i965_sampler_8x8_state *sampler_8x8_state;
2316     int index;
2317     int in_w, in_h, in_wpitch, in_hpitch;
2318     int out_w, out_h, out_wpitch, out_hpitch;
2319     int i;
2320
2321     /* surface */
2322     obj_surface = SURFACE(src_surface->id);
2323     in_w = obj_surface->orig_width;
2324     in_h = obj_surface->orig_height;
2325     in_wpitch = obj_surface->width;
2326     in_hpitch = obj_surface->height;
2327
2328     /* source Y surface index 1 */
2329     i965_pp_set_surface2_state(ctx, pp_context,
2330                                obj_surface->bo, 0,
2331                                in_w, in_h, in_wpitch,
2332                                0, 0,
2333                                SURFACE_FORMAT_Y8_UNORM, 0,
2334                                1);
2335
2336     /* source UV surface index 2 */
2337     i965_pp_set_surface2_state(ctx, pp_context,
2338                                obj_surface->bo, in_wpitch * in_hpitch,
2339                                in_w / 2, in_h / 2, in_wpitch,
2340                                0, 0,
2341                                SURFACE_FORMAT_R8B8_UNORM, 0,
2342                                2);
2343
2344     /* destination surface */
2345     obj_surface = SURFACE(dst_surface->id);
2346     out_w = obj_surface->orig_width;
2347     out_h = obj_surface->orig_height;
2348     out_wpitch = obj_surface->width;
2349     out_hpitch = obj_surface->height;
2350     assert(out_w <= out_wpitch && out_h <= out_hpitch);
2351
2352     /* destination Y surface index 7 */
2353     i965_pp_set_surface_state(ctx, pp_context,
2354                               obj_surface->bo, 0,
2355                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
2356                               7, 1);
2357
2358     /* destination UV surface index 8 */
2359     i965_pp_set_surface_state(ctx, pp_context,
2360                               obj_surface->bo, out_wpitch * out_hpitch,
2361                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
2362                               8, 1);
2363
2364     /* sampler 8x8 state */
2365     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2366     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2367     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2368     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2369     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2370
2371     for (i = 0; i < 17; i++) {
2372         /* for Y channel, currently ignore */
2373         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x00;
2374         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x00;
2375         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x08;
2376         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = 0x18;
2377         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = 0x18;
2378         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x08;
2379         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x00;
2380         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x00;
2381         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x00;
2382         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x00;
2383         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x10;
2384         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = 0x10;
2385         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = 0x10;
2386         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x10;
2387         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x00;
2388         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x00;
2389         /* for U/V channel, 0.25 */
2390         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2391         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2392         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x10;
2393         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = 0x10;
2394         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = 0x10;
2395         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x10;
2396         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2397         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2398         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2399         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2400         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x10;
2401         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = 0x10;
2402         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = 0x10;
2403         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x10;
2404         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2405         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2406     }
2407
2408     sampler_8x8_state->dw136.default_sharpness_level = 0;
2409     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
2410     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2411     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2412     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2413
2414     /* sampler 8x8 */
2415     dri_bo_map(pp_context->sampler_state_table.bo, True);
2416     assert(pp_context->sampler_state_table.bo->virtual);
2417     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
2418     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2419
2420     /* sample_8x8 Y index 1 */
2421     index = 1;
2422     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2423     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2424     sampler_8x8[index].dw0.ief_bypass = 1;
2425     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2426     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2427     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2428     sampler_8x8[index].dw2.global_noise_estimation = 22;
2429     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2430     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2431     sampler_8x8[index].dw3.strong_edge_weight = 7;
2432     sampler_8x8[index].dw3.regular_weight = 2;
2433     sampler_8x8[index].dw3.non_edge_weight = 0;
2434     sampler_8x8[index].dw3.gain_factor = 40;
2435     sampler_8x8[index].dw4.steepness_boost = 0;
2436     sampler_8x8[index].dw4.steepness_threshold = 0;
2437     sampler_8x8[index].dw4.mr_boost = 0;
2438     sampler_8x8[index].dw4.mr_threshold = 5;
2439     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2440     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2441     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2442     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2443     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2444     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2445     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2446     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2447     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2448     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2449     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2450     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2451     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2452     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2453     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2454     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2455     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2456     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2457     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2458     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2459     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2460     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2461     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2462     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2463     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2464     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2465     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2466     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2467     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2468     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2469     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2470     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2471     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2472     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2473     sampler_8x8[index].dw13.limiter_boost = 0;
2474     sampler_8x8[index].dw13.minimum_limiter = 10;
2475     sampler_8x8[index].dw13.maximum_limiter = 11;
2476     sampler_8x8[index].dw14.clip_limiter = 130;
2477     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2478                       I915_GEM_DOMAIN_RENDER, 
2479                       0,
2480                       0,
2481                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2482                       pp_context->sampler_state_table.bo_8x8);
2483
2484     /* sample_8x8 UV index 2 */
2485     index = 2;
2486     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2487     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
2488     sampler_8x8[index].dw0.ief_bypass = 1;
2489     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
2490     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
2491     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2492     sampler_8x8[index].dw2.global_noise_estimation = 22;
2493     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2494     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2495     sampler_8x8[index].dw3.strong_edge_weight = 7;
2496     sampler_8x8[index].dw3.regular_weight = 2;
2497     sampler_8x8[index].dw3.non_edge_weight = 0;
2498     sampler_8x8[index].dw3.gain_factor = 40;
2499     sampler_8x8[index].dw4.steepness_boost = 0;
2500     sampler_8x8[index].dw4.steepness_threshold = 0;
2501     sampler_8x8[index].dw4.mr_boost = 0;
2502     sampler_8x8[index].dw4.mr_threshold = 5;
2503     sampler_8x8[index].dw5.pwl1_point_1 = 4;
2504     sampler_8x8[index].dw5.pwl1_point_2 = 12;
2505     sampler_8x8[index].dw5.pwl1_point_3 = 16;
2506     sampler_8x8[index].dw5.pwl1_point_4 = 26;
2507     sampler_8x8[index].dw6.pwl1_point_5 = 40;
2508     sampler_8x8[index].dw6.pwl1_point_6 = 160;
2509     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
2510     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
2511     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
2512     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
2513     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
2514     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
2515     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
2516     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
2517     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
2518     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
2519     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
2520     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
2521     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
2522     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
2523     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
2524     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
2525     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
2526     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
2527     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
2528     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
2529     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
2530     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
2531     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
2532     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
2533     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
2534     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
2535     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
2536     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
2537     sampler_8x8[index].dw13.limiter_boost = 0;
2538     sampler_8x8[index].dw13.minimum_limiter = 10;
2539     sampler_8x8[index].dw13.maximum_limiter = 11;
2540     sampler_8x8[index].dw14.clip_limiter = 130;
2541     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2542                       I915_GEM_DOMAIN_RENDER, 
2543                       0,
2544                       0,
2545                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2546                       pp_context->sampler_state_table.bo_8x8);
2547
2548     dri_bo_unmap(pp_context->sampler_state_table.bo);
2549
2550     /* private function & data */
2551     pp_context->pp_x_steps = pp_avs_x_steps;
2552     pp_context->pp_y_steps = pp_avs_y_steps;
2553     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
2554
2555     int dst_left_edge_extend = dst_rect->x%GPU_ASM_X_OFFSET_ALIGNMENT;
2556     float src_left_edge_extend = (float)dst_left_edge_extend*src_rect->width/dst_rect->width;
2557     pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
2558     pp_avs_context->dest_y = dst_rect->y;
2559     pp_avs_context->dest_w = ALIGN(dst_rect->width + dst_left_edge_extend, 16);
2560     pp_avs_context->dest_h = ALIGN(dst_rect->height, 8);
2561     pp_avs_context->src_normalized_x = (float)(src_rect->x - src_left_edge_extend)/ in_w;
2562     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h;
2563     pp_avs_context->src_w = src_rect->width + src_left_edge_extend;
2564     pp_avs_context->src_h = src_rect->height;
2565
2566     pp_static_parameter->grf4.r4_2.avs.nlas = nlas;
2567     pp_static_parameter->grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / dst_rect->height;
2568
2569     pp_inline_parameter->grf5.normalized_video_x_scaling_step = (float) (src_rect->width + src_left_edge_extend)/ in_w / (dst_rect->width + dst_left_edge_extend);
2570     pp_inline_parameter->grf5.block_count_x = 1;        /* M x 1 */
2571     pp_inline_parameter->grf5.number_blocks = pp_avs_context->dest_h / 8;
2572     pp_inline_parameter->grf6.video_step_delta = 0.0;
2573
2574     dst_surface->flags = src_surface->flags;
2575
2576     return VA_STATUS_SUCCESS;
2577 }
2578
2579 static VAStatus
2580 pp_nv12_avs_initialize_nlas(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2581                             const struct i965_surface *src_surface,
2582                             const VARectangle *src_rect,
2583                             struct i965_surface *dst_surface,
2584                             const VARectangle *dst_rect,
2585                             void *filter_param)
2586 {
2587     return pp_nv12_avs_initialize(ctx, pp_context,
2588                                   src_surface,
2589                                   src_rect,
2590                                   dst_surface,
2591                                   dst_rect,
2592                                   filter_param,
2593                                   1);
2594 }
2595
2596 static VAStatus
2597 gen6_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2598                              const struct i965_surface *src_surface,
2599                              const VARectangle *src_rect,
2600                              struct i965_surface *dst_surface,
2601                              const VARectangle *dst_rect,
2602                              void *filter_param)
2603 {
2604     return pp_nv12_avs_initialize(ctx, pp_context,
2605                                   src_surface,
2606                                   src_rect,
2607                                   dst_surface,
2608                                   dst_rect,
2609                                   filter_param,
2610                                   0);    
2611 }
2612
2613 static int
2614 gen7_pp_avs_x_steps(void *private_context)
2615 {
2616     struct pp_avs_context *pp_avs_context = private_context;
2617
2618     return pp_avs_context->dest_w / 16;
2619 }
2620
2621 static int
2622 gen7_pp_avs_y_steps(void *private_context)
2623 {
2624     struct pp_avs_context *pp_avs_context = private_context;
2625
2626     return pp_avs_context->dest_h / 16;
2627 }
2628
2629 static int
2630 gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
2631 {
2632     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2633     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
2634
2635     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
2636     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
2637     pp_inline_parameter->grf7.constant_0 = 0xffffffff;
2638     pp_inline_parameter->grf7.sampler_load_main_video_x_scaling_step = pp_avs_context->horiz_range / pp_avs_context->src_w;
2639
2640     return 0;
2641 }
2642
2643 static void gen7_update_src_surface_uv_offset(VADriverContextP    ctx, 
2644                                               struct i965_post_processing_context *pp_context,
2645                                               const struct i965_surface *surface)
2646 {
2647     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2648     int fourcc = pp_get_surface_fourcc(ctx, surface);
2649     
2650     if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2')) {
2651         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
2652         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
2653         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
2654     } else if (fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
2655         pp_static_parameter->grf2.di_destination_packed_y_component_offset = 1;
2656         pp_static_parameter->grf2.di_destination_packed_u_component_offset = 0;
2657         pp_static_parameter->grf2.di_destination_packed_v_component_offset = 2;
2658     }
2659 }
2660
2661 static VAStatus
2662 gen7_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2663                            const struct i965_surface *src_surface,
2664                            const VARectangle *src_rect,
2665                            struct i965_surface *dst_surface,
2666                            const VARectangle *dst_rect,
2667                            void *filter_param)
2668 {
2669     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2670     struct i965_driver_data *i965 = i965_driver_data(ctx);
2671     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2672     struct gen7_sampler_8x8 *sampler_8x8;
2673     struct i965_sampler_8x8_state *sampler_8x8_state;
2674     int index, i;
2675     int width[3], height[3], pitch[3], offset[3];
2676     int src_width, src_height;
2677
2678     /* source surface */
2679     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
2680                                          width, height, pitch, offset);
2681     src_width = width[0];
2682     src_height = height[0];
2683
2684     /* destination surface */
2685     gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
2686                                          width, height, pitch, offset);
2687
2688     /* sampler 8x8 state */
2689     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2690     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2691     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2692     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2693     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2694
2695     for (i = 0; i < 17; i++) {
2696         float coff;
2697         coff = i;
2698         coff = coff / 16;
2699         /* for Y channel, currently ignore */
2700         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0;
2701         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0;
2702         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0;
2703         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = intel_format_convert(1 - coff, 1, 6,0);
2704         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = intel_format_convert(coff, 1, 6, 0);
2705         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0;
2706         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0;
2707         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0;
2708         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0;
2709         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0;
2710         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0;
2711         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
2712         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = intel_format_convert(coff, 1, 6, 0);
2713         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0;
2714         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0;
2715         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0;
2716         /* for U/V channel, 0.25 */
2717         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2718         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2719         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x0;
2720         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
2721         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = intel_format_convert(coff, 1, 6, 0);
2722         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0;
2723         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2724         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2725         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2726         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2727         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x0;
2728         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
2729         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = intel_format_convert(coff, 1, 6, 0);
2730         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x0;
2731         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2732         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2733     }
2734
2735     sampler_8x8_state->dw136.default_sharpness_level = 0;
2736     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
2737     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2738     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2739     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2740
2741     /* sampler 8x8 */
2742     dri_bo_map(pp_context->sampler_state_table.bo, True);
2743     assert(pp_context->sampler_state_table.bo->virtual);
2744     assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
2745     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2746
2747     /* sample_8x8 Y index 4 */
2748     index = 4;
2749     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2750     sampler_8x8[index].dw0.global_noise_estimation = 255;
2751     sampler_8x8[index].dw0.ief_bypass = 1;
2752
2753     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2754
2755     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2756     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2757     sampler_8x8[index].dw2.r5x_coefficient = 9;
2758     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2759     sampler_8x8[index].dw2.r5c_coefficient = 3;
2760
2761     sampler_8x8[index].dw3.r3x_coefficient = 27;
2762     sampler_8x8[index].dw3.r3c_coefficient = 5;
2763     sampler_8x8[index].dw3.gain_factor = 40;
2764     sampler_8x8[index].dw3.non_edge_weight = 1;
2765     sampler_8x8[index].dw3.regular_weight = 2;
2766     sampler_8x8[index].dw3.strong_edge_weight = 7;
2767     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2768
2769     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2770                       I915_GEM_DOMAIN_RENDER, 
2771                       0,
2772                       0,
2773                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2774                       pp_context->sampler_state_table.bo_8x8);
2775
2776     /* sample_8x8 UV index 8 */
2777     index = 8;
2778     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2779     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2780     sampler_8x8[index].dw0.global_noise_estimation = 255;
2781     sampler_8x8[index].dw0.ief_bypass = 1;
2782     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2783     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2784     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2785     sampler_8x8[index].dw2.r5x_coefficient = 9;
2786     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2787     sampler_8x8[index].dw2.r5c_coefficient = 3;
2788     sampler_8x8[index].dw3.r3x_coefficient = 27;
2789     sampler_8x8[index].dw3.r3c_coefficient = 5;
2790     sampler_8x8[index].dw3.gain_factor = 40;
2791     sampler_8x8[index].dw3.non_edge_weight = 1;
2792     sampler_8x8[index].dw3.regular_weight = 2;
2793     sampler_8x8[index].dw3.strong_edge_weight = 7;
2794     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2795
2796     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2797                       I915_GEM_DOMAIN_RENDER, 
2798                       0,
2799                       0,
2800                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2801                       pp_context->sampler_state_table.bo_8x8);
2802
2803     /* sampler_8x8 V, index 12 */
2804     index = 12;
2805     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2806     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2807     sampler_8x8[index].dw0.global_noise_estimation = 255;
2808     sampler_8x8[index].dw0.ief_bypass = 1;
2809     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2810     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2811     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2812     sampler_8x8[index].dw2.r5x_coefficient = 9;
2813     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2814     sampler_8x8[index].dw2.r5c_coefficient = 3;
2815     sampler_8x8[index].dw3.r3x_coefficient = 27;
2816     sampler_8x8[index].dw3.r3c_coefficient = 5;
2817     sampler_8x8[index].dw3.gain_factor = 40;
2818     sampler_8x8[index].dw3.non_edge_weight = 1;
2819     sampler_8x8[index].dw3.regular_weight = 2;
2820     sampler_8x8[index].dw3.strong_edge_weight = 7;
2821     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2822
2823     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2824                       I915_GEM_DOMAIN_RENDER, 
2825                       0,
2826                       0,
2827                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2828                       pp_context->sampler_state_table.bo_8x8);
2829
2830     dri_bo_unmap(pp_context->sampler_state_table.bo);
2831
2832     /* private function & data */
2833     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
2834     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
2835     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
2836
2837     pp_avs_context->dest_x = dst_rect->x;
2838     pp_avs_context->dest_y = dst_rect->y;
2839     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
2840     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
2841     pp_avs_context->src_w = src_rect->width;
2842     pp_avs_context->src_h = src_rect->height;
2843     pp_avs_context->horiz_range = (float)src_rect->width / src_width;
2844
2845     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
2846     dw = MAX(dw, pp_avs_context->dest_w);
2847
2848     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
2849     pp_static_parameter->grf2.avs_wa_enable = 1; /* must be set for GEN7 */
2850     if (IS_HASWELL(i965->intel.device_id))
2851         pp_static_parameter->grf2.avs_wa_enable = 0; /* HSW don't use the WA */
2852         
2853     pp_static_parameter->grf2.avs_wa_width = dw;
2854     pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * dw);
2855     pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * dw);
2856
2857     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
2858     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / pp_avs_context->dest_h;
2859     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height -
2860                                                                    (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step;
2861     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width -
2862                                                                      (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw;
2863
2864     gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
2865
2866     dst_surface->flags = src_surface->flags;
2867
2868     return VA_STATUS_SUCCESS;
2869 }
2870
2871
2872 static VAStatus
2873 gen7_pp_rgbx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
2874                            const struct i965_surface *src_surface,
2875                            const VARectangle *src_rect,
2876                            struct i965_surface *dst_surface,
2877                            const VARectangle *dst_rect,
2878                            void *filter_param)
2879 {
2880     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
2881     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
2882     struct gen7_sampler_8x8 *sampler_8x8;
2883     struct i965_sampler_8x8_state *sampler_8x8_state;
2884     int index, i;
2885     int width[3], height[3], pitch[3], offset[3];
2886     int src_width, src_height;
2887
2888     /* source surface */
2889     gen7_pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 0, 0,
2890                                          width, height, pitch, offset);
2891     src_width = width[0];
2892     src_height = height[0];
2893
2894     /* destination surface */
2895     gen7_pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 24, 1,
2896                                          width, height, pitch, offset);
2897
2898     /* sampler 8x8 state */
2899     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
2900     assert(pp_context->sampler_state_table.bo_8x8->virtual);
2901     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
2902     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
2903     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
2904
2905     /* The sampler_state setting of RGBX surface will be different with
2906      * that for NV12/I420 surface. 
2907      */
2908     for (i = 0; i < 17; i++) {
2909         float coff;
2910         coff = i;
2911         coff = coff / 16;
2912         /* for Y channel, currently ignore */
2913         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c0 = 0x0;
2914         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c1 = 0x0;
2915         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c2 = 0x0;
2916         sampler_8x8_state->coefficients[i].dw0.table_0x_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
2917         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c4 = intel_format_convert(coff, 1, 6, 0);
2918         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c5 = 0x0;
2919         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c6 = 0x0;
2920         sampler_8x8_state->coefficients[i].dw1.table_0x_filter_c7 = 0x0;
2921         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c0 = 0x0;
2922         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c1 = 0x0;
2923         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c2 = 0x0;
2924         sampler_8x8_state->coefficients[i].dw2.table_0y_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
2925         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c4 = intel_format_convert(coff, 1, 6, 0);
2926         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c5 = 0x0;
2927         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c6 = 0x0;
2928         sampler_8x8_state->coefficients[i].dw3.table_0y_filter_c7 = 0x0;
2929         /* for U/V channel, 0.25 */
2930         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c0 = 0x0;
2931         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c1 = 0x0;
2932         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c2 = 0x00;
2933         sampler_8x8_state->coefficients[i].dw4.table_1x_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
2934         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c4 = intel_format_convert(coff, 1, 6, 0);
2935         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c5 = 0x00;
2936         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c6 = 0x0;
2937         sampler_8x8_state->coefficients[i].dw5.table_1x_filter_c7 = 0x0;
2938         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c0 = 0x0;
2939         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c1 = 0x0;
2940         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c2 = 0x00;
2941         sampler_8x8_state->coefficients[i].dw6.table_1y_filter_c3 = intel_format_convert(1 - coff, 1, 6, 0);
2942         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c4 = intel_format_convert(coff, 1, 6, 0);
2943         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c5 = 0x00;
2944         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c6 = 0x0;
2945         sampler_8x8_state->coefficients[i].dw7.table_1y_filter_c7 = 0x0;
2946     }
2947
2948     sampler_8x8_state->dw136.default_sharpness_level = 0;
2949     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 0;
2950     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
2951     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
2952     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
2953
2954     /* sampler 8x8 */
2955     dri_bo_map(pp_context->sampler_state_table.bo, True);
2956     assert(pp_context->sampler_state_table.bo->virtual);
2957     assert(sizeof(*sampler_8x8) == sizeof(int) * 4);
2958     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
2959
2960     /* sample_8x8 Y index 4 */
2961     index = 4;
2962     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2963     sampler_8x8[index].dw0.global_noise_estimation = 255;
2964     sampler_8x8[index].dw0.ief_bypass = 1;
2965
2966     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2967
2968     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2969     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2970     sampler_8x8[index].dw2.r5x_coefficient = 9;
2971     sampler_8x8[index].dw2.r5cx_coefficient = 8;
2972     sampler_8x8[index].dw2.r5c_coefficient = 3;
2973
2974     sampler_8x8[index].dw3.r3x_coefficient = 27;
2975     sampler_8x8[index].dw3.r3c_coefficient = 5;
2976     sampler_8x8[index].dw3.gain_factor = 40;
2977     sampler_8x8[index].dw3.non_edge_weight = 1;
2978     sampler_8x8[index].dw3.regular_weight = 2;
2979     sampler_8x8[index].dw3.strong_edge_weight = 7;
2980     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
2981
2982     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
2983                       I915_GEM_DOMAIN_RENDER, 
2984                       0,
2985                       0,
2986                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
2987                       pp_context->sampler_state_table.bo_8x8);
2988
2989     /* sample_8x8 UV index 8 */
2990     index = 8;
2991     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
2992     sampler_8x8[index].dw0.disable_8x8_filter = 0;
2993     sampler_8x8[index].dw0.global_noise_estimation = 255;
2994     sampler_8x8[index].dw0.ief_bypass = 1;
2995     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
2996     sampler_8x8[index].dw2.weak_edge_threshold = 1;
2997     sampler_8x8[index].dw2.strong_edge_threshold = 8;
2998     sampler_8x8[index].dw2.r5x_coefficient = 9;
2999     sampler_8x8[index].dw2.r5cx_coefficient = 8;
3000     sampler_8x8[index].dw2.r5c_coefficient = 3;
3001     sampler_8x8[index].dw3.r3x_coefficient = 27;
3002     sampler_8x8[index].dw3.r3c_coefficient = 5;
3003     sampler_8x8[index].dw3.gain_factor = 40;
3004     sampler_8x8[index].dw3.non_edge_weight = 1;
3005     sampler_8x8[index].dw3.regular_weight = 2;
3006     sampler_8x8[index].dw3.strong_edge_weight = 7;
3007     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3008
3009     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3010                       I915_GEM_DOMAIN_RENDER, 
3011                       0,
3012                       0,
3013                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3014                       pp_context->sampler_state_table.bo_8x8);
3015
3016     /* sampler_8x8 V, index 12 */
3017     index = 12;
3018     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
3019     sampler_8x8[index].dw0.disable_8x8_filter = 0;
3020     sampler_8x8[index].dw0.global_noise_estimation = 255;
3021     sampler_8x8[index].dw0.ief_bypass = 1;
3022     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
3023     sampler_8x8[index].dw2.weak_edge_threshold = 1;
3024     sampler_8x8[index].dw2.strong_edge_threshold = 8;
3025     sampler_8x8[index].dw2.r5x_coefficient = 9;
3026     sampler_8x8[index].dw2.r5cx_coefficient = 8;
3027     sampler_8x8[index].dw2.r5c_coefficient = 3;
3028     sampler_8x8[index].dw3.r3x_coefficient = 27;
3029     sampler_8x8[index].dw3.r3c_coefficient = 5;
3030     sampler_8x8[index].dw3.gain_factor = 40;
3031     sampler_8x8[index].dw3.non_edge_weight = 1;
3032     sampler_8x8[index].dw3.regular_weight = 2;
3033     sampler_8x8[index].dw3.strong_edge_weight = 7;
3034     sampler_8x8[index].dw3.ief4_smooth_enable = 0;
3035
3036     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
3037                       I915_GEM_DOMAIN_RENDER, 
3038                       0,
3039                       0,
3040                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
3041                       pp_context->sampler_state_table.bo_8x8);
3042
3043     dri_bo_unmap(pp_context->sampler_state_table.bo);
3044
3045     /* private function & data */
3046     pp_context->pp_x_steps = gen7_pp_avs_x_steps;
3047     pp_context->pp_y_steps = gen7_pp_avs_y_steps;
3048     pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
3049
3050     pp_avs_context->dest_x = dst_rect->x;
3051     pp_avs_context->dest_y = dst_rect->y;
3052     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
3053     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
3054     pp_avs_context->src_w = src_rect->width;
3055     pp_avs_context->src_h = src_rect->height;
3056     pp_avs_context->horiz_range = (float)src_rect->width / src_width;
3057
3058     int dw = (pp_avs_context->src_w - 1) / 16 + 1;
3059     dw = MAX(dw, pp_avs_context->dest_w);
3060
3061     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3062     pp_static_parameter->grf2.avs_wa_enable = 0; /* It is unnecessary to use WA for RGBX surface */
3063     pp_static_parameter->grf2.avs_wa_width = dw;
3064     pp_static_parameter->grf2.avs_wa_one_div_256_width = (float) 1.0 / (256 * dw);
3065     pp_static_parameter->grf2.avs_wa_five_div_256_width = (float) 5.0 / (256 * dw);
3066
3067     pp_static_parameter->grf3.sampler_load_horizontal_scaling_step_ratio = (float) pp_avs_context->src_w / dw;
3068     pp_static_parameter->grf4.sampler_load_vertical_scaling_step = (float) src_rect->height / src_height / pp_avs_context->dest_h;
3069     pp_static_parameter->grf5.sampler_load_vertical_frame_origin = (float) src_rect->y / src_height -
3070                                                                    (float) pp_avs_context->dest_y * pp_static_parameter->grf4.sampler_load_vertical_scaling_step;
3071     pp_static_parameter->grf6.sampler_load_horizontal_frame_origin = (float) src_rect->x / src_width -
3072                                                                      (float) pp_avs_context->dest_x * pp_avs_context->horiz_range / dw;
3073     gen7_update_src_surface_uv_offset(ctx, pp_context, dst_surface);
3074
3075     dst_surface->flags = src_surface->flags;
3076
3077     return VA_STATUS_SUCCESS;
3078 }
3079
/*
 * Horizontal step count for the DNDI pass.
 *
 * Always 1; the private context is not consulted.
 */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
3085
3086 static int
3087 pp_dndi_y_steps(void *private_context)
3088 {
3089     struct pp_dndi_context *pp_dndi_context = private_context;
3090
3091     return pp_dndi_context->dest_h / 4;
3092 }
3093
3094 static int
3095 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3096 {
3097     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3098
3099     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3100     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
3101
3102     return 0;
3103 }
3104
3105 static VAStatus
3106 pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3107                         const struct i965_surface *src_surface,
3108                         const VARectangle *src_rect,
3109                         struct i965_surface *dst_surface,
3110                         const VARectangle *dst_rect,
3111                         void *filter_param)
3112 {
3113     struct i965_driver_data *i965 = i965_driver_data(ctx);
3114     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
3115     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3116     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3117     struct object_surface *obj_surface;
3118     struct i965_sampler_dndi *sampler_dndi;
3119     int index;
3120     int w, h;
3121     int orig_w, orig_h;
3122     int dndi_top_first = 1;
3123
3124     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
3125         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
3126
3127     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
3128         dndi_top_first = 1;
3129     else
3130         dndi_top_first = 0;
3131
3132     /* surface */
3133     obj_surface = SURFACE(src_surface->id);
3134     orig_w = obj_surface->orig_width;
3135     orig_h = obj_surface->orig_height;
3136     w = obj_surface->width;
3137     h = obj_surface->height;
3138
3139     if (pp_context->stmm.bo == NULL) {
3140         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
3141                                            "STMM surface",
3142                                            w * h,
3143                                            4096);
3144         assert(pp_context->stmm.bo);
3145     }
3146
3147     /* source UV surface index 2 */
3148     i965_pp_set_surface_state(ctx, pp_context,
3149                               obj_surface->bo, w * h,
3150                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3151                               2, 0);
3152
3153     /* source YUV surface index 4 */
3154     i965_pp_set_surface2_state(ctx, pp_context,
3155                                obj_surface->bo, 0,
3156                                orig_w, orig_h, w,
3157                                0, h,
3158                                SURFACE_FORMAT_PLANAR_420_8, 1,
3159                                4);
3160
3161     /* source STMM surface index 20 */
3162     i965_pp_set_surface_state(ctx, pp_context,
3163                               pp_context->stmm.bo, 0,
3164                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3165                               20, 1);
3166
3167     /* destination surface */
3168     obj_surface = SURFACE(dst_surface->id);
3169     orig_w = obj_surface->orig_width;
3170     orig_h = obj_surface->orig_height;
3171     w = obj_surface->width;
3172     h = obj_surface->height;
3173
3174     /* destination Y surface index 7 */
3175     i965_pp_set_surface_state(ctx, pp_context,
3176                               obj_surface->bo, 0,
3177                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3178                               7, 1);
3179
3180     /* destination UV surface index 8 */
3181     i965_pp_set_surface_state(ctx, pp_context,
3182                               obj_surface->bo, w * h,
3183                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3184                               8, 1);
3185     /* sampler dndi */
3186     dri_bo_map(pp_context->sampler_state_table.bo, True);
3187     assert(pp_context->sampler_state_table.bo->virtual);
3188     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3189     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3190
3191     /* sample dndi index 1 */
3192     index = 0;
3193     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
3194     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
3195     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
3196     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
3197
3198     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3199     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 4;
3200     sampler_dndi[index].dw1.stmm_c2 = 1;
3201     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
3202     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
3203
3204     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
3205     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
3206     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
3207     sampler_dndi[index].dw2.good_neighbor_threshold = 4;                // 0-63
3208
3209     sampler_dndi[index].dw3.maximum_stmm = 128;
3210     sampler_dndi[index].dw3.multipler_for_vecm = 2;
3211     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3212     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3213     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3214
3215     sampler_dndi[index].dw4.sdi_delta = 8;
3216     sampler_dndi[index].dw4.sdi_threshold = 128;
3217     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3218     sampler_dndi[index].dw4.stmm_shift_up = 0;
3219     sampler_dndi[index].dw4.stmm_shift_down = 0;
3220     sampler_dndi[index].dw4.minimum_stmm = 0;
3221
3222     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 8;
3223     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 32;
3224     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 64;
3225     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 32;
3226
3227     sampler_dndi[index].dw6.dn_enable = 1;
3228     sampler_dndi[index].dw6.di_enable = 1;
3229     sampler_dndi[index].dw6.di_partial = 0;
3230     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3231     sampler_dndi[index].dw6.dndi_stream_id = 0;
3232     sampler_dndi[index].dw6.dndi_first_frame = 1;
3233     sampler_dndi[index].dw6.progressive_dn = 0;
3234     sampler_dndi[index].dw6.fmd_tear_threshold = 63;
3235     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
3236     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
3237
3238     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3239     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3240     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3241     sampler_dndi[index].dw7.column_width_minus1 = 0;
3242
3243     dri_bo_unmap(pp_context->sampler_state_table.bo);
3244
3245     /* private function & data */
3246     pp_context->pp_x_steps = pp_dndi_x_steps;
3247     pp_context->pp_y_steps = pp_dndi_y_steps;
3248     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
3249
3250     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
3251     pp_static_parameter->grf1.r1_6.di.top_field_first = dndi_top_first;
3252     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 0;
3253     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 0;
3254
3255     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
3256     pp_inline_parameter->grf5.number_blocks = w / 16;
3257     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3258     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3259
3260     pp_dndi_context->dest_w = w;
3261     pp_dndi_context->dest_h = h;
3262
3263     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3264
3265     return VA_STATUS_SUCCESS;
3266 }
3267
/*
 * Horizontal step count for the denoise (DN) pass.
 *
 * Always 1; the private context is not consulted.
 */
static int
pp_dn_x_steps(void *private_context)
{
    (void)private_context;

    return 1;
}
3273
3274 static int
3275 pp_dn_y_steps(void *private_context)
3276 {
3277     struct pp_dn_context *pp_dn_context = private_context;
3278
3279     return pp_dn_context->dest_h / 8;
3280 }
3281
3282 static int
3283 pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3284 {
3285     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3286
3287     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3288     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 8;
3289
3290     return 0;
3291 }
3292
3293 static VAStatus
3294 pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3295                       const struct i965_surface *src_surface,
3296                       const VARectangle *src_rect,
3297                       struct i965_surface *dst_surface,
3298                       const VARectangle *dst_rect,
3299                       void *filter_param)
3300 {
3301     struct i965_driver_data *i965 = i965_driver_data(ctx);
3302     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
3303     struct object_surface *obj_surface;
3304     struct i965_sampler_dndi *sampler_dndi;
3305     struct pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3306     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3307     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3308     int index;
3309     int w, h;
3310     int orig_w, orig_h;
3311     int dn_strength = 15;
3312     int dndi_top_first = 1;
3313     int dn_progressive = 0;
3314
3315     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3316         dndi_top_first = 1;
3317         dn_progressive = 1;
3318     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3319         dndi_top_first = 1;
3320         dn_progressive = 0;
3321     } else {
3322         dndi_top_first = 0;
3323         dn_progressive = 0;
3324     }
3325
3326     if (dn_filter_param) {
3327         float value = dn_filter_param->value;
3328         
3329         if (value > 1.0)
3330             value = 1.0;
3331         
3332         if (value < 0.0)
3333             value = 0.0;
3334
3335         dn_strength = (int)(value * 31.0F);
3336     }
3337
3338     /* surface */
3339     obj_surface = SURFACE(src_surface->id);
3340     orig_w = obj_surface->orig_width;
3341     orig_h = obj_surface->orig_height;
3342     w = obj_surface->width;
3343     h = obj_surface->height;
3344
3345     if (pp_context->stmm.bo == NULL) {
3346         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
3347                                            "STMM surface",
3348                                            w * h,
3349                                            4096);
3350         assert(pp_context->stmm.bo);
3351     }
3352
3353     /* source UV surface index 2 */
3354     i965_pp_set_surface_state(ctx, pp_context,
3355                               obj_surface->bo, w * h,
3356                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3357                               2, 0);
3358
3359     /* source YUV surface index 4 */
3360     i965_pp_set_surface2_state(ctx, pp_context,
3361                                obj_surface->bo, 0,
3362                                orig_w, orig_h, w,
3363                                0, h,
3364                                SURFACE_FORMAT_PLANAR_420_8, 1,
3365                                4);
3366
3367     /* source STMM surface index 20 */
3368     i965_pp_set_surface_state(ctx, pp_context,
3369                               pp_context->stmm.bo, 0,
3370                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3371                               20, 1);
3372
3373     /* destination surface */
3374     obj_surface = SURFACE(dst_surface->id);
3375     orig_w = obj_surface->orig_width;
3376     orig_h = obj_surface->orig_height;
3377     w = obj_surface->width;
3378     h = obj_surface->height;
3379
3380     /* destination Y surface index 7 */
3381     i965_pp_set_surface_state(ctx, pp_context,
3382                               obj_surface->bo, 0,
3383                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3384                               7, 1);
3385
3386     /* destination UV surface index 8 */
3387     i965_pp_set_surface_state(ctx, pp_context,
3388                               obj_surface->bo, w * h,
3389                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3390                               8, 1);
3391     /* sampler dn */
3392     dri_bo_map(pp_context->sampler_state_table.bo, True);
3393     assert(pp_context->sampler_state_table.bo->virtual);
3394     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3395     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3396
3397     /* sample dndi index 1 */
3398     index = 0;
3399     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
3400     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
3401     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
3402     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
3403
3404     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3405     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
3406     sampler_dndi[index].dw1.stmm_c2 = 0;
3407     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
3408     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
3409
3410     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
3411     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
3412     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
3413     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
3414
3415     sampler_dndi[index].dw3.maximum_stmm = 128;
3416     sampler_dndi[index].dw3.multipler_for_vecm = 2;
3417     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3418     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3419     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3420
3421     sampler_dndi[index].dw4.sdi_delta = 8;
3422     sampler_dndi[index].dw4.sdi_threshold = 128;
3423     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3424     sampler_dndi[index].dw4.stmm_shift_up = 0;
3425     sampler_dndi[index].dw4.stmm_shift_down = 0;
3426     sampler_dndi[index].dw4.minimum_stmm = 0;
3427
3428     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
3429     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
3430     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3431     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3432
3433     sampler_dndi[index].dw6.dn_enable = 1;
3434     sampler_dndi[index].dw6.di_enable = 0;
3435     sampler_dndi[index].dw6.di_partial = 0;
3436     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3437     sampler_dndi[index].dw6.dndi_stream_id = 1;
3438     sampler_dndi[index].dw6.dndi_first_frame = 1;
3439     sampler_dndi[index].dw6.progressive_dn = dn_progressive;
3440     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
3441     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
3442     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
3443
3444     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
3445     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
3446     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3447     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3448
3449     dri_bo_unmap(pp_context->sampler_state_table.bo);
3450
3451     /* private function & data */
3452     pp_context->pp_x_steps = pp_dn_x_steps;
3453     pp_context->pp_y_steps = pp_dn_y_steps;
3454     pp_context->pp_set_block_parameter = pp_dn_set_block_parameter;
3455
3456     pp_static_parameter->grf1.statistics_surface_picth = w / 2;
3457     pp_static_parameter->grf1.r1_6.di.top_field_first = 0;
3458     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m2 = 64;
3459     pp_static_parameter->grf4.r4_2.di.motion_history_coefficient_m1 = 192;
3460
3461     pp_inline_parameter->grf5.block_count_x = w / 16;   /* 1 x N */
3462     pp_inline_parameter->grf5.number_blocks = w / 16;
3463     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
3464     pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
3465
3466     pp_dn_context->dest_w = w;
3467     pp_dn_context->dest_h = h;
3468
3469     dst_surface->flags = src_surface->flags;
3470     
3471     return VA_STATUS_SUCCESS;
3472 }
3473
3474 static int
3475 gen7_pp_dndi_x_steps(void *private_context)
3476 {
3477     struct pp_dndi_context *pp_dndi_context = private_context;
3478
3479     return pp_dndi_context->dest_w / 16;
3480 }
3481
3482 static int
3483 gen7_pp_dndi_y_steps(void *private_context)
3484 {
3485     struct pp_dndi_context *pp_dndi_context = private_context;
3486
3487     return pp_dndi_context->dest_h / 4;
3488 }
3489
3490 static int
3491 gen7_pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3492 {
3493     struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3494
3495     pp_inline_parameter->grf7.destination_block_horizontal_origin = x * 16;
3496     pp_inline_parameter->grf7.destination_block_vertical_origin = y * 4;
3497
3498     return 0;
3499 }
3500
3501 static VAStatus
3502 gen7_pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3503                              const struct i965_surface *src_surface,
3504                              const VARectangle *src_rect,
3505                              struct i965_surface *dst_surface,
3506                              const VARectangle *dst_rect,
3507                              void *filter_param)
3508 {
3509     struct i965_driver_data *i965 = i965_driver_data(ctx);
3510     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
3511     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3512     struct object_surface *obj_surface;
3513     struct gen7_sampler_dndi *sampler_dndi;
3514     int index;
3515     int w, h;
3516     int orig_w, orig_h;
3517     int dndi_top_first = 1;
3518
3519     if (src_surface->flags == I965_SURFACE_FLAG_FRAME)
3520         return VA_STATUS_ERROR_FLAG_NOT_SUPPORTED;
3521
3522     if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST)
3523         dndi_top_first = 1;
3524     else
3525         dndi_top_first = 0;
3526
3527     /* surface */
3528     obj_surface = SURFACE(src_surface->id);
3529     orig_w = obj_surface->orig_width;
3530     orig_h = obj_surface->orig_height;
3531     w = obj_surface->width;
3532     h = obj_surface->height;
3533
3534     if (pp_context->stmm.bo == NULL) {
3535         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
3536                                            "STMM surface",
3537                                            w * h,
3538                                            4096);
3539         assert(pp_context->stmm.bo);
3540     }
3541
3542     /* source UV surface index 1 */
3543     gen7_pp_set_surface_state(ctx, pp_context,
3544                               obj_surface->bo, w * h,
3545                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3546                               1, 0);
3547
3548     /* source YUV surface index 3 */
3549     gen7_pp_set_surface2_state(ctx, pp_context,
3550                                obj_surface->bo, 0,
3551                                orig_w, orig_h, w,
3552                                0, h,
3553                                SURFACE_FORMAT_PLANAR_420_8, 1,
3554                                3);
3555
3556     /* source (temporal reference) YUV surface index 4 */
3557     gen7_pp_set_surface2_state(ctx, pp_context,
3558                                obj_surface->bo, 0,
3559                                orig_w, orig_h, w,
3560                                0, h,
3561                                SURFACE_FORMAT_PLANAR_420_8, 1,
3562                                4);
3563
3564     /* STMM / History Statistics input surface, index 5 */
3565     gen7_pp_set_surface_state(ctx, pp_context,
3566                               pp_context->stmm.bo, 0,
3567                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3568                               5, 1);
3569
3570     /* destination surface */
3571     obj_surface = SURFACE(dst_surface->id);
3572     orig_w = obj_surface->orig_width;
3573     orig_h = obj_surface->orig_height;
3574     w = obj_surface->width;
3575     h = obj_surface->height;
3576
3577     /* destination(Previous frame) Y surface index 27 */
3578     gen7_pp_set_surface_state(ctx, pp_context,
3579                               obj_surface->bo, 0,
3580                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3581                               27, 1);
3582
3583     /* destination(Previous frame) UV surface index 28 */
3584     gen7_pp_set_surface_state(ctx, pp_context,
3585                               obj_surface->bo, w * h,
3586                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3587                               28, 1);
3588
3589     /* destination(Current frame) Y surface index 30 */
3590     gen7_pp_set_surface_state(ctx, pp_context,
3591                               obj_surface->bo, 0,
3592                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3593                               30, 1);
3594
3595     /* destination(Current frame) UV surface index 31 */
3596     gen7_pp_set_surface_state(ctx, pp_context,
3597                               obj_surface->bo, w * h,
3598                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3599                               31, 1);
3600
3601     /* STMM output surface, index 33 */
3602     gen7_pp_set_surface_state(ctx, pp_context,
3603                               pp_context->stmm.bo, 0,
3604                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3605                               33, 1);
3606
3607
3608     /* sampler dndi */
3609     dri_bo_map(pp_context->sampler_state_table.bo, True);
3610     assert(pp_context->sampler_state_table.bo->virtual);
3611     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
3612     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
3613
3614     /* sample dndi index 0 */
3615     index = 0;
3616     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
3617     sampler_dndi[index].dw0.dnmh_delt = 8;
3618     sampler_dndi[index].dw0.vdi_walker_y_stride = 0;
3619     sampler_dndi[index].dw0.vdi_walker_frame_sharing_enable = 0;
3620     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
3621     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
3622
3623     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3624     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
3625     sampler_dndi[index].dw1.stmm_c2 = 0;
3626     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
3627     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
3628
3629     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
3630     sampler_dndi[index].dw2.bne_edge_th = 1;
3631     sampler_dndi[index].dw2.smooth_mv_th = 0;
3632     sampler_dndi[index].dw2.sad_tight_th = 5;
3633     sampler_dndi[index].dw2.cat_slope_minus1 = 9;
3634     sampler_dndi[index].dw2.good_neighbor_th = 4;
3635
3636     sampler_dndi[index].dw3.maximum_stmm = 128;
3637     sampler_dndi[index].dw3.multipler_for_vecm = 2;
3638     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3639     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3640     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
3641
3642     sampler_dndi[index].dw4.sdi_delta = 8;
3643     sampler_dndi[index].dw4.sdi_threshold = 128;
3644     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3645     sampler_dndi[index].dw4.stmm_shift_up = 0;
3646     sampler_dndi[index].dw4.stmm_shift_down = 0;
3647     sampler_dndi[index].dw4.minimum_stmm = 0;
3648
3649     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
3650     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
3651     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3652     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3653
3654     sampler_dndi[index].dw6.dn_enable = 0;
3655     sampler_dndi[index].dw6.di_enable = 1;
3656     sampler_dndi[index].dw6.di_partial = 0;
3657     sampler_dndi[index].dw6.dndi_top_first = dndi_top_first;
3658     sampler_dndi[index].dw6.dndi_stream_id = 1;
3659     sampler_dndi[index].dw6.dndi_first_frame = 1;
3660     sampler_dndi[index].dw6.progressive_dn = 0;
3661     sampler_dndi[index].dw6.mcdi_enable = 0;
3662     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
3663     sampler_dndi[index].dw6.cat_th1 = 0;
3664     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
3665     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
3666
3667     sampler_dndi[index].dw7.sad_tha = 5;
3668     sampler_dndi[index].dw7.sad_thb = 10;
3669     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 0;
3670     sampler_dndi[index].dw7.mc_pixel_consistency_th = 25;
3671     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 0;
3672     sampler_dndi[index].dw7.vdi_walker_enable = 0;
3673     sampler_dndi[index].dw7.neighborpixel_th = 10;
3674     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
3675
3676     dri_bo_unmap(pp_context->sampler_state_table.bo);
3677
3678     /* private function & data */
3679     pp_context->pp_x_steps = gen7_pp_dndi_x_steps;
3680     pp_context->pp_y_steps = gen7_pp_dndi_y_steps;
3681     pp_context->pp_set_block_parameter = gen7_pp_dndi_set_block_parameter;
3682
3683     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3684     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3685     pp_static_parameter->grf1.di_top_field_first = 0;
3686     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3687
3688     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3689     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3690     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3691
3692     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3693     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3694
3695     pp_dndi_context->dest_w = w;
3696     pp_dndi_context->dest_h = h;
3697
3698     dst_surface->flags = I965_SURFACE_FLAG_FRAME;
3699
3700     return VA_STATUS_SUCCESS;
3701 }
3702
3703 static int
3704 gen7_pp_dn_x_steps(void *private_context)
3705 {
3706     struct pp_dn_context *pp_dn_context = private_context;
3707
3708     return pp_dn_context->dest_w / 16;
3709 }
3710
3711 static int
3712 gen7_pp_dn_y_steps(void *private_context)
3713 {
3714     struct pp_dn_context *pp_dn_context = private_context;
3715
3716     return pp_dn_context->dest_h / 4;
3717 }
3718
3719 static int
3720 gen7_pp_dn_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
3721 {
3722     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
3723
3724     pp_inline_parameter->grf5.destination_block_horizontal_origin = x * 16;
3725     pp_inline_parameter->grf5.destination_block_vertical_origin = y * 4;
3726
3727     return 0;
3728 }
3729
3730 static VAStatus
3731 gen7_pp_nv12_dn_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
3732                            const struct i965_surface *src_surface,
3733                            const VARectangle *src_rect,
3734                            struct i965_surface *dst_surface,
3735                            const VARectangle *dst_rect,
3736                            void *filter_param)
3737 {
3738     struct i965_driver_data *i965 = i965_driver_data(ctx);
3739     struct pp_dn_context *pp_dn_context = (struct pp_dn_context *)&pp_context->private_context;
3740     struct gen7_pp_static_parameter *pp_static_parameter = pp_context->pp_static_parameter;
3741     struct object_surface *obj_surface;
3742     struct gen7_sampler_dndi *sampler_dn;
3743     VAProcFilterParameterBuffer *dn_filter_param = filter_param; /* FIXME: parameter */
3744     int index;
3745     int w, h;
3746     int orig_w, orig_h;
3747     int dn_strength = 15;
3748     int dndi_top_first = 1;
3749     int dn_progressive = 0;
3750
3751     if (src_surface->flags == I965_SURFACE_FLAG_FRAME) {
3752         dndi_top_first = 1;
3753         dn_progressive = 1;
3754     } else if (src_surface->flags == I965_SURFACE_FLAG_TOP_FIELD_FIRST) {
3755         dndi_top_first = 1;
3756         dn_progressive = 0;
3757     } else {
3758         dndi_top_first = 0;
3759         dn_progressive = 0;
3760     }
3761
3762     if (dn_filter_param) {
3763         float value = dn_filter_param->value;
3764         
3765         if (value > 1.0)
3766             value = 1.0;
3767         
3768         if (value < 0.0)
3769             value = 0.0;
3770
3771         dn_strength = (int)(value * 31.0F);
3772     }
3773
3774     /* surface */
3775     obj_surface = SURFACE(src_surface->id);
3776     orig_w = obj_surface->orig_width;
3777     orig_h = obj_surface->orig_height;
3778     w = obj_surface->width;
3779     h = obj_surface->height;
3780
3781     if (pp_context->stmm.bo == NULL) {
3782         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
3783                                            "STMM surface",
3784                                            w * h,
3785                                            4096);
3786         assert(pp_context->stmm.bo);
3787     }
3788
3789     /* source UV surface index 1 */
3790     gen7_pp_set_surface_state(ctx, pp_context,
3791                               obj_surface->bo, w * h,
3792                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3793                               1, 0);
3794
3795     /* source YUV surface index 3 */
3796     gen7_pp_set_surface2_state(ctx, pp_context,
3797                                obj_surface->bo, 0,
3798                                orig_w, orig_h, w,
3799                                0, h,
3800                                SURFACE_FORMAT_PLANAR_420_8, 1,
3801                                3);
3802
3803     /* source (temporal reference) YUV surface index 4 */
3804     gen7_pp_set_surface2_state(ctx, pp_context,
3805                                obj_surface->bo, 0,
3806                                orig_w, orig_h, w,
3807                                0, h,
3808                                SURFACE_FORMAT_PLANAR_420_8, 1,
3809                                4);
3810
3811     /* STMM / History Statistics input surface, index 5 */
3812     gen7_pp_set_surface_state(ctx, pp_context,
3813                               pp_context->stmm.bo, 0,
3814                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3815                               33, 1);
3816
3817     /* destination surface */
3818     obj_surface = SURFACE(dst_surface->id);
3819     orig_w = obj_surface->orig_width;
3820     orig_h = obj_surface->orig_height;
3821     w = obj_surface->width;
3822     h = obj_surface->height;
3823
3824     /* destination Y surface index 24 */
3825     gen7_pp_set_surface_state(ctx, pp_context,
3826                               obj_surface->bo, 0,
3827                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
3828                               24, 1);
3829
3830     /* destination UV surface index 25 */
3831     gen7_pp_set_surface_state(ctx, pp_context,
3832                               obj_surface->bo, w * h,
3833                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
3834                               25, 1);
3835
3836     /* sampler dn */
3837     dri_bo_map(pp_context->sampler_state_table.bo, True);
3838     assert(pp_context->sampler_state_table.bo->virtual);
3839     assert(sizeof(*sampler_dn) == sizeof(int) * 8);
3840     sampler_dn = pp_context->sampler_state_table.bo->virtual;
3841
3842     /* sample dn index 1 */
3843     index = 0;
3844     sampler_dn[index].dw0.denoise_asd_threshold = 0;
3845     sampler_dn[index].dw0.dnmh_delt = 8;
3846     sampler_dn[index].dw0.vdi_walker_y_stride = 0;
3847     sampler_dn[index].dw0.vdi_walker_frame_sharing_enable = 0;
3848     sampler_dn[index].dw0.denoise_maximum_history = 128;      // 128-240
3849     sampler_dn[index].dw0.denoise_stad_threshold = 0;
3850
3851     sampler_dn[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
3852     sampler_dn[index].dw1.denoise_moving_pixel_threshold = 0;
3853     sampler_dn[index].dw1.stmm_c2 = 0;
3854     sampler_dn[index].dw1.low_temporal_difference_threshold = 8;
3855     sampler_dn[index].dw1.temporal_difference_threshold = 16;
3856
3857     sampler_dn[index].dw2.block_noise_estimate_noise_threshold = dn_strength;   // 0-31
3858     sampler_dn[index].dw2.bne_edge_th = 1;
3859     sampler_dn[index].dw2.smooth_mv_th = 0;
3860     sampler_dn[index].dw2.sad_tight_th = 5;
3861     sampler_dn[index].dw2.cat_slope_minus1 = 9;
3862     sampler_dn[index].dw2.good_neighbor_th = 4;
3863
3864     sampler_dn[index].dw3.maximum_stmm = 128;
3865     sampler_dn[index].dw3.multipler_for_vecm = 2;
3866     sampler_dn[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
3867     sampler_dn[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
3868     sampler_dn[index].dw3.stmm_blending_constant_select = 0;
3869
3870     sampler_dn[index].dw4.sdi_delta = 8;
3871     sampler_dn[index].dw4.sdi_threshold = 128;
3872     sampler_dn[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
3873     sampler_dn[index].dw4.stmm_shift_up = 0;
3874     sampler_dn[index].dw4.stmm_shift_down = 0;
3875     sampler_dn[index].dw4.minimum_stmm = 0;
3876
3877     sampler_dn[index].dw5.fmd_temporal_difference_threshold = 0;
3878     sampler_dn[index].dw5.sdi_fallback_mode_2_constant = 0;
3879     sampler_dn[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
3880     sampler_dn[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
3881
3882     sampler_dn[index].dw6.dn_enable = 1;
3883     sampler_dn[index].dw6.di_enable = 0;
3884     sampler_dn[index].dw6.di_partial = 0;
3885     sampler_dn[index].dw6.dndi_top_first = dndi_top_first;
3886     sampler_dn[index].dw6.dndi_stream_id = 1;
3887     sampler_dn[index].dw6.dndi_first_frame = 1;
3888     sampler_dn[index].dw6.progressive_dn = dn_progressive;
3889     sampler_dn[index].dw6.mcdi_enable = 0;
3890     sampler_dn[index].dw6.fmd_tear_threshold = 32;
3891     sampler_dn[index].dw6.cat_th1 = 0;
3892     sampler_dn[index].dw6.fmd2_vertical_difference_threshold = 32;
3893     sampler_dn[index].dw6.fmd1_vertical_difference_threshold = 32;
3894
3895     sampler_dn[index].dw7.sad_tha = 5;
3896     sampler_dn[index].dw7.sad_thb = 10;
3897     sampler_dn[index].dw7.fmd_for_1st_field_of_current_frame = 2;
3898     sampler_dn[index].dw7.mc_pixel_consistency_th = 25;
3899     sampler_dn[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
3900     sampler_dn[index].dw7.vdi_walker_enable = 0;
3901     sampler_dn[index].dw7.neighborpixel_th = 10;
3902     sampler_dn[index].dw7.column_width_minus1 = w / 16;
3903
3904     dri_bo_unmap(pp_context->sampler_state_table.bo);
3905
3906     /* private function & data */
3907     pp_context->pp_x_steps = gen7_pp_dn_x_steps;
3908     pp_context->pp_y_steps = gen7_pp_dn_y_steps;
3909     pp_context->pp_set_block_parameter = gen7_pp_dn_set_block_parameter;
3910
3911     pp_static_parameter->grf1.di_statistics_surface_pitch_div2 = w / 2;
3912     pp_static_parameter->grf1.di_statistics_surface_height_div4 = h / 4;
3913     pp_static_parameter->grf1.di_top_field_first = 0;
3914     pp_static_parameter->grf1.pointer_to_inline_parameter = 7;
3915
3916     pp_static_parameter->grf2.di_destination_packed_y_component_offset = 0;
3917     pp_static_parameter->grf2.di_destination_packed_u_component_offset = 1;
3918     pp_static_parameter->grf2.di_destination_packed_v_component_offset = 3;
3919
3920     pp_static_parameter->grf4.di_hoffset_svf_from_dvf = 0;
3921     pp_static_parameter->grf4.di_voffset_svf_from_dvf = 0;
3922
3923     pp_dn_context->dest_w = w;
3924     pp_dn_context->dest_h = h;
3925
3926     dst_surface->flags = src_surface->flags;
3927
3928     return VA_STATUS_SUCCESS;
3929 }
3930
3931 static VAStatus
3932 ironlake_pp_initialize(
3933     VADriverContextP   ctx,
3934     struct i965_post_processing_context *pp_context,
3935     const struct i965_surface *src_surface,
3936     const VARectangle *src_rect,
3937     struct i965_surface *dst_surface,
3938     const VARectangle *dst_rect,
3939     int                pp_index,
3940     void *filter_param
3941 )
3942 {
3943     VAStatus va_status;
3944     struct i965_driver_data *i965 = i965_driver_data(ctx);
3945     struct pp_module *pp_module;
3946     dri_bo *bo;
3947     int static_param_size, inline_param_size;
3948
3949     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
3950     bo = dri_bo_alloc(i965->intel.bufmgr,
3951                       "surface state & binding table",
3952                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
3953                       4096);
3954     assert(bo);
3955     pp_context->surface_state_binding_table.bo = bo;
3956
3957     dri_bo_unreference(pp_context->curbe.bo);
3958     bo = dri_bo_alloc(i965->intel.bufmgr,
3959                       "constant buffer",
3960                       4096, 
3961                       4096);
3962     assert(bo);
3963     pp_context->curbe.bo = bo;
3964
3965     dri_bo_unreference(pp_context->idrt.bo);
3966     bo = dri_bo_alloc(i965->intel.bufmgr, 
3967                       "interface discriptor", 
3968                       sizeof(struct i965_interface_descriptor), 
3969                       4096);
3970     assert(bo);
3971     pp_context->idrt.bo = bo;
3972     pp_context->idrt.num_interface_descriptors = 0;
3973
3974     dri_bo_unreference(pp_context->sampler_state_table.bo);
3975     bo = dri_bo_alloc(i965->intel.bufmgr, 
3976                       "sampler state table", 
3977                       4096,
3978                       4096);
3979     assert(bo);
3980     dri_bo_map(bo, True);
3981     memset(bo->virtual, 0, bo->size);
3982     dri_bo_unmap(bo);
3983     pp_context->sampler_state_table.bo = bo;
3984
3985     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
3986     bo = dri_bo_alloc(i965->intel.bufmgr, 
3987                       "sampler 8x8 state ",
3988                       4096,
3989                       4096);
3990     assert(bo);
3991     pp_context->sampler_state_table.bo_8x8 = bo;
3992
3993     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
3994     bo = dri_bo_alloc(i965->intel.bufmgr, 
3995                       "sampler 8x8 state ",
3996                       4096,
3997                       4096);
3998     assert(bo);
3999     pp_context->sampler_state_table.bo_8x8_uv = bo;
4000
4001     dri_bo_unreference(pp_context->vfe_state.bo);
4002     bo = dri_bo_alloc(i965->intel.bufmgr, 
4003                       "vfe state", 
4004                       sizeof(struct i965_vfe_state), 
4005                       4096);
4006     assert(bo);
4007     pp_context->vfe_state.bo = bo;
4008
4009     static_param_size = sizeof(struct pp_static_parameter);
4010     inline_param_size = sizeof(struct pp_inline_parameter);
4011
4012     memset(pp_context->pp_static_parameter, 0, static_param_size);
4013     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
4014     
4015     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
4016     pp_context->current_pp = pp_index;
4017     pp_module = &pp_context->pp_modules[pp_index];
4018     
4019     if (pp_module->initialize)
4020         va_status = pp_module->initialize(ctx, pp_context,
4021                                           src_surface,
4022                                           src_rect,
4023                                           dst_surface,
4024                                           dst_rect,
4025                                           filter_param);
4026     else
4027         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4028
4029     return va_status;
4030 }
4031
4032 static VAStatus
4033 ironlake_post_processing(
4034     VADriverContextP   ctx,
4035     struct i965_post_processing_context *pp_context,
4036     const struct i965_surface *src_surface,
4037     const VARectangle *src_rect,
4038     struct i965_surface *dst_surface,
4039     const VARectangle *dst_rect,
4040     int                pp_index,
4041     void *filter_param
4042 )
4043 {
4044     VAStatus va_status;
4045
4046     va_status = ironlake_pp_initialize(ctx, pp_context,
4047                                        src_surface,
4048                                        src_rect,
4049                                        dst_surface,
4050                                        dst_rect,
4051                                        pp_index,
4052                                        filter_param);
4053
4054     if (va_status == VA_STATUS_SUCCESS) {
4055         ironlake_pp_states_setup(ctx, pp_context);
4056         ironlake_pp_pipeline_setup(ctx, pp_context);
4057     }
4058
4059     return va_status;
4060 }
4061
4062 static VAStatus
4063 gen6_pp_initialize(
4064     VADriverContextP   ctx,
4065     struct i965_post_processing_context *pp_context,
4066     const struct i965_surface *src_surface,
4067     const VARectangle *src_rect,
4068     struct i965_surface *dst_surface,
4069     const VARectangle *dst_rect,
4070     int                pp_index,
4071     void *filter_param
4072 )
4073 {
4074     VAStatus va_status;
4075     struct i965_driver_data *i965 = i965_driver_data(ctx);
4076     struct pp_module *pp_module;
4077     dri_bo *bo;
4078     int static_param_size, inline_param_size;
4079
4080     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
4081     bo = dri_bo_alloc(i965->intel.bufmgr,
4082                       "surface state & binding table",
4083                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_PP_SURFACES,
4084                       4096);
4085     assert(bo);
4086     pp_context->surface_state_binding_table.bo = bo;
4087
4088     dri_bo_unreference(pp_context->curbe.bo);
4089     bo = dri_bo_alloc(i965->intel.bufmgr,
4090                       "constant buffer",
4091                       4096, 
4092                       4096);
4093     assert(bo);
4094     pp_context->curbe.bo = bo;
4095
4096     dri_bo_unreference(pp_context->idrt.bo);
4097     bo = dri_bo_alloc(i965->intel.bufmgr, 
4098                       "interface discriptor", 
4099                       sizeof(struct gen6_interface_descriptor_data), 
4100                       4096);
4101     assert(bo);
4102     pp_context->idrt.bo = bo;
4103     pp_context->idrt.num_interface_descriptors = 0;
4104
4105     dri_bo_unreference(pp_context->sampler_state_table.bo);
4106     bo = dri_bo_alloc(i965->intel.bufmgr, 
4107                       "sampler state table", 
4108                       4096,
4109                       4096);
4110     assert(bo);
4111     dri_bo_map(bo, True);
4112     memset(bo->virtual, 0, bo->size);
4113     dri_bo_unmap(bo);
4114     pp_context->sampler_state_table.bo = bo;
4115
4116     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
4117     bo = dri_bo_alloc(i965->intel.bufmgr, 
4118                       "sampler 8x8 state ",
4119                       4096,
4120                       4096);
4121     assert(bo);
4122     pp_context->sampler_state_table.bo_8x8 = bo;
4123
4124     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
4125     bo = dri_bo_alloc(i965->intel.bufmgr, 
4126                       "sampler 8x8 state ",
4127                       4096,
4128                       4096);
4129     assert(bo);
4130     pp_context->sampler_state_table.bo_8x8_uv = bo;
4131
4132     dri_bo_unreference(pp_context->vfe_state.bo);
4133     bo = dri_bo_alloc(i965->intel.bufmgr, 
4134                       "vfe state", 
4135                       sizeof(struct i965_vfe_state), 
4136                       4096);
4137     assert(bo);
4138     pp_context->vfe_state.bo = bo;
4139     
4140     if (IS_GEN7(i965->intel.device_id)) {
4141         static_param_size = sizeof(struct gen7_pp_static_parameter);
4142         inline_param_size = sizeof(struct gen7_pp_inline_parameter);
4143     } else {
4144         static_param_size = sizeof(struct pp_static_parameter);
4145         inline_param_size = sizeof(struct pp_inline_parameter);
4146     }
4147
4148     memset(pp_context->pp_static_parameter, 0, static_param_size);
4149     memset(pp_context->pp_inline_parameter, 0, inline_param_size);
4150
4151     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
4152     pp_context->current_pp = pp_index;
4153     pp_module = &pp_context->pp_modules[pp_index];
4154     
4155     if (pp_module->initialize)
4156         va_status = pp_module->initialize(ctx, pp_context,
4157                                           src_surface,
4158                                           src_rect,
4159                                           dst_surface,
4160                                           dst_rect,
4161                                           filter_param);
4162     else
4163         va_status = VA_STATUS_ERROR_UNIMPLEMENTED;
4164
4165     calculate_boundary_block_mask(pp_context, dst_rect);
4166     
4167     return va_status;
4168 }
4169
4170 static void
4171 gen6_pp_interface_descriptor_table(VADriverContextP   ctx,
4172                                    struct i965_post_processing_context *pp_context)
4173 {
4174     struct i965_driver_data *i965 = i965_driver_data(ctx);
4175     struct gen6_interface_descriptor_data *desc;
4176     dri_bo *bo;
4177     int pp_index = pp_context->current_pp;
4178
4179     bo = pp_context->idrt.bo;
4180     dri_bo_map(bo, True);
4181     assert(bo->virtual);
4182     desc = bo->virtual;
4183     memset(desc, 0, sizeof(*desc));
4184     desc->desc0.kernel_start_pointer = 
4185         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
4186     desc->desc1.single_program_flow = 1;
4187     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
4188     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
4189     desc->desc2.sampler_state_pointer = 
4190         pp_context->sampler_state_table.bo->offset >> 5;
4191     desc->desc3.binding_table_entry_count = 0;
4192     desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
4193     desc->desc4.constant_urb_entry_read_offset = 0;
4194
4195     if (IS_GEN7(i965->intel.device_id))
4196         desc->desc4.constant_urb_entry_read_length = 6; /* grf 1-6 */
4197     else
4198         desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
4199
4200     dri_bo_emit_reloc(bo,
4201                       I915_GEM_DOMAIN_INSTRUCTION, 0,
4202                       0,
4203                       offsetof(struct gen6_interface_descriptor_data, desc0),
4204                       pp_context->pp_modules[pp_index].kernel.bo);
4205
4206     dri_bo_emit_reloc(bo,
4207                       I915_GEM_DOMAIN_INSTRUCTION, 0,
4208                       desc->desc2.sampler_count << 2,
4209                       offsetof(struct gen6_interface_descriptor_data, desc2),
4210                       pp_context->sampler_state_table.bo);
4211
4212     dri_bo_unmap(bo);
4213     pp_context->idrt.num_interface_descriptors++;
4214 }
4215
4216 static void
4217 gen6_pp_upload_constants(VADriverContextP ctx,
4218                          struct i965_post_processing_context *pp_context)
4219 {
4220     struct i965_driver_data *i965 = i965_driver_data(ctx);
4221     unsigned char *constant_buffer;
4222     int param_size;
4223
4224     assert(sizeof(struct pp_static_parameter) == 128);
4225     assert(sizeof(struct gen7_pp_static_parameter) == 192);
4226
4227     if (IS_GEN7(i965->intel.device_id))
4228         param_size = sizeof(struct gen7_pp_static_parameter);
4229     else
4230         param_size = sizeof(struct pp_static_parameter);
4231
4232     dri_bo_map(pp_context->curbe.bo, 1);
4233     assert(pp_context->curbe.bo->virtual);
4234     constant_buffer = pp_context->curbe.bo->virtual;
4235     memcpy(constant_buffer, pp_context->pp_static_parameter, param_size);
4236     dri_bo_unmap(pp_context->curbe.bo);
4237 }
4238
4239 static void
4240 gen6_pp_states_setup(VADriverContextP ctx,
4241                      struct i965_post_processing_context *pp_context)
4242 {
4243     gen6_pp_interface_descriptor_table(ctx, pp_context);
4244     gen6_pp_upload_constants(ctx, pp_context);
4245 }
4246
4247 static void
4248 gen6_pp_pipeline_select(VADriverContextP ctx,
4249                         struct i965_post_processing_context *pp_context)
4250 {
4251     struct intel_batchbuffer *batch = pp_context->batch;
4252
4253     BEGIN_BATCH(batch, 1);
4254     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
4255     ADVANCE_BATCH(batch);
4256 }
4257
4258 static void
4259 gen6_pp_state_base_address(VADriverContextP ctx,
4260                            struct i965_post_processing_context *pp_context)
4261 {
4262     struct intel_batchbuffer *batch = pp_context->batch;
4263
4264     BEGIN_BATCH(batch, 10);
4265     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
4266     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4267     OUT_RELOC(batch, pp_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
4268     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4269     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4270     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4271     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4272     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4273     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4274     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
4275     ADVANCE_BATCH(batch);
4276 }
4277
4278 static void
4279 gen6_pp_vfe_state(VADriverContextP ctx,
4280                   struct i965_post_processing_context *pp_context)
4281 {
4282     struct intel_batchbuffer *batch = pp_context->batch;
4283
4284     BEGIN_BATCH(batch, 8);
4285     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
4286     OUT_BATCH(batch, 0);
4287     OUT_BATCH(batch,
4288               (pp_context->urb.num_vfe_entries - 1) << 16 |
4289               pp_context->urb.num_vfe_entries << 8);
4290     OUT_BATCH(batch, 0);
4291     OUT_BATCH(batch,
4292               (pp_context->urb.size_vfe_entry * 2) << 16 |  /* URB Entry Allocation Size, in 256 bits unit */
4293               (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2)); /* CURBE Allocation Size, in 256 bits unit */
4294     OUT_BATCH(batch, 0);
4295     OUT_BATCH(batch, 0);
4296     OUT_BATCH(batch, 0);
4297     ADVANCE_BATCH(batch);
4298 }
4299
4300 static void
4301 gen6_pp_curbe_load(VADriverContextP ctx,
4302                    struct i965_post_processing_context *pp_context)
4303 {
4304     struct intel_batchbuffer *batch = pp_context->batch;
4305
4306     assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32 <= pp_context->curbe.bo->size);
4307
4308     BEGIN_BATCH(batch, 4);
4309     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
4310     OUT_BATCH(batch, 0);
4311     OUT_BATCH(batch,
4312               pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 * 32);
4313     OUT_RELOC(batch, 
4314               pp_context->curbe.bo,
4315               I915_GEM_DOMAIN_INSTRUCTION, 0,
4316               0);
4317     ADVANCE_BATCH(batch);
4318 }
4319
4320 static void
4321 gen6_interface_descriptor_load(VADriverContextP ctx,
4322                                struct i965_post_processing_context *pp_context)
4323 {
4324     struct intel_batchbuffer *batch = pp_context->batch;
4325
4326     BEGIN_BATCH(batch, 4);
4327     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
4328     OUT_BATCH(batch, 0);
4329     OUT_BATCH(batch,
4330               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
4331     OUT_RELOC(batch, 
4332               pp_context->idrt.bo,
4333               I915_GEM_DOMAIN_INSTRUCTION, 0,
4334               0);
4335     ADVANCE_BATCH(batch);
4336 }
4337
4338 static void update_block_mask_parameter(struct i965_post_processing_context *pp_context, int x, int y, int x_steps, int y_steps) 
4339 {
4340     struct pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
4341
4342     pp_inline_parameter->grf5.block_vertical_mask = 0xff;
4343     pp_inline_parameter->grf6.block_vertical_mask_bottom = pp_context->block_vertical_mask_bottom;
4344     // for the first block, it always on the left edge. the second block will reload horizontal_mask from grf6.block_horizontal_mask_middle
4345     pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_left;
4346     pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
4347     pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_right;
4348
4349     /* 1 x N */
4350     if (x_steps == 1) {
4351         if (y == y_steps-1) {
4352             pp_inline_parameter->grf5.block_vertical_mask = pp_context->block_vertical_mask_bottom;
4353         }
4354         else {
4355             pp_inline_parameter->grf6.block_vertical_mask_bottom = 0xff;
4356         }
4357     }
4358
4359     /* M x 1 */
4360     if (y_steps == 1) {
4361         if (x == 0) { // all blocks in this group are on the left edge
4362             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_left;
4363             pp_inline_parameter->grf6.block_horizontal_mask_right = pp_context->block_horizontal_mask_left; 
4364         }
4365         else if (x == x_steps-1) {
4366             pp_inline_parameter->grf5.block_horizontal_mask = pp_context->block_horizontal_mask_right;
4367             pp_inline_parameter->grf6.block_horizontal_mask_middle = pp_context->block_horizontal_mask_right;
4368         }
4369         else {
4370             pp_inline_parameter->grf5.block_horizontal_mask = 0xffff;
4371             pp_inline_parameter->grf6.block_horizontal_mask_middle = 0xffff;
4372             pp_inline_parameter->grf6.block_horizontal_mask_right = 0xffff;
4373         }
4374     }
4375
4376 }
4377
4378 static void
4379 gen6_pp_object_walker(VADriverContextP ctx,
4380                       struct i965_post_processing_context *pp_context)
4381 {
4382     struct i965_driver_data *i965 = i965_driver_data(ctx);
4383     struct intel_batchbuffer *batch = pp_context->batch;
4384     int x, x_steps, y, y_steps;
4385     int param_size, command_length_in_dws;
4386     dri_bo *command_buffer;
4387     unsigned int *command_ptr;
4388
4389     if (IS_GEN7(i965->intel.device_id))
4390         param_size = sizeof(struct gen7_pp_inline_parameter);
4391     else
4392         param_size = sizeof(struct pp_inline_parameter);
4393
4394     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
4395     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
4396     command_length_in_dws = 6 + (param_size >> 2);
4397     command_buffer = dri_bo_alloc(i965->intel.bufmgr,
4398                                   "command objects buffer",
4399                                   command_length_in_dws * 4 * x_steps * y_steps + 8,
4400                                   4096);
4401
4402     dri_bo_map(command_buffer, 1);
4403     command_ptr = command_buffer->virtual;
4404
4405     for (y = 0; y < y_steps; y++) {
4406         for (x = 0; x < x_steps; x++) {
4407             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
4408                 // some common block parameter update goes here, apply to all pp functions
4409                 if (IS_GEN6(i965->intel.device_id))
4410                     update_block_mask_parameter (pp_context, x, y, x_steps, y_steps);
4411                 
4412                 *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
4413                 *command_ptr++ = 0;
4414                 *command_ptr++ = 0;
4415                 *command_ptr++ = 0;
4416                 *command_ptr++ = 0;
4417                 *command_ptr++ = 0;
4418                 memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
4419                 command_ptr += (param_size >> 2);
4420             }
4421         }
4422     }
4423
4424     if (command_length_in_dws * x_steps * y_steps % 2 == 0)
4425         *command_ptr++ = 0;
4426
4427     *command_ptr = MI_BATCH_BUFFER_END;
4428
4429     dri_bo_unmap(command_buffer);
4430
4431     BEGIN_BATCH(batch, 2);
4432     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
4433     OUT_RELOC(batch, command_buffer, 
4434               I915_GEM_DOMAIN_COMMAND, 0, 
4435               0);
4436     ADVANCE_BATCH(batch);
4437     
4438     dri_bo_unreference(command_buffer);
4439
4440     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
4441      * will cause control to pass back to ring buffer 
4442      */
4443     intel_batchbuffer_end_atomic(batch);
4444     intel_batchbuffer_flush(batch);
4445     intel_batchbuffer_start_atomic(batch, 0x1000);
4446 }
4447
4448 static void
4449 gen6_pp_pipeline_setup(VADriverContextP ctx,
4450                        struct i965_post_processing_context *pp_context)
4451 {
4452     struct intel_batchbuffer *batch = pp_context->batch;
4453
4454     intel_batchbuffer_start_atomic(batch, 0x1000);
4455     intel_batchbuffer_emit_mi_flush(batch);
4456     gen6_pp_pipeline_select(ctx, pp_context);
4457     gen6_pp_state_base_address(ctx, pp_context);
4458     gen6_pp_vfe_state(ctx, pp_context);
4459     gen6_pp_curbe_load(ctx, pp_context);
4460     gen6_interface_descriptor_load(ctx, pp_context);
4461     gen6_pp_object_walker(ctx, pp_context);
4462     intel_batchbuffer_end_atomic(batch);
4463 }
4464
4465 static VAStatus
4466 gen6_post_processing(
4467     VADriverContextP   ctx,
4468     struct i965_post_processing_context *pp_context,
4469     const struct i965_surface *src_surface,
4470     const VARectangle *src_rect,
4471     struct i965_surface *dst_surface,
4472     const VARectangle *dst_rect,
4473     int                pp_index,
4474     void * filter_param
4475 )
4476 {
4477     VAStatus va_status;
4478     
4479     va_status = gen6_pp_initialize(ctx, pp_context,
4480                                    src_surface,
4481                                    src_rect,
4482                                    dst_surface,
4483                                    dst_rect,
4484                                    pp_index,
4485                                    filter_param);
4486
4487     if (va_status == VA_STATUS_SUCCESS) {
4488         gen6_pp_states_setup(ctx, pp_context);
4489         gen6_pp_pipeline_setup(ctx, pp_context);
4490     }
4491
4492     return va_status;
4493 }
4494
4495 static VAStatus
4496 i965_post_processing_internal(
4497     VADriverContextP   ctx,
4498     struct i965_post_processing_context *pp_context,
4499     const struct i965_surface *src_surface,
4500     const VARectangle *src_rect,
4501     struct i965_surface *dst_surface,
4502     const VARectangle *dst_rect,
4503     int                pp_index,
4504     void *filter_param
4505 )
4506 {
4507     struct i965_driver_data *i965 = i965_driver_data(ctx);
4508     VAStatus va_status;
4509
4510     if (IS_GEN6(i965->intel.device_id) ||
4511         IS_GEN7(i965->intel.device_id))
4512         va_status = gen6_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
4513     else
4514         va_status = ironlake_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index, filter_param);
4515     
4516     return va_status;
4517 }
4518
4519 VAStatus 
4520 i965_DestroySurfaces(VADriverContextP ctx,
4521                      VASurfaceID *surface_list,
4522                      int num_surfaces);
4523 VAStatus 
4524 i965_CreateSurfaces(VADriverContextP ctx,
4525                     int width,
4526                     int height,
4527                     int format,
4528                     int num_surfaces,
4529                     VASurfaceID *surfaces);
4530
/*
 * Split a packed 0xAARRGGBB colour into Y, U, V and alpha bytes.
 *
 * The conversion uses integer arithmetic with coefficients scaled by 1000
 * (they look like the ITU-R BT.601 studio-range matrix -- confirm if the
 * exact standard matters).  Division truncates toward zero.
 *
 * argb: packed colour, alpha in bits 31..24, red 23..16, green 15..8,
 *       blue 7..0.
 * y, u, v, a: outputs; each receives one byte.
 */
static void
rgb_to_yuv(unsigned int argb,
           unsigned char *y,
           unsigned char *u,
           unsigned char *v,
           unsigned char *a)
{
    const int red   = (int)((argb >> 16) & 0xff);
    const int green = (int)((argb >> 8) & 0xff);
    const int blue  = (int)(argb & 0xff);

    const int luma = (257 * red + 504 * green + 98 * blue) / 1000 + 16;
    const int cr   = (439 * red - 368 * green - 71 * blue) / 1000 + 128;
    const int cb   = (-148 * red - 291 * green + 439 * blue) / 1000 + 128;

    *y = luma;
    *v = cr;
    *u = cb;
    *a = (argb >> 24) & 0xff;
}
4547
4548 static void 
4549 i965_vpp_clear_surface(VADriverContextP ctx,
4550                        struct i965_post_processing_context *pp_context,
4551                        VASurfaceID surface,
4552                        unsigned int color)
4553 {
4554     struct i965_driver_data *i965 = i965_driver_data(ctx);
4555     struct intel_batchbuffer *batch = pp_context->batch;
4556     struct object_surface *obj_surface = SURFACE(surface);
4557     unsigned int blt_cmd, br13;
4558     unsigned int tiling = 0, swizzle = 0;
4559     int pitch;
4560     unsigned char y, u, v, a = 0;
4561
4562     /* Currently only support NV12 surface */
4563     if (!obj_surface || obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
4564         return;
4565
4566     rgb_to_yuv(color, &y, &u, &v, &a);
4567
4568     if (a == 0)
4569         return;
4570
4571     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
4572     blt_cmd = XY_COLOR_BLT_CMD;
4573     pitch = obj_surface->width;
4574
4575     if (tiling != I915_TILING_NONE) {
4576         blt_cmd |= XY_COLOR_BLT_DST_TILED;
4577         pitch >>= 2;
4578     }
4579
4580     br13 = 0xf0 << 16;
4581     br13 |= BR13_8;
4582     br13 |= pitch;
4583
4584     if (IS_GEN6(i965->intel.device_id) ||
4585         IS_GEN7(i965->intel.device_id)) {
4586         intel_batchbuffer_start_atomic_blt(batch, 48);
4587         BEGIN_BLT_BATCH(batch, 12);
4588     } else {
4589         intel_batchbuffer_start_atomic(batch, 48);
4590         BEGIN_BATCH(batch, 12);
4591     }
4592
4593     OUT_BATCH(batch, blt_cmd);
4594     OUT_BATCH(batch, br13);
4595     OUT_BATCH(batch,
4596               0 << 16 |
4597               0);
4598     OUT_BATCH(batch,
4599               obj_surface->height << 16 |
4600               obj_surface->width);
4601     OUT_RELOC(batch, obj_surface->bo, 
4602               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4603               0);
4604     OUT_BATCH(batch, y);
4605
4606     br13 = 0xf0 << 16;
4607     br13 |= BR13_565;
4608     br13 |= pitch;
4609
4610     OUT_BATCH(batch, blt_cmd);
4611     OUT_BATCH(batch, br13);
4612     OUT_BATCH(batch,
4613               0 << 16 |
4614               0);
4615     OUT_BATCH(batch,
4616               obj_surface->height / 2 << 16 |
4617               obj_surface->width / 2);
4618     OUT_RELOC(batch, obj_surface->bo, 
4619               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
4620               obj_surface->width * obj_surface->y_cb_offset);
4621     OUT_BATCH(batch, v << 8 | u);
4622
4623     ADVANCE_BATCH(batch);
4624     intel_batchbuffer_end_atomic(batch);
4625 }
4626
4627 VAStatus
4628 i965_scaling_processing(
4629     VADriverContextP   ctx,
4630     VASurfaceID        src_surface_id,
4631     const VARectangle *src_rect,
4632     VASurfaceID        dst_surface_id,
4633     const VARectangle *dst_rect,
4634     unsigned int       flags)
4635 {
4636     VAStatus va_status = VA_STATUS_SUCCESS;
4637     struct i965_driver_data *i965 = i965_driver_data(ctx);
4638     struct object_surface *src_surface_obj = SURFACE(src_surface_id);
4639     struct object_surface *dst_surface_obj = SURFACE(dst_surface_id);
4640  
4641     assert(src_surface_obj->fourcc == VA_FOURCC('N', 'V', '1', '2'));
4642     assert(dst_surface_obj->fourcc == VA_FOURCC('N', 'V', '1', '2'));
4643
4644     if (HAS_PP(i965) && (flags & I965_PP_FLAG_AVS)) {
4645         struct i965_surface src_surface;
4646         struct i965_surface dst_surface;
4647
4648          _i965LockMutex(&i965->pp_mutex);
4649
4650          src_surface.id = src_surface_id;
4651          src_surface.type = I965_SURFACE_TYPE_SURFACE;
4652          src_surface.flags = I965_SURFACE_FLAG_FRAME;
4653          dst_surface.id = dst_surface_id;
4654          dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4655          dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4656
4657          va_status = i965_post_processing_internal(ctx, i965->pp_context,
4658                                                    &src_surface,
4659                                                    src_rect,
4660                                                    &dst_surface,
4661                                                    dst_rect,
4662                                                    PP_NV12_AVS,
4663                                                    NULL);
4664
4665          _i965UnlockMutex(&i965->pp_mutex);
4666     }
4667
4668     return va_status;
4669 }
4670
4671 VASurfaceID
4672 i965_post_processing(
4673     VADriverContextP   ctx,
4674     VASurfaceID        surface,
4675     const VARectangle *src_rect,
4676     const VARectangle *dst_rect,
4677     unsigned int       flags,
4678     int               *has_done_scaling  
4679 )
4680 {
4681     struct i965_driver_data *i965 = i965_driver_data(ctx);
4682     VASurfaceID in_surface_id = surface;
4683     VASurfaceID out_surface_id = VA_INVALID_ID;
4684     
4685     *has_done_scaling = 0;
4686
4687     if (HAS_PP(i965)) {
4688         struct object_surface *obj_surface;
4689         VAStatus status;
4690         struct i965_surface src_surface;
4691         struct i965_surface dst_surface;
4692
4693         obj_surface = SURFACE(in_surface_id);
4694
4695         /* Currently only support post processing for NV12 surface */
4696         if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
4697             return out_surface_id;
4698
4699         _i965LockMutex(&i965->pp_mutex);
4700
4701         if (flags & I965_PP_FLAG_MCDI) {
4702             status = i965_CreateSurfaces(ctx,
4703                                          obj_surface->orig_width,
4704                                          obj_surface->orig_height,
4705                                          VA_RT_FORMAT_YUV420,
4706                                          1,
4707                                          &out_surface_id);
4708             assert(status == VA_STATUS_SUCCESS);
4709             obj_surface = SURFACE(out_surface_id);
4710             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4711             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
4712             src_surface.id = in_surface_id;
4713             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4714             src_surface.flags = (flags & I965_PP_FLAG_TOP_FIELD) ? 
4715                 I965_SURFACE_FLAG_TOP_FIELD_FIRST : I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST;
4716             dst_surface.id = out_surface_id;
4717             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4718             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4719
4720             i965_post_processing_internal(ctx, i965->pp_context,
4721                                           &src_surface,
4722                                           src_rect,
4723                                           &dst_surface,
4724                                           dst_rect,
4725                                           PP_NV12_DNDI,
4726                                           NULL);
4727         }
4728
4729         if (flags & I965_PP_FLAG_AVS) {
4730             struct i965_render_state *render_state = &i965->render_state;
4731             struct intel_region *dest_region = render_state->draw_region;
4732
4733             if (out_surface_id != VA_INVALID_ID)
4734                 in_surface_id = out_surface_id;
4735
4736             status = i965_CreateSurfaces(ctx,
4737                                          dest_region->width,
4738                                          dest_region->height,
4739                                          VA_RT_FORMAT_YUV420,
4740                                          1,
4741                                          &out_surface_id);
4742             assert(status == VA_STATUS_SUCCESS);
4743             obj_surface = SURFACE(out_surface_id);
4744             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
4745             i965_vpp_clear_surface(ctx, i965->pp_context, out_surface_id, 0); 
4746             src_surface.id = in_surface_id;
4747             src_surface.type = I965_SURFACE_TYPE_SURFACE;
4748             src_surface.flags = I965_SURFACE_FLAG_FRAME;
4749             dst_surface.id = out_surface_id;
4750             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
4751             dst_surface.flags = I965_SURFACE_FLAG_FRAME;
4752
4753             i965_post_processing_internal(ctx, i965->pp_context,
4754                                           &src_surface,
4755                                           src_rect,
4756                                           &dst_surface,
4757                                           dst_rect,
4758                                           PP_NV12_AVS,
4759                                           NULL);
4760
4761             if (in_surface_id != surface)
4762                 i965_DestroySurfaces(ctx, &in_surface_id, 1);
4763                 
4764             *has_done_scaling = 1;
4765         }
4766
4767         _i965UnlockMutex(&i965->pp_mutex);
4768     }
4769
4770     return out_surface_id;
4771 }       
4772
4773 static VAStatus
4774 i965_image_pl1_rgbx_processing(VADriverContextP ctx,
4775                           const struct i965_surface *src_surface,
4776                           const VARectangle *src_rect,
4777                           struct i965_surface *dst_surface,
4778                           const VARectangle *dst_rect)
4779 {
4780     struct i965_driver_data *i965 = i965_driver_data(ctx);
4781     struct i965_post_processing_context *pp_context = i965->pp_context;
4782     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4783
4784     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4785         i965_post_processing_internal(ctx, i965->pp_context,
4786                                       src_surface,
4787                                       src_rect,
4788                                       dst_surface,
4789                                       dst_rect,
4790                                       PP_RGBX_LOAD_SAVE_NV12,
4791                                       NULL);
4792     } else {
4793         assert(0);
4794         return VA_STATUS_ERROR_UNKNOWN;
4795     }
4796
4797     intel_batchbuffer_flush(pp_context->batch);
4798
4799     return VA_STATUS_SUCCESS;
4800 }
4801
4802 static VAStatus
4803 i965_image_pl3_processing(VADriverContextP ctx,
4804                           const struct i965_surface *src_surface,
4805                           const VARectangle *src_rect,
4806                           struct i965_surface *dst_surface,
4807                           const VARectangle *dst_rect)
4808 {
4809     struct i965_driver_data *i965 = i965_driver_data(ctx);
4810     struct i965_post_processing_context *pp_context = i965->pp_context;
4811     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4812     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
4813
4814     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4815         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4816                                                  src_surface,
4817                                                  src_rect,
4818                                                  dst_surface,
4819                                                  dst_rect,
4820                                                  PP_PL3_LOAD_SAVE_N12,
4821                                                  NULL);
4822     } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || 
4823                fourcc == VA_FOURCC('I', 'M', 'C', '3') || 
4824                fourcc == VA_FOURCC('Y', 'V', '1', '2') || 
4825                fourcc == VA_FOURCC('I', '4', '2', '0')) {
4826         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4827                                                  src_surface,
4828                                                  src_rect,
4829                                                  dst_surface,
4830                                                  dst_rect,
4831                                                  PP_PL3_LOAD_SAVE_PL3,
4832                                                  NULL);
4833     } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') ||
4834                fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
4835         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4836                                                  src_surface,
4837                                                  src_rect,
4838                                                  dst_surface,
4839                                                  dst_rect,
4840                                                  PP_PL3_LOAD_SAVE_PA,
4841                                                  NULL);
4842     }
4843     else {
4844         assert(0);
4845     }
4846
4847     intel_batchbuffer_flush(pp_context->batch);
4848
4849     return vaStatus;
4850 }
4851
4852 static VAStatus
4853 i965_image_pl2_processing(VADriverContextP ctx,
4854                           const struct i965_surface *src_surface,
4855                           const VARectangle *src_rect,
4856                           struct i965_surface *dst_surface,
4857                           const VARectangle *dst_rect)
4858 {
4859     struct i965_driver_data *i965 = i965_driver_data(ctx);
4860     struct i965_post_processing_context *pp_context = i965->pp_context;
4861     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4862     VAStatus vaStatus = VA_STATUS_ERROR_UNIMPLEMENTED;
4863
4864     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4865         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4866                                                  src_surface,
4867                                                  src_rect,
4868                                                  dst_surface,
4869                                                  dst_rect,
4870                                                  PP_NV12_LOAD_SAVE_N12,
4871                                                  NULL);
4872     } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') || 
4873                fourcc == VA_FOURCC('I', 'M', 'C', '3') || 
4874                fourcc == VA_FOURCC('Y', 'V', '1', '2') ||
4875                fourcc == VA_FOURCC('I', '4', '2', '0') ) {
4876         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4877                                                  src_surface,
4878                                                  src_rect,
4879                                                  dst_surface,
4880                                                  dst_rect,
4881                                                  PP_NV12_LOAD_SAVE_PL3,
4882                                                  NULL);
4883     } else if (fourcc == VA_FOURCC('Y', 'U', 'Y', '2') ||
4884                fourcc == VA_FOURCC('U', 'Y', 'V', 'Y')) {
4885         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4886                                                  src_surface,
4887                                                  src_rect,
4888                                                  dst_surface,
4889                                                  dst_rect,
4890                                                  PP_NV12_LOAD_SAVE_PA,
4891                                                      NULL);
4892     } else if (fourcc == VA_FOURCC('B', 'G', 'R', 'X') || 
4893                fourcc == VA_FOURCC('B', 'G', 'R', 'A') ||
4894                fourcc == VA_FOURCC('R', 'G', 'B', 'X') ||
4895                fourcc == VA_FOURCC('R', 'G', 'B', 'A') ) {
4896         vaStatus = i965_post_processing_internal(ctx, i965->pp_context,
4897                                       src_surface,
4898                                       src_rect,
4899                                       dst_surface,
4900                                       dst_rect,
4901                                       PP_NV12_LOAD_SAVE_RGBX,
4902                                       NULL);
4903     } else {
4904         assert(0);
4905         return VA_STATUS_ERROR_UNKNOWN;
4906     }
4907
4908     intel_batchbuffer_flush(pp_context->batch);
4909
4910     return vaStatus;
4911 }
4912
4913 static VAStatus
4914 i965_image_pl1_processing(VADriverContextP ctx,
4915                           const struct i965_surface *src_surface,
4916                           const VARectangle *src_rect,
4917                           struct i965_surface *dst_surface,
4918                           const VARectangle *dst_rect)
4919 {
4920     struct i965_driver_data *i965 = i965_driver_data(ctx);
4921     struct i965_post_processing_context *pp_context = i965->pp_context;
4922     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
4923
4924     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
4925         i965_post_processing_internal(ctx, i965->pp_context,
4926                                       src_surface,
4927                                       src_rect,
4928                                       dst_surface,
4929                                       dst_rect,
4930                                       PP_PA_LOAD_SAVE_NV12,
4931                                       NULL);
4932     }
4933     else if (fourcc == VA_FOURCC_YV12) {
4934         i965_post_processing_internal(ctx, i965->pp_context,
4935                                       src_surface,
4936                                       src_rect,
4937                                       dst_surface,
4938                                       dst_rect,
4939                                       PP_PA_LOAD_SAVE_PL3,
4940                                       NULL);
4941
4942     }
4943     else {
4944         return VA_STATUS_ERROR_UNKNOWN;
4945     }
4946
4947     intel_batchbuffer_flush(pp_context->batch);
4948
4949     return VA_STATUS_SUCCESS;
4950 }
4951
4952 VAStatus
4953 i965_image_processing(VADriverContextP ctx,
4954                       const struct i965_surface *src_surface,
4955                       const VARectangle *src_rect,
4956                       struct i965_surface *dst_surface,
4957                       const VARectangle *dst_rect)
4958 {
4959     struct i965_driver_data *i965 = i965_driver_data(ctx);
4960     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
4961
4962     if (HAS_PP(i965)) {
4963         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
4964
4965         _i965LockMutex(&i965->pp_mutex);
4966
4967         switch (fourcc) {
4968         case VA_FOURCC('Y', 'V', '1', '2'):
4969         case VA_FOURCC('I', '4', '2', '0'):
4970         case VA_FOURCC('I', 'M', 'C', '1'):
4971         case VA_FOURCC('I', 'M', 'C', '3'):
4972             status = i965_image_pl3_processing(ctx,
4973                                                src_surface,
4974                                                src_rect,
4975                                                dst_surface,
4976                                                dst_rect);
4977             break;
4978
4979         case  VA_FOURCC('N', 'V', '1', '2'):
4980             status = i965_image_pl2_processing(ctx,
4981                                                src_surface,
4982                                                src_rect,
4983                                                dst_surface,
4984                                                dst_rect);
4985             break;
4986         case  VA_FOURCC('Y', 'U', 'Y', '2'):
4987         case VA_FOURCC('U', 'Y', 'V', 'Y'):
4988             status = i965_image_pl1_processing(ctx,
4989                                                src_surface,
4990                                                src_rect,
4991                                                dst_surface,
4992                                                dst_rect);
4993             break;
4994         case VA_FOURCC('B', 'G', 'R', 'A'):
4995         case VA_FOURCC('B', 'G', 'R', 'X'):
4996         case VA_FOURCC('R', 'G', 'B', 'A'):
4997         case VA_FOURCC('R', 'G', 'B', 'X'):
4998             status = i965_image_pl1_rgbx_processing(ctx,
4999                                                src_surface,
5000                                                src_rect,
5001                                                dst_surface,
5002                                                dst_rect);
5003             break;
5004         default:
5005             status = VA_STATUS_ERROR_UNIMPLEMENTED;
5006             break;
5007         }
5008         
5009         _i965UnlockMutex(&i965->pp_mutex);
5010     }
5011
5012     return status;
5013 }       
5014
5015 static void
5016 i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
5017 {
5018     int i;
5019
5020     dri_bo_unreference(pp_context->surface_state_binding_table.bo);
5021     pp_context->surface_state_binding_table.bo = NULL;
5022
5023     dri_bo_unreference(pp_context->curbe.bo);
5024     pp_context->curbe.bo = NULL;
5025
5026     dri_bo_unreference(pp_context->sampler_state_table.bo);
5027     pp_context->sampler_state_table.bo = NULL;
5028
5029     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
5030     pp_context->sampler_state_table.bo_8x8 = NULL;
5031
5032     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
5033     pp_context->sampler_state_table.bo_8x8_uv = NULL;
5034
5035     dri_bo_unreference(pp_context->idrt.bo);
5036     pp_context->idrt.bo = NULL;
5037     pp_context->idrt.num_interface_descriptors = 0;
5038
5039     dri_bo_unreference(pp_context->vfe_state.bo);
5040     pp_context->vfe_state.bo = NULL;
5041
5042     dri_bo_unreference(pp_context->stmm.bo);
5043     pp_context->stmm.bo = NULL;
5044
5045     for (i = 0; i < NUM_PP_MODULES; i++) {
5046         struct pp_module *pp_module = &pp_context->pp_modules[i];
5047
5048         dri_bo_unreference(pp_module->kernel.bo);
5049         pp_module->kernel.bo = NULL;
5050     }
5051
5052     free(pp_context->pp_static_parameter);
5053     free(pp_context->pp_inline_parameter);
5054     pp_context->pp_static_parameter = NULL;
5055     pp_context->pp_inline_parameter = NULL;
5056 }
5057
5058 Bool
5059 i965_post_processing_terminate(VADriverContextP ctx)
5060 {
5061     struct i965_driver_data *i965 = i965_driver_data(ctx);
5062     struct i965_post_processing_context *pp_context = i965->pp_context;
5063
5064     if (pp_context) {
5065         i965_post_processing_context_finalize(pp_context);
5066         free(pp_context);
5067     }
5068
5069     i965->pp_context = NULL;
5070
5071     return True;
5072 }
5073
5074 static void
5075 i965_post_processing_context_init(VADriverContextP ctx,
5076                                   struct i965_post_processing_context *pp_context,
5077                                   struct intel_batchbuffer *batch)
5078 {
5079     struct i965_driver_data *i965 = i965_driver_data(ctx);
5080     int i;
5081
5082     pp_context->urb.size = URB_SIZE((&i965->intel));
5083     pp_context->urb.num_vfe_entries = 32;
5084     pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
5085     pp_context->urb.num_cs_entries = 1;
5086     
5087     if (IS_GEN7(i965->intel.device_id))
5088         pp_context->urb.size_cs_entry = 4;      /* in 512 bits unit */
5089     else
5090         pp_context->urb.size_cs_entry = 2;
5091
5092     pp_context->urb.vfe_start = 0;
5093     pp_context->urb.cs_start = pp_context->urb.vfe_start + 
5094         pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
5095     assert(pp_context->urb.cs_start + 
5096            pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
5097
5098     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
5099     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
5100     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen7));
5101     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen75));
5102
5103     if (IS_HASWELL(i965->intel.device_id))
5104         memcpy(pp_context->pp_modules, pp_modules_gen75, sizeof(pp_context->pp_modules));
5105     else if (IS_GEN7(i965->intel.device_id))
5106         memcpy(pp_context->pp_modules, pp_modules_gen7, sizeof(pp_context->pp_modules));
5107     else if (IS_GEN6(i965->intel.device_id))
5108         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
5109     else if (IS_IRONLAKE(i965->intel.device_id))
5110         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
5111
5112     for (i = 0; i < NUM_PP_MODULES; i++) {
5113         struct pp_module *pp_module = &pp_context->pp_modules[i];
5114         dri_bo_unreference(pp_module->kernel.bo);
5115         if (pp_module->kernel.bin && pp_module->kernel.size) {
5116             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
5117                                                 pp_module->kernel.name,
5118                                                 pp_module->kernel.size,
5119                                                 4096);
5120             assert(pp_module->kernel.bo);
5121             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
5122         } else {
5123             pp_module->kernel.bo = NULL;
5124         }
5125     }
5126
5127     /* static & inline parameters */
5128     if (IS_GEN7(i965->intel.device_id)) {
5129         pp_context->pp_static_parameter = calloc(sizeof(struct gen7_pp_static_parameter), 1);
5130         pp_context->pp_inline_parameter = calloc(sizeof(struct gen7_pp_inline_parameter), 1);
5131     } else {
5132         pp_context->pp_static_parameter = calloc(sizeof(struct pp_static_parameter), 1);
5133         pp_context->pp_inline_parameter = calloc(sizeof(struct pp_inline_parameter), 1);
5134     }
5135
5136     pp_context->batch = batch;
5137 }
5138
5139 Bool
5140 i965_post_processing_init(VADriverContextP ctx)
5141 {
5142     struct i965_driver_data *i965 = i965_driver_data(ctx);
5143     struct i965_post_processing_context *pp_context = i965->pp_context;
5144
5145     if (HAS_PP(i965)) {
5146         if (pp_context == NULL) {
5147             pp_context = calloc(1, sizeof(*pp_context));
5148             i965_post_processing_context_init(ctx, pp_context, i965->batch);
5149             i965->pp_context = pp_context;
5150         }
5151     }
5152
5153     return True;
5154 }
5155
5156 static const int procfilter_to_pp_flag[VAProcFilterCount] = {
5157     PP_NULL,    /* VAProcFilterNone */
5158     PP_NV12_DN, /* VAProcFilterNoiseReduction */
5159     PP_NULL,    /* VAProcFilterDeblocking */
5160     PP_NV12_DNDI, /* VAProcFilterDeinterlacing */
5161     PP_NULL,    /* VAProcFilterSharpening */
5162     PP_NULL,    /* VAProcFilterColorBalance */
5163     PP_NULL,    /* VAProcFilterColorStandard */
5164     PP_NULL,    /* VAProcFilterFrameRateConversion */
5165 };
5166
5167 static const int proc_frame_to_pp_frame[3] = {
5168     I965_SURFACE_FLAG_FRAME,
5169     I965_SURFACE_FLAG_TOP_FIELD_FIRST,
5170     I965_SURFACE_FLAG_BOTTOME_FIELD_FIRST
5171 };
5172
5173 void 
5174 i965_proc_picture(VADriverContextP ctx, 
5175                   VAProfile profile, 
5176                   union codec_state *codec_state,
5177                   struct hw_context *hw_context)
5178 {
5179     struct i965_driver_data *i965 = i965_driver_data(ctx);
5180     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
5181     struct proc_state *proc_state = &codec_state->proc;
5182     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
5183     struct object_surface *obj_surface;
5184     struct i965_surface src_surface, dst_surface;
5185     VARectangle src_rect, dst_rect;
5186     VAStatus status;
5187     int i;
5188     VASurfaceID tmp_surfaces[VAProcFilterCount + 4];
5189     int num_tmp_surfaces = 0;
5190     unsigned int tiling = 0, swizzle = 0;
5191     int in_width, in_height;
5192
5193     assert(pipeline_param->surface != VA_INVALID_ID);
5194     assert(proc_state->current_render_target != VA_INVALID_ID);
5195
5196     obj_surface = SURFACE(pipeline_param->surface);
5197     in_width = obj_surface->orig_width;
5198     in_height = obj_surface->orig_height;
5199     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
5200
5201     src_surface.id = pipeline_param->surface;
5202     src_surface.type = I965_SURFACE_TYPE_SURFACE;
5203     src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
5204
5205     VASurfaceID out_surface_id = VA_INVALID_ID;
5206     if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2')) {
5207         src_surface.id = pipeline_param->surface;
5208         src_surface.type = I965_SURFACE_TYPE_SURFACE;
5209         src_surface.flags = I965_SURFACE_FLAG_FRAME;
5210         src_rect.x = 0;
5211         src_rect.y = 0;
5212         src_rect.width = in_width;
5213         src_rect.height = in_height;
5214
5215         status = i965_CreateSurfaces(ctx,
5216                                      in_width,
5217                                      in_height,
5218                                      VA_RT_FORMAT_YUV420,
5219                                      1,
5220                                      &out_surface_id);
5221         assert(status == VA_STATUS_SUCCESS);
5222         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
5223         obj_surface = SURFACE(out_surface_id);
5224         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
5225
5226         dst_surface.id = out_surface_id;
5227         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5228         dst_surface.flags = I965_SURFACE_FLAG_FRAME;
5229         dst_rect.x = 0;
5230         dst_rect.y = 0;
5231         dst_rect.width = in_width;
5232         dst_rect.height = in_height;
5233
5234         status = i965_image_processing(ctx,
5235                                        &src_surface,
5236                                        &src_rect,
5237                                        &dst_surface,
5238                                        &dst_rect);
5239         assert(status == VA_STATUS_SUCCESS);
5240
5241         src_surface.id = out_surface_id;
5242         src_surface.type = I965_SURFACE_TYPE_SURFACE;
5243         src_surface.flags = proc_frame_to_pp_frame[pipeline_param->filter_flags & 0x3];
5244     }
5245
5246     if (pipeline_param->surface_region) {
5247         src_rect.x = pipeline_param->surface_region->x;
5248         src_rect.y = pipeline_param->surface_region->y;
5249         src_rect.width = pipeline_param->surface_region->width;
5250         src_rect.height = pipeline_param->surface_region->height;
5251     } else {
5252         src_rect.x = 0;
5253         src_rect.y = 0;
5254         src_rect.width = in_width;
5255         src_rect.height = in_height;
5256     }
5257
5258     if (pipeline_param->output_region) {
5259         dst_rect.x = pipeline_param->output_region->x;
5260         dst_rect.y = pipeline_param->output_region->y;
5261         dst_rect.width = pipeline_param->output_region->width;
5262         dst_rect.height = pipeline_param->output_region->height;
5263     } else {
5264         dst_rect.x = 0;
5265         dst_rect.y = 0;
5266         dst_rect.width = in_width;
5267         dst_rect.height = in_height;
5268     }
5269
5270     for (i = 0; i < pipeline_param->num_filters; i++) {
5271         struct object_buffer *obj_buffer = BUFFER(pipeline_param->filters[i]);
5272         VAProcFilterParameterBufferBase *filter_param = (VAProcFilterParameterBufferBase *)obj_buffer->buffer_store->buffer;
5273         VAProcFilterType filter_type = filter_param->type;
5274         out_surface_id = VA_INVALID_ID;
5275         int kernel_index = procfilter_to_pp_flag[filter_type];
5276
5277         if (kernel_index != PP_NULL &&
5278             proc_context->pp_context.pp_modules[kernel_index].kernel.bo != NULL) {
5279             status = i965_CreateSurfaces(ctx,
5280                                          in_width,
5281                                          in_height,
5282                                          VA_RT_FORMAT_YUV420,
5283                                          1,
5284                                          &out_surface_id);
5285             assert(status == VA_STATUS_SUCCESS);
5286             tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
5287             obj_surface = SURFACE(out_surface_id);
5288             i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
5289             dst_surface.id = out_surface_id;
5290             dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5291             status = i965_post_processing_internal(ctx, &proc_context->pp_context,
5292                                                    &src_surface,
5293                                                    &src_rect,
5294                                                    &dst_surface,
5295                                                    &src_rect,
5296                                                    kernel_index,
5297                                                    filter_param);
5298
5299             if (status == VA_STATUS_SUCCESS) {
5300                 src_surface.id = dst_surface.id;
5301                 src_surface.type = dst_surface.type;
5302                 src_surface.flags = dst_surface.flags;
5303             }
5304         }
5305     }
5306
5307     obj_surface = SURFACE(proc_state->current_render_target);
5308     int csc_needed = 0;
5309     if (obj_surface->fourcc && obj_surface->fourcc !=  VA_FOURCC('N','V','1','2')){
5310         csc_needed = 1;
5311         out_surface_id = VA_INVALID_ID;
5312         status = i965_CreateSurfaces(ctx,
5313                                      obj_surface->orig_width,
5314                                      obj_surface->orig_height,
5315                                      VA_RT_FORMAT_YUV420, 
5316                                      1,
5317                                      &out_surface_id);
5318         assert(status == VA_STATUS_SUCCESS);
5319         tmp_surfaces[num_tmp_surfaces++] = out_surface_id;
5320         struct object_surface *csc_surface = SURFACE(out_surface_id);
5321         i965_check_alloc_surface_bo(ctx, csc_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
5322         dst_surface.id = out_surface_id;
5323     } else {
5324         i965_check_alloc_surface_bo(ctx, obj_surface, !!tiling, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
5325         dst_surface.id = proc_state->current_render_target;
5326     }
5327
5328     dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5329     i965_vpp_clear_surface(ctx, &proc_context->pp_context, proc_state->current_render_target, pipeline_param->output_background_color); 
5330
5331     // load/save doesn't support different origin offset for src and dst surface
5332     if (src_rect.width == dst_rect.width &&
5333         src_rect.height == dst_rect.height &&
5334         src_rect.x == dst_rect.x &&
5335         src_rect.y == dst_rect.y) {
5336         i965_post_processing_internal(ctx, &proc_context->pp_context,
5337                                       &src_surface,
5338                                       &src_rect,
5339                                       &dst_surface,
5340                                       &dst_rect,
5341                                       PP_NV12_LOAD_SAVE_N12,
5342                                       NULL);
5343     } else {
5344
5345         i965_post_processing_internal(ctx, &proc_context->pp_context,
5346                                       &src_surface,
5347                                       &src_rect,
5348                                       &dst_surface,
5349                                       &dst_rect,
5350                                       (pipeline_param->filter_flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC ?
5351                                       PP_NV12_AVS : PP_NV12_SCALING,
5352                                       NULL);
5353     }
5354
5355     if (csc_needed) {
5356         src_surface.id = dst_surface.id;
5357         src_surface.type = dst_surface.type;
5358         src_surface.flags = dst_surface.flags;
5359         dst_surface.id = proc_state->current_render_target;
5360         dst_surface.type = I965_SURFACE_TYPE_SURFACE;
5361         i965_image_processing(ctx, &src_surface, &dst_rect, &dst_surface, &dst_rect);
5362     }
5363     
5364     if (num_tmp_surfaces)
5365         i965_DestroySurfaces(ctx,
5366                              tmp_surfaces,
5367                              num_tmp_surfaces);
5368
5369     intel_batchbuffer_flush(hw_context->batch);
5370 }
5371
5372 static void
5373 i965_proc_context_destroy(void *hw_context)
5374 {
5375     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
5376
5377     i965_post_processing_context_finalize(&proc_context->pp_context);
5378     intel_batchbuffer_free(proc_context->base.batch);
5379     free(proc_context);
5380 }
5381
5382 struct hw_context *
5383 i965_proc_context_init(VADriverContextP ctx, struct object_config *obj_config)
5384 {
5385     struct intel_driver_data *intel = intel_driver_data(ctx);
5386     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
5387
5388     proc_context->base.destroy = i965_proc_context_destroy;
5389     proc_context->base.run = i965_proc_picture;
5390     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
5391     i965_post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
5392
5393     return (struct hw_context *)proc_context;
5394 }