i965_drv_video: post process depends on the internal pixel format of a surface
[profile/ivi/vaapi-intel-driver.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include <va/va_backend.h>
35
36 #include "intel_batchbuffer.h"
37 #include "intel_driver.h"
38 #include "i965_defines.h"
39 #include "i965_structs.h"
40 #include "i965_drv_video.h"
41 #include "i965_post_processing.h"
42 #include "i965_render.h"
43
44 #define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) ||     \
45                      IS_GEN6((ctx)->intel.device_id) ||         \
46                      IS_GEN7((ctx)->intel.device_id))
47
48 static const uint32_t pp_null_gen5[][4] = {
49 #include "shaders/post_processing/null.g4b.gen5"
50 };
51
52 static const uint32_t pp_nv12_load_save_nv12_gen5[][4] = {
53 #include "shaders/post_processing/nv12_load_save_nv12.g4b.gen5"
54 };
55
56 static const uint32_t pp_nv12_load_save_pl3_gen5[][4] = {
57 #include "shaders/post_processing/nv12_load_save_pl3.g4b.gen5"
58 };
59
60 static const uint32_t pp_pl3_load_save_nv12_gen5[][4] = {
61 #include "shaders/post_processing/pl3_load_save_nv12.g4b.gen5"
62 };
63
64 static const uint32_t pp_pl3_load_save_pl3_gen5[][4] = {
65 #include "shaders/post_processing/pl3_load_save_pl3.g4b.gen5"
66 };
67
68 static const uint32_t pp_nv12_scaling_gen5[][4] = {
69 #include "shaders/post_processing/nv12_scaling_nv12.g4b.gen5"
70 };
71
72 static const uint32_t pp_nv12_avs_gen5[][4] = {
73 #include "shaders/post_processing/nv12_avs_nv12.g4b.gen5"
74 };
75
76 static const uint32_t pp_nv12_dndi_gen5[][4] = {
77 #include "shaders/post_processing/nv12_dndi_nv12.g4b.gen5"
78 };
79
80 static void pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
81                                const struct i965_surface *src_surface,
82                                const VARectangle *src_rect,
83                                const struct i965_surface *dst_surface,
84                                const VARectangle *dst_rect);
85 static void pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
86                                    const struct i965_surface *src_surface,
87                                    const VARectangle *src_rect,
88                                    const struct i965_surface *dst_surface,
89                                    const VARectangle *dst_rect);
90 static void pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
91                                        const struct i965_surface *src_surface,
92                                        const VARectangle *src_rect,
93                                        const struct i965_surface *dst_surface,
94                                        const VARectangle *dst_rect);
95 static void pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
96                                             const struct i965_surface *src_surface,
97                                             const VARectangle *src_rect,
98                                             const struct i965_surface *dst_surface,
99                                             const VARectangle *dst_rect);
100 static void pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
101                                     const struct i965_surface *src_surface,
102                                     const VARectangle *src_rect,
103                                     const struct i965_surface *dst_surface,
104                                     const VARectangle *dst_rect);
105
106 static struct pp_module pp_modules_gen5[] = {
107     {
108         {
109             "NULL module (for testing)",
110             PP_NULL,
111             pp_null_gen5,
112             sizeof(pp_null_gen5),
113             NULL,
114         },
115
116         pp_null_initialize,
117     },
118
119     {
120         {
121             "NV12_NV12",
122             PP_NV12_LOAD_SAVE_N12,
123             pp_nv12_load_save_nv12_gen5,
124             sizeof(pp_nv12_load_save_nv12_gen5),
125             NULL,
126         },
127
128         pp_plx_load_save_plx_initialize,
129     },
130
131     {
132         {
133             "NV12_PL3",
134             PP_NV12_LOAD_SAVE_PL3,
135             pp_nv12_load_save_pl3_gen5,
136             sizeof(pp_nv12_load_save_pl3_gen5),
137             NULL,
138         },
139
140         pp_plx_load_save_plx_initialize,
141     },
142
143     {
144         {
145             "PL3_NV12",
146             PP_PL3_LOAD_SAVE_N12,
147             pp_pl3_load_save_nv12_gen5,
148             sizeof(pp_pl3_load_save_nv12_gen5),
149             NULL,
150         },
151
152         pp_plx_load_save_plx_initialize,
153     },
154
155     {
156         {
157             "PL3_PL3",
158             PP_PL3_LOAD_SAVE_PL3,
159             pp_pl3_load_save_pl3_gen5,
160             sizeof(pp_pl3_load_save_pl3_gen5),
161             NULL,
162         },
163
164         pp_plx_load_save_plx_initialize,
165     },
166
167     {
168         {
169             "NV12 Scaling module",
170             PP_NV12_SCALING,
171             pp_nv12_scaling_gen5,
172             sizeof(pp_nv12_scaling_gen5),
173             NULL,
174         },
175
176         pp_nv12_scaling_initialize,
177     },
178
179     {
180         {
181             "NV12 AVS module",
182             PP_NV12_AVS,
183             pp_nv12_avs_gen5,
184             sizeof(pp_nv12_avs_gen5),
185             NULL,
186         },
187
188         pp_nv12_avs_initialize,
189     },
190
191     {
192         {
193             "NV12 DNDI module",
194             PP_NV12_DNDI,
195             pp_nv12_dndi_gen5,
196             sizeof(pp_nv12_dndi_gen5),
197             NULL,
198         },
199
200         pp_nv12_dndi_initialize,
201     },
202 };
203
204 static const uint32_t pp_null_gen6[][4] = {
205 #include "shaders/post_processing/null.g6b"
206 };
207
208 static const uint32_t pp_nv12_load_save_nv12_gen6[][4] = {
209 #include "shaders/post_processing/nv12_load_save_nv12.g6b"
210 };
211
212 static const uint32_t pp_nv12_load_save_pl3_gen6[][4] = {
213 #include "shaders/post_processing/nv12_load_save_pl3.g6b"
214 };
215
216 static const uint32_t pp_pl3_load_save_nv12_gen6[][4] = {
217 #include "shaders/post_processing/pl3_load_save_nv12.g6b"
218 };
219
220 static const uint32_t pp_pl3_load_save_pl3_gen6[][4] = {
221 #include "shaders/post_processing/pl3_load_save_pl3.g6b"
222 };
223
224 static const uint32_t pp_nv12_scaling_gen6[][4] = {
225 #include "shaders/post_processing/nv12_scaling_nv12.g6b"
226 };
227
228 static const uint32_t pp_nv12_avs_gen6[][4] = {
229 #include "shaders/post_processing/nv12_avs_nv12.g6b"
230 };
231
232 static const uint32_t pp_nv12_dndi_gen6[][4] = {
233 #include "shaders/post_processing/nv12_dndi_nv12.g6b"
234 };
235
236 static struct pp_module pp_modules_gen6[] = {
237     {
238         {
239             "NULL module (for testing)",
240             PP_NULL,
241             pp_null_gen6,
242             sizeof(pp_null_gen6),
243             NULL,
244         },
245
246         pp_null_initialize,
247     },
248
249     {
250         {
251             "NV12_NV12",
252             PP_NV12_LOAD_SAVE_N12,
253             pp_nv12_load_save_nv12_gen6,
254             sizeof(pp_nv12_load_save_nv12_gen6),
255             NULL,
256         },
257
258         pp_plx_load_save_plx_initialize,
259     },
260
261     {
262         {
263             "NV12_PL3",
264             PP_NV12_LOAD_SAVE_PL3,
265             pp_nv12_load_save_pl3_gen6,
266             sizeof(pp_nv12_load_save_pl3_gen6),
267             NULL,
268         },
269         
270         pp_plx_load_save_plx_initialize,
271     },
272
273     {
274         {
275             "PL3_NV12",
276             PP_PL3_LOAD_SAVE_N12,
277             pp_pl3_load_save_nv12_gen6,
278             sizeof(pp_pl3_load_save_nv12_gen6),
279             NULL,
280         },
281
282         pp_plx_load_save_plx_initialize,
283     },
284
285     {
286         {
287             "PL3_PL3",
288             PP_PL3_LOAD_SAVE_PL3,
289             pp_pl3_load_save_pl3_gen6,
290             sizeof(pp_pl3_load_save_pl3_gen6),
291             NULL,
292         },
293
294         pp_plx_load_save_plx_initialize,
295     },
296
297     {
298         {
299             "NV12 Scaling module",
300             PP_NV12_SCALING,
301             pp_nv12_scaling_gen6,
302             sizeof(pp_nv12_scaling_gen6),
303             NULL,
304         },
305
306         pp_nv12_scaling_initialize,
307     },
308
309     {
310         {
311             "NV12 AVS module",
312             PP_NV12_AVS,
313             pp_nv12_avs_gen6,
314             sizeof(pp_nv12_avs_gen6),
315             NULL,
316         },
317
318         pp_nv12_avs_initialize,
319     },
320
321     {
322         {
323             "NV12 DNDI module",
324             PP_NV12_DNDI,
325             pp_nv12_dndi_gen6,
326             sizeof(pp_nv12_dndi_gen6),
327             NULL,
328         },
329
330         pp_nv12_dndi_initialize,
331     },
332 };
333
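/*
 * Shorthand for the per-context kernel parameter blocks: pp_static_parameter is
 * uploaded through the constant buffer (CURBE) in ironlake_pp_upload_constants(),
 * while pp_inline_parameter is emitted as inline data with every MEDIA_OBJECT
 * command in ironlake_pp_object_walker().
 */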
334 #define pp_static_parameter     pp_context->pp_static_parameter
335 #define pp_inline_parameter     pp_context->pp_inline_parameter
336
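/*
 * Return the internal pixel format (fourcc) of an image or surface object.  The
 * load/save kernels differ for NV12 and 3-plane (I420/YV12) layouts, so the
 * post-processing path is chosen based on this format.
 */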
337 static int
338 pp_get_surface_fourcc(VADriverContextP ctx, const struct i965_surface *surface)
339 {
340     struct i965_driver_data *i965 = i965_driver_data(ctx);
341     int fourcc;
342
343     if (surface->flag == I965_SURFACE_IMAGE) {
344         struct object_image *obj_image = IMAGE(surface->id);
345         fourcc = obj_image->image.format.fourcc;
346     } else {
347         struct object_surface *obj_surface = SURFACE(surface->id);
348         fourcc = obj_surface->fourcc;
349     }
350
351     return fourcc;
352 }
353
354 static void
355 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
356 {
357     switch (tiling) {
358     case I915_TILING_NONE:
359         ss->ss3.tiled_surface = 0;
360         ss->ss3.tile_walk = 0;
361         break;
362     case I915_TILING_X:
363         ss->ss3.tiled_surface = 1;
364         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
365         break;
366     case I915_TILING_Y:
367         ss->ss3.tiled_surface = 1;
368         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
369         break;
370     }
371 }
372
373 static void
374 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
375 {
376     switch (tiling) {
377     case I915_TILING_NONE:
378         ss->ss2.tiled_surface = 0;
379         ss->ss2.tile_walk = 0;
380         break;
381     case I915_TILING_X:
382         ss->ss2.tiled_surface = 1;
383         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
384         break;
385     case I915_TILING_Y:
386         ss->ss2.tiled_surface = 1;
387         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
388         break;
389     }
390 }
391
392 static void
393 ironlake_pp_surface_state(struct i965_post_processing_context *pp_context)
394 {
395
396 }
397
398 static void
399 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
400 {
401     struct i965_interface_descriptor *desc;
402     dri_bo *bo;
403     int pp_index = pp_context->current_pp;
404
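    /* State pointers are stored as 64-byte (>> 6) and 32-byte (>> 5) aligned GPU
     * offsets and fixed up through the relocations emitted below. */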
405     bo = pp_context->idrt.bo;
406     dri_bo_map(bo, 1);
407     assert(bo->virtual);
408     desc = bo->virtual;
409     memset(desc, 0, sizeof(*desc));
410     desc->desc0.grf_reg_blocks = 10;
411     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
412     desc->desc1.const_urb_entry_read_offset = 0;
413     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
414     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
415     desc->desc2.sampler_count = 0;
416     desc->desc3.binding_table_entry_count = 0;
417     desc->desc3.binding_table_pointer =
418         pp_context->binding_table.bo->offset >> 5; /* reloc */
419
420     dri_bo_emit_reloc(bo,
421                       I915_GEM_DOMAIN_INSTRUCTION, 0,
422                       desc->desc0.grf_reg_blocks,
423                       offsetof(struct i965_interface_descriptor, desc0),
424                       pp_context->pp_modules[pp_index].kernel.bo);
425
426     dri_bo_emit_reloc(bo,
427                       I915_GEM_DOMAIN_INSTRUCTION, 0,
428                       desc->desc2.sampler_count << 2,
429                       offsetof(struct i965_interface_descriptor, desc2),
430                       pp_context->sampler_state_table.bo);
431
432     dri_bo_emit_reloc(bo,
433                       I915_GEM_DOMAIN_INSTRUCTION, 0,
434                       desc->desc3.binding_table_entry_count,
435                       offsetof(struct i965_interface_descriptor, desc3),
436                       pp_context->binding_table.bo);
437
438     dri_bo_unmap(bo);
439     pp_context->idrt.num_interface_descriptors++;
440 }
441
442 static void
443 ironlake_pp_binding_table(struct i965_post_processing_context *pp_context)
444 {
445     unsigned int *binding_table;
446     dri_bo *bo = pp_context->binding_table.bo;
447     int i;
448
449     dri_bo_map(bo, 1);
450     assert(bo->virtual);
451     binding_table = bo->virtual;
452     memset(binding_table, 0, bo->size);
453
454     for (i = 0; i < MAX_PP_SURFACES; i++) {
455         if (pp_context->surfaces[i].ss_bo) {
456             assert(pp_context->surfaces[i].s_bo);
457
458             binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
459             dri_bo_emit_reloc(bo,
460                               I915_GEM_DOMAIN_INSTRUCTION, 0,
461                               0,
462                               i * sizeof(*binding_table),
463                               pp_context->surfaces[i].ss_bo);
464         }
465     
466     }
467
468     dri_bo_unmap(bo);
469 }
470
471 static void
472 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
473 {
474     struct i965_vfe_state *vfe_state;
475     dri_bo *bo;
476
477     bo = pp_context->vfe_state.bo;
478     dri_bo_map(bo, 1);
479     assert(bo->virtual);
480     vfe_state = bo->virtual;
481     memset(vfe_state, 0, sizeof(*vfe_state));
482     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
483     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
484     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
485     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
486     vfe_state->vfe1.children_present = 0;
487     vfe_state->vfe2.interface_descriptor_base = 
488         pp_context->idrt.bo->offset >> 4; /* reloc */
489     dri_bo_emit_reloc(bo,
490                       I915_GEM_DOMAIN_INSTRUCTION, 0,
491                       0,
492                       offsetof(struct i965_vfe_state, vfe2),
493                       pp_context->idrt.bo);
494     dri_bo_unmap(bo);
495 }
496
497 static void
498 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
499 {
500     unsigned char *constant_buffer;
501
502     assert(sizeof(pp_static_parameter) == 128);
503     dri_bo_map(pp_context->curbe.bo, 1);
504     assert(pp_context->curbe.bo->virtual);
505     constant_buffer = pp_context->curbe.bo->virtual;
506     memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
507     dri_bo_unmap(pp_context->curbe.bo);
508 }
509
510 static void
511 ironlake_pp_states_setup(VADriverContextP ctx,
512                          struct i965_post_processing_context *pp_context)
513 {
514     ironlake_pp_surface_state(pp_context);
515     ironlake_pp_binding_table(pp_context);
516     ironlake_pp_interface_descriptor_table(pp_context);
517     ironlake_pp_vfe_state(pp_context);
518     ironlake_pp_upload_constants(pp_context);
519 }
520
521 static void
522 ironlake_pp_pipeline_select(VADriverContextP ctx,
523                             struct i965_post_processing_context *pp_context)
524 {
525     struct intel_batchbuffer *batch = pp_context->batch;
526
527     BEGIN_BATCH(batch, 1);
528     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
529     ADVANCE_BATCH(batch);
530 }
531
532 static void
533 ironlake_pp_urb_layout(VADriverContextP ctx,
534                        struct i965_post_processing_context *pp_context)
535 {
536     struct intel_batchbuffer *batch = pp_context->batch;
537     unsigned int vfe_fence, cs_fence;
538
539     vfe_fence = pp_context->urb.cs_start;
540     cs_fence = pp_context->urb.size;
541
542     BEGIN_BATCH(batch, 3);
543     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
544     OUT_BATCH(batch, 0);
545     OUT_BATCH(batch, 
546               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
547               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
548     ADVANCE_BATCH(batch);
549 }
550
551 static void
552 ironlake_pp_state_base_address(VADriverContextP ctx,
553                                struct i965_post_processing_context *pp_context)
554 {
555     struct intel_batchbuffer *batch = pp_context->batch;
556
557     BEGIN_BATCH(batch, 8);
558     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
559     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
560     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
561     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
562     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
563     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
564     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
565     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
566     ADVANCE_BATCH(batch);
567 }
568
569 static void
570 ironlake_pp_state_pointers(VADriverContextP ctx,
571                            struct i965_post_processing_context *pp_context)
572 {
573     struct intel_batchbuffer *batch = pp_context->batch;
574
575     BEGIN_BATCH(batch, 3);
576     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
577     OUT_BATCH(batch, 0);
578     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
579     ADVANCE_BATCH(batch);
580 }
581
582 static void 
583 ironlake_pp_cs_urb_layout(VADriverContextP ctx,
584                           struct i965_post_processing_context *pp_context)
585 {
586     struct intel_batchbuffer *batch = pp_context->batch;
587
588     BEGIN_BATCH(batch, 2);
589     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
590     OUT_BATCH(batch,
591               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
592               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
593     ADVANCE_BATCH(batch);
594 }
595
596 static void
597 ironlake_pp_constant_buffer(VADriverContextP ctx,
598                             struct i965_post_processing_context *pp_context)
599 {
600     struct intel_batchbuffer *batch = pp_context->batch;
601
602     BEGIN_BATCH(batch, 2);
603     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
604     OUT_RELOC(batch, pp_context->curbe.bo,
605               I915_GEM_DOMAIN_INSTRUCTION, 0,
606               pp_context->urb.size_cs_entry - 1);
607     ADVANCE_BATCH(batch);    
608 }
609
610 static void
611 ironlake_pp_object_walker(VADriverContextP ctx,
612                           struct i965_post_processing_context *pp_context)
613 {
614     struct intel_batchbuffer *batch = pp_context->batch;
615     int x, x_steps, y, y_steps;
616
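    /* Emit one MEDIA_OBJECT command per block: the per-module
     * pp_set_block_parameter() hook updates the inline parameters (block origin,
     * scaling origins) before each dispatch and may skip a block by returning
     * non-zero. */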
617     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
618     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
619
620     for (y = 0; y < y_steps; y++) {
621         for (x = 0; x < x_steps; x++) {
622             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
623                 BEGIN_BATCH(batch, 20);
624                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
625                 OUT_BATCH(batch, 0);
626                 OUT_BATCH(batch, 0); /* no indirect data */
627                 OUT_BATCH(batch, 0);
628
629                 /* inline data grf 5-6 */
630                 assert(sizeof(pp_inline_parameter) == 64);
631                 intel_batchbuffer_data(batch, &pp_inline_parameter, sizeof(pp_inline_parameter));
632
633                 ADVANCE_BATCH(batch);
634             }
635         }
636     }
637 }
638
639 static void
640 ironlake_pp_pipeline_setup(VADriverContextP ctx,
641                            struct i965_post_processing_context *pp_context)
642 {
643     struct intel_batchbuffer *batch = pp_context->batch;
644
645     intel_batchbuffer_start_atomic(batch, 0x1000);
646     intel_batchbuffer_emit_mi_flush(batch);
647     ironlake_pp_pipeline_select(ctx, pp_context);
648     ironlake_pp_state_base_address(ctx, pp_context);
649     ironlake_pp_state_pointers(ctx, pp_context);
650     ironlake_pp_urb_layout(ctx, pp_context);
651     ironlake_pp_cs_urb_layout(ctx, pp_context);
652     ironlake_pp_constant_buffer(ctx, pp_context);
653     ironlake_pp_object_walker(ctx, pp_context);
654     intel_batchbuffer_end_atomic(batch);
655 }
656
657 static void
658 i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
659                           dri_bo *surf_bo, unsigned long surf_bo_offset,
660                           int width, int height, int pitch, int format, 
661                           int index, int is_target)
662 {
663     struct i965_driver_data *i965 = i965_driver_data(ctx);
664     struct i965_surface_state *ss;
665     dri_bo *ss_bo;
666     unsigned int tiling;
667     unsigned int swizzle;
668
669     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
670     pp_context->surfaces[index].s_bo = surf_bo;
671     dri_bo_reference(pp_context->surfaces[index].s_bo);
672     ss_bo = dri_bo_alloc(i965->intel.bufmgr, 
673                          "surface state", 
674                          sizeof(struct i965_surface_state), 
675                          4096);
676     assert(ss_bo);
677     pp_context->surfaces[index].ss_bo = ss_bo;
678     dri_bo_map(ss_bo, True);
679     assert(ss_bo->virtual);
680     ss = ss_bo->virtual;
681     memset(ss, 0, sizeof(*ss));
682     ss->ss0.surface_type = I965_SURFACE_2D;
683     ss->ss0.surface_format = format;
684     ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
685     ss->ss2.width = width - 1;
686     ss->ss2.height = height - 1;
687     ss->ss3.pitch = pitch - 1;
688     pp_set_surface_tiling(ss, tiling);
689     dri_bo_emit_reloc(ss_bo,
690                       I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
691                       surf_bo_offset,
692                       offsetof(struct i965_surface_state, ss1),
693                       pp_context->surfaces[index].s_bo);
694     dri_bo_unmap(ss_bo);
695 }
696
697 static void
698 i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
699                            dri_bo *surf_bo, unsigned long surf_bo_offset,
700                            int width, int height, int wpitch,
701                            int xoffset, int yoffset,
702                            int format, int interleave_chroma,
703                            int index)
704 {
705     struct i965_driver_data *i965 = i965_driver_data(ctx);
706     struct i965_surface_state2 *ss2;
707     dri_bo *ss2_bo;
708     unsigned int tiling;
709     unsigned int swizzle;
710
711     dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
712     pp_context->surfaces[index].s_bo = surf_bo;
713     dri_bo_reference(pp_context->surfaces[index].s_bo);
714     ss2_bo = dri_bo_alloc(i965->intel.bufmgr, 
715                           "YUV surface state", 
716                           sizeof(struct i965_surface_state2), 
717                           4096);
718     assert(ss2_bo);
719     pp_context->surfaces[index].ss_bo = ss2_bo;
720     dri_bo_map(ss2_bo, True);
721     assert(ss2_bo->virtual);
722     ss2 = ss2_bo->virtual;
723     memset(ss2, 0, sizeof(*ss2));
724     ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
725     ss2->ss1.cbcr_pixel_offset_v_direction = 0;
726     ss2->ss1.width = width - 1;
727     ss2->ss1.height = height - 1;
728     ss2->ss2.pitch = wpitch - 1;
729     ss2->ss2.interleave_chroma = interleave_chroma;
730     ss2->ss2.surface_format = format;
731     ss2->ss3.x_offset_for_cb = xoffset;
732     ss2->ss3.y_offset_for_cb = yoffset;
733     pp_set_surface2_tiling(ss2, tiling);
734     dri_bo_emit_reloc(ss2_bo,
735                       I915_GEM_DOMAIN_RENDER, 0,
736                       surf_bo_offset,
737                       offsetof(struct i965_surface_state2, ss0),
738                       surf_bo);
739     dri_bo_unmap(ss2_bo);
740 }
741
742 static void 
743 pp_set_media_rw_message_surface(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
744                                 const struct i965_surface *surface, 
745                                 int base_index, int is_target,
746                                 int *width, int *height, int *pitch, int *offset)
747 {
748     struct i965_driver_data *i965 = i965_driver_data(ctx);
749     struct object_surface *obj_surface;
750     struct object_image *obj_image;
751     dri_bo *bo;
752     int fourcc = pp_get_surface_fourcc(ctx, surface);
753     const int Y = 0;
754     const int U = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 2 : 1;
755     const int V = fourcc == VA_FOURCC('Y', 'V', '1', '2') ? 1 : 2;
756     const int UV = 1;
757     int interleaved_uv = fourcc == VA_FOURCC('N', 'V', '1', '2');
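    /* Plane indices: NV12 carries a single interleaved UV plane, I420 stores U
     * before V, and YV12 stores V before U, hence the swapped U/V indices above. */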
758
759     if (surface->flag == I965_SURFACE_SURFACE) {
760         obj_surface = SURFACE(surface->id);
761         bo = obj_surface->bo;
762         width[0] = obj_surface->orig_width;
763         height[0] = obj_surface->orig_height;
764         pitch[0] = obj_surface->width;
765         offset[0] = 0;
766
767         if (interleaved_uv) {
768             width[1] = obj_surface->orig_width;
769             height[1] = obj_surface->orig_height / 2;
770             pitch[1] = obj_surface->width;
771             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
772         } else {
773             width[1] = obj_surface->orig_width / 2;
774             height[1] = obj_surface->orig_height / 2;
775             pitch[1] = obj_surface->width / 2;
776             offset[1] = offset[0] + obj_surface->width * obj_surface->height;
777             width[2] = obj_surface->orig_width / 2;
778             height[2] = obj_surface->orig_height / 2;
779             pitch[2] = obj_surface->width / 2;
780             offset[2] = offset[1] + (obj_surface->width / 2) * (obj_surface->height / 2);
781         }
782     } else {
783         obj_image = IMAGE(surface->id);
784         bo = obj_image->bo;
785         width[0] = obj_image->image.width;
786         height[0] = obj_image->image.height;
787         pitch[0] = obj_image->image.pitches[0];
788         offset[0] = obj_image->image.offsets[0];
789
790         if (interleaved_uv) {
791             width[1] = obj_image->image.width;
792             height[1] = obj_image->image.height / 2;
793             pitch[1] = obj_image->image.pitches[1];
794             offset[1] = obj_image->image.offsets[1];
795         } else {
796             width[1] = obj_image->image.width / 2;
797             height[1] = obj_image->image.height / 2;
798             pitch[1] = obj_image->image.pitches[1];
799             offset[1] = obj_image->image.offsets[1];
800             width[2] = obj_image->image.width / 2;
801             height[2] = obj_image->image.height / 2;
802             pitch[2] = obj_image->image.pitches[2];
803             offset[2] = obj_image->image.offsets[2];
804         }
805     }
806
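    /* The widths passed below are in 4-byte units (width / 4); the media
     * read/write kernels apparently address each plane in dwords rather than
     * in single pixels. */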
807     /* Y surface */
808     i965_pp_set_surface_state(ctx, pp_context,
809                               bo, offset[Y],
810                               width[Y] / 4, height[Y], pitch[Y], I965_SURFACEFORMAT_R8_UNORM,
811                               base_index, is_target);
812
813     if (interleaved_uv) {
814         i965_pp_set_surface_state(ctx, pp_context,
815                                   bo, offset[UV],
816                                   width[UV] / 4, height[UV], pitch[UV], I965_SURFACEFORMAT_R8_UNORM,
817                                   base_index + 1, is_target);
818     } else {
819         /* U surface */
820         i965_pp_set_surface_state(ctx, pp_context,
821                                   bo, offset[U],
822                                   width[U] / 4, height[U], pitch[U], I965_SURFACEFORMAT_R8_UNORM,
823                                   base_index + 1, is_target);
824
825         /* V surface */
826         i965_pp_set_surface_state(ctx, pp_context,
827                                   bo, offset[V],
828                                   width[V] / 4, height[V], pitch[V], I965_SURFACEFORMAT_R8_UNORM,
829                                   base_index + 2, is_target);
830     }
831
832 }
833
834 static int
835 pp_null_x_steps(void *private_context)
836 {
837     return 1;
838 }
839
840 static int
841 pp_null_y_steps(void *private_context)
842 {
843     return 1;
844 }
845
846 static int
847 pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
848 {
849     return 0;
850 }
851
852 static void
853 pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
854                    const struct i965_surface *src_surface,
855                    const VARectangle *src_rect,
856                    const struct i965_surface *dst_surface,
857                    const VARectangle *dst_rect)
858 {
859     /* private function & data */
860     pp_context->pp_x_steps = pp_null_x_steps;
861     pp_context->pp_y_steps = pp_null_y_steps;
862     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
863 }
864
865 static int
866 pp_load_save_x_steps(void *private_context)
867 {
868     return 1;
869 }
870
871 static int
872 pp_load_save_y_steps(void *private_context)
873 {
874     struct pp_load_save_context *pp_load_save_context = private_context;
875
876     return pp_load_save_context->dest_h / 8;
877 }
878
879 static int
880 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
881 {
882     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
883     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
884     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
885     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
886
887     return 0;
888 }
889
890 static void
891 pp_plx_load_save_plx_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
892                                 const struct i965_surface *src_surface,
893                                 const VARectangle *src_rect,
894                                 const struct i965_surface *dst_surface,
895                                 const VARectangle *dst_rect)
896 {
897     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
898     int width[3], height[3], pitch[3], offset[3];
899     const int Y = 0;
900
901     /* source surface */
902     pp_set_media_rw_message_surface(ctx, pp_context, src_surface, 1, 0,
903                                     width, height, pitch, offset);
904
905     /* destination surface */
906     pp_set_media_rw_message_surface(ctx, pp_context, dst_surface, 7, 1,
907                                     width, height, pitch, offset);
908
909     /* private function & data */
910     pp_context->pp_x_steps = pp_load_save_x_steps;
911     pp_context->pp_y_steps = pp_load_save_y_steps;
912     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
913     pp_load_save_context->dest_h = ALIGN(height[Y], 16);
914     pp_load_save_context->dest_w = ALIGN(width[Y], 16);
915
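    /* The load/save kernel is dispatched once per 8-line row (pp_load_save_y_steps()
     * returns dest_h / 8); each dispatch covers a full row of 16x8-pixel blocks. */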
916     pp_inline_parameter.grf5.block_count_x = ALIGN(width[Y], 16) / 16;   /* 1 x N */
917     pp_inline_parameter.grf5.number_blocks = ALIGN(width[Y], 16) / 16;
918 }
919
920 static int
921 pp_scaling_x_steps(void *private_context)
922 {
923     return 1;
924 }
925
926 static int
927 pp_scaling_y_steps(void *private_context)
928 {
929     struct pp_scaling_context *pp_scaling_context = private_context;
930
931     return pp_scaling_context->dest_h / 8;
932 }
933
934 static int
935 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
936 {
937     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
938     float src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
939     float src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
940
941     pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
942     pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
943     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
944     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
945     
946     return 0;
947 }
948
949 static void
950 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
951                            const struct i965_surface *src_surface,
952                            const VARectangle *src_rect,
953                            const struct i965_surface *dst_surface,
954                            const VARectangle *dst_rect)
955 {
956     struct i965_driver_data *i965 = i965_driver_data(ctx);
957     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
958     struct object_surface *obj_surface;
959     struct i965_sampler_state *sampler_state;
960     int in_w, in_h, in_wpitch, in_hpitch;
961     int out_w, out_h, out_wpitch, out_hpitch;
962
963     /* source surface */
964     obj_surface = SURFACE(src_surface->id);
965     in_w = obj_surface->orig_width;
966     in_h = obj_surface->orig_height;
967     in_wpitch = obj_surface->width;
968     in_hpitch = obj_surface->height;
969
970     /* source Y surface index 1 */
971     i965_pp_set_surface_state(ctx, pp_context,
972                               obj_surface->bo, 0,
973                               in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
974                               1, 0);
975
976     /* source UV surface index 2 */
977     i965_pp_set_surface_state(ctx, pp_context,
978                               obj_surface->bo, in_wpitch * in_hpitch,
979                               in_w / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
980                               2, 0);
981
982     /* destination surface */
983     obj_surface = SURFACE(dst_surface->id);
984     out_w = obj_surface->orig_width;
985     out_h = obj_surface->orig_height;
986     out_wpitch = obj_surface->width;
987     out_hpitch = obj_surface->height;
988
989     /* destination Y surface index 7 */
990     i965_pp_set_surface_state(ctx, pp_context,
991                               obj_surface->bo, 0,
992                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
993                               7, 1);
994
995     /* destination UV surface index 8 */
996     i965_pp_set_surface_state(ctx, pp_context,
997                               obj_surface->bo, out_wpitch * out_hpitch,
998                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
999                               8, 1);
1000
1001     /* sampler state */
1002     dri_bo_map(pp_context->sampler_state_table.bo, True);
1003     assert(pp_context->sampler_state_table.bo->virtual);
1004     sampler_state = pp_context->sampler_state_table.bo->virtual;
1005
1006     /* SIMD16 Y index 1 */
1007     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
1008     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1009     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1010     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1011     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1012
1013     /* SIMD16 UV index 2 */
1014     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
1015     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
1016     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1017     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1018     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
1019
1020     dri_bo_unmap(pp_context->sampler_state_table.bo);
1021
1022     /* private function & data */
1023     pp_context->pp_x_steps = pp_scaling_x_steps;
1024     pp_context->pp_y_steps = pp_scaling_y_steps;
1025     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
1026
1027     pp_scaling_context->dest_x = dst_rect->x;
1028     pp_scaling_context->dest_y = dst_rect->y;
1029     pp_scaling_context->dest_w = ALIGN(dst_rect->width, 16);
1030     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 16);
1031     pp_scaling_context->src_normalized_x = (float)src_rect->x / in_w / out_w;
1032     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h / out_h;
1033
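    /* The scaling steps below are the source extent normalized to the source plane
     * size, divided by the destination surface size, i.e. the normalized source
     * distance advanced per destination pixel. */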
1034     pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / out_h;
1035
1036     pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / out_w;
1037     pp_inline_parameter.grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
1038     pp_inline_parameter.grf5.number_blocks = pp_scaling_context->dest_w / 16;
1039     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1040     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1041 }
1042
1043 static int
1044 pp_avs_x_steps(void *private_context)
1045 {
1046     struct pp_avs_context *pp_avs_context = private_context;
1047
1048     return pp_avs_context->dest_w / 16;
1049 }
1050
1051 static int
1052 pp_avs_y_steps(void *private_context)
1053 {
1054     return 1;
1055 }
1056
1057 static int
1058 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1059 {
1060     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1061     float src_x_steping, src_y_steping, video_step_delta;
1062     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
1063
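    /* tmp_w is the destination width that would preserve the source aspect ratio.
     * If it is at least dest_w, a uniform horizontal step (1 / tmp_w) is used and
     * the overshoot is cropped symmetrically; otherwise the row is split into left,
     * center and right regions, and the border regions use a varying step
     * (video_step_delta) for non-linear anamorphic scaling (cf. the nlas bit set in
     * pp_nv12_avs_initialize()). */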
1064     if (tmp_w >= pp_avs_context->dest_w) {
1065         pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1066         pp_inline_parameter.grf6.video_step_delta = 0;
1067         
1068         if (x == 0) {
1069             pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
1070                 pp_avs_context->src_normalized_x;
1071         } else {
1072             src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1073             video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1074             pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1075                 16 * 15 * video_step_delta / 2;
1076         }
1077     } else {
1078         int n0, n1, n2, nls_left, nls_right;
1079         int factor_a = 5, factor_b = 4;
1080         float f;
1081
1082         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
1083         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
1084         n2 = tmp_w / (16 * factor_a);
1085         nls_left = n0 + n2;
1086         nls_right = n1 + n2;
1087         f = (float) n2 * 16 / tmp_w;
1088         
1089         if (n0 < 5) {
1090             pp_inline_parameter.grf6.video_step_delta = 0.0;
1091
1092             if (x == 0) {
1093                 pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
1094                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1095             } else {
1096                 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1097                 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1098                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1099                     16 * 15 * video_step_delta / 2;
1100             }
1101         } else {
1102             if (x < nls_left) {
1103                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
1104                 float a = f / (nls_left * 16 * factor_b);
1105                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
1106                 
1107                 pp_inline_parameter.grf6.video_step_delta = b;
1108
1109                 if (x == 0) {
1110                     pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
1111                     pp_inline_parameter.grf5.normalized_video_x_scaling_step = a;
1112                 } else {
1113                     src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1114                     video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1115                     pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1116                         16 * 15 * video_step_delta / 2;
1117                     pp_inline_parameter.grf5.normalized_video_x_scaling_step += 16 * b;
1118                 }
1119             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
1120                 /* scale the center linearly */
1121                 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1122                 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1123                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1124                     16 * 15 * video_step_delta / 2;
1125                 pp_inline_parameter.grf6.video_step_delta = 0.0;
1126                 pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1127             } else {
1128                 float a = f / (nls_right * 16 * factor_b);
1129                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
1130
1131                 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1132                 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1133                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1134                     16 * 15 * video_step_delta / 2;
1135                 pp_inline_parameter.grf6.video_step_delta = -b;
1136
1137                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
1138                     pp_inline_parameter.grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
1139                 else
1140                     pp_inline_parameter.grf5.normalized_video_x_scaling_step -= b * 16;
1141             }
1142         }
1143     }
1144
1145     src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
1146     pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
1147     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
1148     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;
1149
1150     return 0;
1151 }
1152
1153 static void
1154 pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1155                        const struct i965_surface *src_surface,
1156                        const VARectangle *src_rect,
1157                        const struct i965_surface *dst_surface,
1158                        const VARectangle *dst_rect)
1159 {
1160     struct i965_driver_data *i965 = i965_driver_data(ctx);
1161     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1162     struct object_surface *obj_surface;
1163     struct i965_sampler_8x8 *sampler_8x8;
1164     struct i965_sampler_8x8_state *sampler_8x8_state;
1165     int index;
1166     int in_w, in_h, in_wpitch, in_hpitch;
1167     int out_w, out_h, out_wpitch, out_hpitch;
1168
1169     /* surface */
1170     obj_surface = SURFACE(src_surface->id);
1171     in_w = obj_surface->orig_width;
1172     in_h = obj_surface->orig_height;
1173     in_wpitch = obj_surface->width;
1174     in_hpitch = obj_surface->height;
1175
1176     /* source Y surface index 1 */
1177     i965_pp_set_surface2_state(ctx, pp_context,
1178                                obj_surface->bo, 0,
1179                                in_w, in_h, in_wpitch,
1180                                0, 0,
1181                                SURFACE_FORMAT_Y8_UNORM, 0,
1182                                1);
1183
1184     /* source UV surface index 2 */
1185     i965_pp_set_surface2_state(ctx, pp_context,
1186                                obj_surface->bo, in_wpitch * in_hpitch,
1187                                in_w, in_h, in_wpitch,
1188                                0, 0,
1189                                SURFACE_FORMAT_PLANAR_420_8, 1,
1190                                2);
1191
1192     /* destination surface */
1193     obj_surface = SURFACE(dst_surface->id);
1194     out_w = obj_surface->orig_width;
1195     out_h = obj_surface->orig_height;
1196     out_wpitch = obj_surface->width;
1197     out_hpitch = obj_surface->height;
1198     assert(out_w <= out_wpitch && out_h <= out_hpitch);
1199
1200     /* destination Y surface index 7 */
1201     i965_pp_set_surface_state(ctx, pp_context,
1202                               obj_surface->bo, 0,
1203                               out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
1204                               7, 1);
1205
1206     /* destination UV surface index 8 */
1207     i965_pp_set_surface_state(ctx, pp_context,
1208                               obj_surface->bo, out_wpitch * out_hpitch,
1209                               out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
1210                               8, 1);
1211
1212     /* sampler 8x8 state */
1213     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
1214     assert(pp_context->sampler_state_table.bo_8x8->virtual);
1215     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1216     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
1217     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1218     sampler_8x8_state->dw136.default_sharpness_level = 0;
1219     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
1220     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1221     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1222     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
1223
1224     /* sampler 8x8 */
1225     dri_bo_map(pp_context->sampler_state_table.bo, True);
1226     assert(pp_context->sampler_state_table.bo->virtual);
1227     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
1228     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
1229
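    /* The Y channel uses the adaptive 8-tap AVS filter with the IEF (image
     * enhancement filter) enabled; the values below are hard-coded tuning
     * parameters.  The UV channel (index 2 further down) reuses the same tuning
     * but with a nearest filter. */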
1230     /* sample_8x8 Y index 1 */
1231     index = 1;
1232     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1233     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
1234     sampler_8x8[index].dw0.ief_bypass = 0;
1235     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1236     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1237     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
1238     sampler_8x8[index].dw2.global_noise_estimation = 22;
1239     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1240     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1241     sampler_8x8[index].dw3.strong_edge_weight = 7;
1242     sampler_8x8[index].dw3.regular_weight = 2;
1243     sampler_8x8[index].dw3.non_edge_weight = 0;
1244     sampler_8x8[index].dw3.gain_factor = 40;
1245     sampler_8x8[index].dw4.steepness_boost = 0;
1246     sampler_8x8[index].dw4.steepness_threshold = 0;
1247     sampler_8x8[index].dw4.mr_boost = 0;
1248     sampler_8x8[index].dw4.mr_threshold = 5;
1249     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1250     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1251     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1252     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1253     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1254     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1255     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1256     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1257     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1258     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1259     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1260     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1261     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1262     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1263     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1264     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1265     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1266     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1267     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1268     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1269     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1270     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1271     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1272     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1273     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1274     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1275     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1276     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1277     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1278     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1279     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1280     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1281     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1282     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1283     sampler_8x8[index].dw13.limiter_boost = 0;
1284     sampler_8x8[index].dw13.minimum_limiter = 10;
1285     sampler_8x8[index].dw13.maximum_limiter = 11;
1286     sampler_8x8[index].dw14.clip_limiter = 130;
1287     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1288                       I915_GEM_DOMAIN_RENDER, 
1289                       0,
1290                       0,
1291                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1292                       pp_context->sampler_state_table.bo_8x8);
1293
1294     dri_bo_map(pp_context->sampler_state_table.bo_8x8_uv, True);
1295     assert(pp_context->sampler_state_table.bo_8x8_uv->virtual);
1296     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1297     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8_uv->virtual;
1298     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1299     sampler_8x8_state->dw136.default_sharpness_level = 0;
1300     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 0;
1301     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1302     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1303     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8_uv);
1304
1305     /* sample_8x8 UV index 2 */
1306     index = 2;
1307     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1308     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_NEAREST;
1309     sampler_8x8[index].dw0.ief_bypass = 0;
1310     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1311     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1312     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8_uv->offset >> 5;
1313     sampler_8x8[index].dw2.global_noise_estimation = 22;
1314     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1315     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1316     sampler_8x8[index].dw3.strong_edge_weight = 7;
1317     sampler_8x8[index].dw3.regular_weight = 2;
1318     sampler_8x8[index].dw3.non_edge_weight = 0;
1319     sampler_8x8[index].dw3.gain_factor = 40;
1320     sampler_8x8[index].dw4.steepness_boost = 0;
1321     sampler_8x8[index].dw4.steepness_threshold = 0;
1322     sampler_8x8[index].dw4.mr_boost = 0;
1323     sampler_8x8[index].dw4.mr_threshold = 5;
1324     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1325     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1326     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1327     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1328     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1329     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1330     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1331     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1332     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1333     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1334     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1335     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1336     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1337     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1338     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1339     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1340     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1341     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1342     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1343     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1344     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1345     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1346     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1347     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1348     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1349     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1350     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1351     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1352     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1353     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1354     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1355     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1356     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1357     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1358     sampler_8x8[index].dw13.limiter_boost = 0;
1359     sampler_8x8[index].dw13.minimum_limiter = 10;
1360     sampler_8x8[index].dw13.maximum_limiter = 11;
1361     sampler_8x8[index].dw14.clip_limiter = 130;
1362     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1363                       I915_GEM_DOMAIN_RENDER, 
1364                       0,
1365                       0,
1366                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1367                       pp_context->sampler_state_table.bo_8x8_uv);
1368
1369     dri_bo_unmap(pp_context->sampler_state_table.bo);
1370
1371     /* private function & data */
1372     pp_context->pp_x_steps = pp_avs_x_steps;
1373     pp_context->pp_y_steps = pp_avs_y_steps;
1374     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
1375
1376     pp_avs_context->dest_x = dst_rect->x;
1377     pp_avs_context->dest_y = dst_rect->y;
1378     pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
1379     pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
1380     pp_avs_context->src_normalized_x = (float)src_rect->x / in_w / out_w;
1381     pp_avs_context->src_normalized_y = (float)src_rect->y / in_h / out_h;
1382     pp_avs_context->src_w = src_rect->width;
1383     pp_avs_context->src_h = src_rect->height;
1384
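         /* Enable NLAS and program the AVS scaling steps: each step is the
          * source extent divided by both the source size (in_w/in_h) and
          * the destination size (out_w/out_h), so advancing one destination
          * pixel/line moves the normalized source coordinate by exactly one
          * step. */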
1385     pp_static_parameter.grf4.r4_2.avs.nlas = 1;
1386     pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / out_h;
1387
1388     pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / out_w;
1389     pp_inline_parameter.grf5.block_count_x = 1;        /* M x 1 */
1390     pp_inline_parameter.grf5.number_blocks = pp_avs_context->dest_h / 8;
1391     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1392     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1393     pp_inline_parameter.grf6.video_step_delta = 0.0;
1394 }
1395
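     /* Block walker callbacks for the DNDI module: the kernel consumes
      * 16x4 destination blocks and one MEDIA_OBJECT covers a whole row of
      * them (block_count_x is set to w / 16 below), so only the Y walker
      * advances, dest_h / 4 times. */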
1396 static int
1397 pp_dndi_x_steps(void *private_context)
1398 {
1399     return 1;
1400 }
1401
1402 static int
1403 pp_dndi_y_steps(void *private_context)
1404 {
1405     struct pp_dndi_context *pp_dndi_context = private_context;
1406
1407     return pp_dndi_context->dest_h / 4;
1408 }
1409
1410 static int
1411 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1412 {
1413     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
1414     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 4;
1415
1416     return 0;
1417 }
1418
1419 static void
1420 pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1421                         const struct i965_surface *src_surface,
1422                         const VARectangle *src_rect,
1423                         const struct i965_surface *dst_surface,
1424                         const VARectangle *dst_rect)
1425 {
1426     struct i965_driver_data *i965 = i965_driver_data(ctx);
1427     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
1428     struct object_surface *obj_surface;
1429     struct i965_sampler_dndi *sampler_dndi;
1430     int index;
1431     int w, h;
1432     int orig_w, orig_h;
1433
1434     /* source surface */
1435     obj_surface = SURFACE(src_surface->id);
1436     orig_w = obj_surface->orig_width;
1437     orig_h = obj_surface->orig_height;
1438     w = obj_surface->width;
1439     h = obj_surface->height;
1440
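         /* STMM motion-measure/history surface for the DN/DI kernel,
          * allocated lazily at w * h bytes and kept in the context so the
          * temporal state persists across frames. */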
1441     if (pp_context->stmm.bo == NULL) {
1442         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
1443                                            "STMM surface",
1444                                            w * h,
1445                                            4096);
1446         assert(pp_context->stmm.bo);
1447     }
1448
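         /* The binding-table slots used below (2, 4, 20, 7 and 8) have to
          * match what the nv12_dndi kernel expects; the full planar source
          * is additionally bound through a "surface2" media surface state
          * at index 4. */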
1449     /* source UV surface index 2 */
1450     i965_pp_set_surface_state(ctx, pp_context,
1451                               obj_surface->bo, w * h,
1452                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
1453                               2, 0);
1454
1455     /* source YUV surface index 4 */
1456     i965_pp_set_surface2_state(ctx, pp_context,
1457                                obj_surface->bo, 0,
1458                                orig_w, orig_w, w,
1459                                0, h,
1460                                SURFACE_FORMAT_PLANAR_420_8, 1,
1461                                4);
1462
1463     /* source STMM surface index 20 */
1464     i965_pp_set_surface_state(ctx, pp_context,
1465                               pp_context->stmm.bo, 0,
1466                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
1467                               20, 1);
1468
1469     /* destination surface */
1470     obj_surface = SURFACE(dst_surface->id);
1471     orig_w = obj_surface->orig_width;
1472     orig_h = obj_surface->orig_height;
1473     w = obj_surface->width;
1474     h = obj_surface->height;
1475
1476     /* destination Y surface index 7 */
1477     i965_pp_set_surface_state(ctx, pp_context,
1478                               obj_surface->bo, 0,
1479                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
1480                               7, 1);
1481
1482     /* destination UV surface index 8 */
1483     i965_pp_set_surface_state(ctx, pp_context,
1484                               obj_surface->bo, w * h,
1485                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
1486                               8, 1);
1487     /* sampler dndi */
1488     dri_bo_map(pp_context->sampler_state_table.bo, True);
1489     assert(pp_context->sampler_state_table.bo->virtual);
1490     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
1491     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
1492
1493     /* sampler dndi index 0 */
1494     index = 0;
1495     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
1496     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
1497     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
1498     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
1499
1500     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
1501     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
1502     sampler_dndi[index].dw1.stmm_c2 = 0;
1503     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
1504     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
1505
1506     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
1507     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
1508     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
1509     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
1510
1511     sampler_dndi[index].dw3.maximum_stmm = 128;
1512     sampler_dndi[index].dw3.multipler_for_vecm = 2;
1513     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
1514     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
1515     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
1516
1517     sampler_dndi[index].dw4.sdi_delta = 8;
1518     sampler_dndi[index].dw4.sdi_threshold = 128;
1519     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
1520     sampler_dndi[index].dw4.stmm_shift_up = 0;
1521     sampler_dndi[index].dw4.stmm_shift_down = 0;
1522     sampler_dndi[index].dw4.minimum_stmm = 0;
1523
1524     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
1525     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
1526     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
1527     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
1528
1529     sampler_dndi[index].dw6.dn_enable = 1;
1530     sampler_dndi[index].dw6.di_enable = 1;
1531     sampler_dndi[index].dw6.di_partial = 0;
1532     sampler_dndi[index].dw6.dndi_top_first = 1;
1533     sampler_dndi[index].dw6.dndi_stream_id = 1;
1534     sampler_dndi[index].dw6.dndi_first_frame = 1;
1535     sampler_dndi[index].dw6.progressive_dn = 0;
1536     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
1537     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
1538     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
1539
1540     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
1541     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
1542     sampler_dndi[index].dw7.vdi_walker_enable = 0;
1543     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
1544
1545     dri_bo_unmap(pp_context->sampler_state_table.bo);
1546
1547     /* private function & data */
1548     pp_context->pp_x_steps = pp_dndi_x_steps;
1549     pp_context->pp_y_steps = pp_dndi_y_steps;
1550     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
1551
1552     pp_static_parameter.grf1.statistics_surface_picth = w / 2;
1553     pp_static_parameter.grf1.r1_6.di.top_field_first = 0;
1554     pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m2 = 64;
1555     pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m1 = 192;
1556
1557     pp_inline_parameter.grf5.block_count_x = w / 16;   /* 1 x N */
1558     pp_inline_parameter.grf5.number_blocks = w / 16;
1559     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1560     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1561
1562     pp_dndi_context->dest_w = w;
1563     pp_dndi_context->dest_h = h;
1564 }
1565
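     /* Common per-run setup for Ironlake: (re)allocate the CURBE, binding
      * table, interface descriptor, sampler and VFE state buffers, drop the
      * surface states left over from the previous run, then let the
      * selected PP module's initialize() hook fill in surfaces, samplers
      * and the static/inline parameters. */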
1566 static void
1567 ironlake_pp_initialize(
1568     VADriverContextP   ctx,
1569     struct i965_post_processing_context *pp_context,
1570     const struct i965_surface *src_surface,
1571     const VARectangle *src_rect,
1572     const struct i965_surface *dst_surface,
1573     const VARectangle *dst_rect,
1574     int                pp_index
1575 )
1576 {
1577     struct i965_driver_data *i965 = i965_driver_data(ctx);
1578     struct pp_module *pp_module;
1579     dri_bo *bo;
1580     int i;
1581
1582     dri_bo_unreference(pp_context->curbe.bo);
1583     bo = dri_bo_alloc(i965->intel.bufmgr,
1584                       "constant buffer",
1585                       4096, 
1586                       4096);
1587     assert(bo);
1588     pp_context->curbe.bo = bo;
1589
1590     dri_bo_unreference(pp_context->binding_table.bo);
1591     bo = dri_bo_alloc(i965->intel.bufmgr, 
1592                       "binding table",
1593                       sizeof(unsigned int), 
1594                       4096);
1595     assert(bo);
1596     pp_context->binding_table.bo = bo;
1597
1598     dri_bo_unreference(pp_context->idrt.bo);
1599     bo = dri_bo_alloc(i965->intel.bufmgr, 
1600                       "interface descriptor",
1601                       sizeof(struct i965_interface_descriptor), 
1602                       4096);
1603     assert(bo);
1604     pp_context->idrt.bo = bo;
1605     pp_context->idrt.num_interface_descriptors = 0;
1606
1607     dri_bo_unreference(pp_context->sampler_state_table.bo);
1608     bo = dri_bo_alloc(i965->intel.bufmgr, 
1609                       "sampler state table", 
1610                       4096,
1611                       4096);
1612     assert(bo);
1613     dri_bo_map(bo, True);
1614     memset(bo->virtual, 0, bo->size);
1615     dri_bo_unmap(bo);
1616     pp_context->sampler_state_table.bo = bo;
1617
1618     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
1619     bo = dri_bo_alloc(i965->intel.bufmgr, 
1620                       "sampler 8x8 state",
1621                       4096,
1622                       4096);
1623     assert(bo);
1624     pp_context->sampler_state_table.bo_8x8 = bo;
1625
1626     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
1627     bo = dri_bo_alloc(i965->intel.bufmgr, 
1628                       "sampler 8x8 state (uv)",
1629                       4096,
1630                       4096);
1631     assert(bo);
1632     pp_context->sampler_state_table.bo_8x8_uv = bo;
1633
1634     dri_bo_unreference(pp_context->vfe_state.bo);
1635     bo = dri_bo_alloc(i965->intel.bufmgr, 
1636                       "vfe state", 
1637                       sizeof(struct i965_vfe_state), 
1638                       4096);
1639     assert(bo);
1640     pp_context->vfe_state.bo = bo;
1641     
1642     for (i = 0; i < MAX_PP_SURFACES; i++) {
1643         dri_bo_unreference(pp_context->surfaces[i].ss_bo);
1644         pp_context->surfaces[i].ss_bo = NULL;
1645
1646         dri_bo_unreference(pp_context->surfaces[i].s_bo);
1647         pp_context->surfaces[i].s_bo = NULL;
1648     }
1649
1650     memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
1651     memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
1652     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
1653     pp_context->current_pp = pp_index;
1654     pp_module = &pp_context->pp_modules[pp_index];
1655     
1656     if (pp_module->initialize)
1657         pp_module->initialize(ctx, pp_context,
1658                               src_surface,
1659                               src_rect,
1660                               dst_surface,
1661                               dst_rect);
1662 }
1663
1664 static void
1665 ironlake_post_processing(
1666     VADriverContextP   ctx,
1667     struct i965_post_processing_context *pp_context,
1668     const struct i965_surface *src_surface,
1669     const VARectangle *src_rect,
1670     const struct i965_surface *dst_surface,
1671     const VARectangle *dst_rect,
1672     int                pp_index
1673 )
1674 {
1675     ironlake_pp_initialize(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index);
1676     ironlake_pp_states_setup(ctx, pp_context);
1677     ironlake_pp_pipeline_setup(ctx, pp_context);
1678 }
1679
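     /* Gen6/Gen7 counterpart of the setup above; the flow is identical,
      * only the interface descriptor layout
      * (struct gen6_interface_descriptor_data) differs. */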
1680 static void
1681 gen6_pp_initialize(
1682     VADriverContextP   ctx,
1683     struct i965_post_processing_context *pp_context,
1684     const struct i965_surface *src_surface,
1685     const VARectangle *src_rect,
1686     const struct i965_surface *dst_surface,
1687     const VARectangle *dst_rect,
1688     int                pp_index
1689 )
1690 {
1691     struct i965_driver_data *i965 = i965_driver_data(ctx);
1692     struct pp_module *pp_module;
1693     dri_bo *bo;
1694     int i;
1695
1696     dri_bo_unreference(pp_context->curbe.bo);
1697     bo = dri_bo_alloc(i965->intel.bufmgr,
1698                       "constant buffer",
1699                       4096, 
1700                       4096);
1701     assert(bo);
1702     pp_context->curbe.bo = bo;
1703
1704     dri_bo_unreference(pp_context->binding_table.bo);
1705     bo = dri_bo_alloc(i965->intel.bufmgr, 
1706                       "binding table",
1707                       sizeof(unsigned int), 
1708                       4096);
1709     assert(bo);
1710     pp_context->binding_table.bo = bo;
1711
1712     dri_bo_unreference(pp_context->idrt.bo);
1713     bo = dri_bo_alloc(i965->intel.bufmgr, 
1714                       "interface descriptor",
1715                       sizeof(struct gen6_interface_descriptor_data), 
1716                       4096);
1717     assert(bo);
1718     pp_context->idrt.bo = bo;
1719     pp_context->idrt.num_interface_descriptors = 0;
1720
1721     dri_bo_unreference(pp_context->sampler_state_table.bo);
1722     bo = dri_bo_alloc(i965->intel.bufmgr, 
1723                       "sampler state table", 
1724                       4096,
1725                       4096);
1726     assert(bo);
1727     dri_bo_map(bo, True);
1728     memset(bo->virtual, 0, bo->size);
1729     dri_bo_unmap(bo);
1730     pp_context->sampler_state_table.bo = bo;
1731
1732     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
1733     bo = dri_bo_alloc(i965->intel.bufmgr, 
1734                       "sampler 8x8 state",
1735                       4096,
1736                       4096);
1737     assert(bo);
1738     pp_context->sampler_state_table.bo_8x8 = bo;
1739
1740     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
1741     bo = dri_bo_alloc(i965->intel.bufmgr, 
1742                       "sampler 8x8 state (uv)",
1743                       4096,
1744                       4096);
1745     assert(bo);
1746     pp_context->sampler_state_table.bo_8x8_uv = bo;
1747
1748     dri_bo_unreference(pp_context->vfe_state.bo);
1749     bo = dri_bo_alloc(i965->intel.bufmgr, 
1750                       "vfe state", 
1751                       sizeof(struct i965_vfe_state), 
1752                       4096);
1753     assert(bo);
1754     pp_context->vfe_state.bo = bo;
1755     
1756     for (i = 0; i < MAX_PP_SURFACES; i++) {
1757         dri_bo_unreference(pp_context->surfaces[i].ss_bo);
1758         pp_context->surfaces[i].ss_bo = NULL;
1759
1760         dri_bo_unreference(pp_context->surfaces[i].s_bo);
1761         pp_context->surfaces[i].s_bo = NULL;
1762     }
1763
1764     memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
1765     memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
1766     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
1767     pp_context->current_pp = pp_index;
1768     pp_module = &pp_context->pp_modules[pp_index];
1769     
1770     if (pp_module->initialize)
1771         pp_module->initialize(ctx, pp_context,
1772                               src_surface,
1773                               src_rect,
1774                               dst_surface,
1775                               dst_rect);
1776 }
1777
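     /* Fill the binding table with one entry per active surface state; the
      * value written is only the presumed offset, the relocation emitted
      * below patches in the real surface-state address at exec time. */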
1778 static void
1779 gen6_pp_binding_table(struct i965_post_processing_context *pp_context)
1780 {
1781     unsigned int *binding_table;
1782     dri_bo *bo = pp_context->binding_table.bo;
1783     int i;
1784
1785     dri_bo_map(bo, 1);
1786     assert(bo->virtual);
1787     binding_table = bo->virtual;
1788     memset(binding_table, 0, bo->size);
1789
1790     for (i = 0; i < MAX_PP_SURFACES; i++) {
1791         if (pp_context->surfaces[i].ss_bo) {
1792             assert(pp_context->surfaces[i].s_bo);
1793
1794             binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
1795             dri_bo_emit_reloc(bo,
1796                               I915_GEM_DOMAIN_INSTRUCTION, 0,
1797                               0,
1798                               i * sizeof(*binding_table),
1799                               pp_context->surfaces[i].ss_bo);
1800         }
1801     
1802     }
1803
1804     dri_bo_unmap(bo);
1805 }
1806
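     /* Build a single interface descriptor: the kernel start pointer is a
      * 64-byte-aligned offset (>> 6), the sampler state and binding table
      * pointers are 32-byte-aligned (>> 5), and relocations against the
      * backing BOs keep them valid wherever the buffers land. */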
1807 static void
1808 gen6_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
1809 {
1810     struct gen6_interface_descriptor_data *desc;
1811     dri_bo *bo;
1812     int pp_index = pp_context->current_pp;
1813
1814     bo = pp_context->idrt.bo;
1815     dri_bo_map(bo, True);
1816     assert(bo->virtual);
1817     desc = bo->virtual;
1818     memset(desc, 0, sizeof(*desc));
1819     desc->desc0.kernel_start_pointer = 
1820         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
1821     desc->desc1.single_program_flow = 1;
1822     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
1823     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
1824     desc->desc2.sampler_state_pointer = 
1825         pp_context->sampler_state_table.bo->offset >> 5;
1826     desc->desc3.binding_table_entry_count = 0;
1827     desc->desc3.binding_table_pointer = 
1828         pp_context->binding_table.bo->offset >> 5; /*reloc */
1829     desc->desc4.constant_urb_entry_read_offset = 0;
1830     desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
1831
1832     dri_bo_emit_reloc(bo,
1833                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1834                       0,
1835                       offsetof(struct gen6_interface_descriptor_data, desc0),
1836                       pp_context->pp_modules[pp_index].kernel.bo);
1837
1838     dri_bo_emit_reloc(bo,
1839                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1840                       desc->desc2.sampler_count << 2,
1841                       offsetof(struct gen6_interface_descriptor_data, desc2),
1842                       pp_context->sampler_state_table.bo);
1843
1844     dri_bo_emit_reloc(bo,
1845                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1846                       desc->desc3.binding_table_entry_count,
1847                       offsetof(struct gen6_interface_descriptor_data, desc3),
1848                       pp_context->binding_table.bo);
1849
1850     dri_bo_unmap(bo);
1851     pp_context->idrt.num_interface_descriptors++;
1852 }
1853
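     /* Copy the 128-byte static parameter block (grf 1-4) into the CURBE
      * buffer; it reaches the kernel via CMD_MEDIA_CURBE_LOAD below. */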
1854 static void
1855 gen6_pp_upload_constants(struct i965_post_processing_context *pp_context)
1856 {
1857     unsigned char *constant_buffer;
1858
1859     assert(sizeof(pp_static_parameter) == 128);
1860     dri_bo_map(pp_context->curbe.bo, 1);
1861     assert(pp_context->curbe.bo->virtual);
1862     constant_buffer = pp_context->curbe.bo->virtual;
1863     memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
1864     dri_bo_unmap(pp_context->curbe.bo);
1865 }
1866
1867 static void
1868 gen6_pp_states_setup(VADriverContextP ctx,
1869                      struct i965_post_processing_context *pp_context)
1870 {
1871     gen6_pp_binding_table(pp_context);
1872     gen6_pp_interface_descriptor_table(pp_context);
1873     gen6_pp_upload_constants(pp_context);
1874 }
1875
1876 static void
1877 gen6_pp_pipeline_select(VADriverContextP ctx,
1878                         struct i965_post_processing_context *pp_context)
1879 {
1880     struct intel_batchbuffer *batch = pp_context->batch;
1881
1882     BEGIN_BATCH(batch, 1);
1883     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
1884     ADVANCE_BATCH(batch);
1885 }
1886
1887 static void
1888 gen6_pp_state_base_address(VADriverContextP ctx,
1889                            struct i965_post_processing_context *pp_context)
1890 {
1891     struct intel_batchbuffer *batch = pp_context->batch;
1892
1893     BEGIN_BATCH(batch, 10);
1894     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
1895     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1896     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1897     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1898     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1899     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1900     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1901     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1902     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1903     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1904     ADVANCE_BATCH(batch);
1905 }
1906
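     /* MEDIA_VFE_STATE: URB entry count/size and the CURBE allocation,
      * using the split computed in i965_post_processing_context_init()
      * (sizes are converted from 512-bit to 256-bit units here). */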
1907 static void
1908 gen6_pp_vfe_state(VADriverContextP ctx,
1909                   struct i965_post_processing_context *pp_context)
1910 {
1911     struct intel_batchbuffer *batch = pp_context->batch;
1912
1913     BEGIN_BATCH(batch, 8);
1914     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
1915     OUT_BATCH(batch, 0);
1916     OUT_BATCH(batch,
1917               (pp_context->urb.num_vfe_entries - 1) << 16 |
1918               pp_context->urb.num_vfe_entries << 8);
1919     OUT_BATCH(batch, 0);
1920     OUT_BATCH(batch,
1921               (pp_context->urb.size_vfe_entry * 2) << 16 |  /* in 256 bits unit */
1922               (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 - 1));            /* in 256 bits unit */
1923     OUT_BATCH(batch, 0);
1924     OUT_BATCH(batch, 0);
1925     OUT_BATCH(batch, 0);
1926     ADVANCE_BATCH(batch);
1927 }
1928
1929 static void
1930 gen6_pp_curbe_load(VADriverContextP ctx,
1931                    struct i965_post_processing_context *pp_context)
1932 {
1933     struct intel_batchbuffer *batch = pp_context->batch;
1934
1935     assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512 <= pp_context->curbe.bo->size);
1936
1937     BEGIN_BATCH(batch, 4);
1938     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
1939     OUT_BATCH(batch, 0);
1940     OUT_BATCH(batch,
1941               pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512);
1942     OUT_RELOC(batch, 
1943               pp_context->curbe.bo,
1944               I915_GEM_DOMAIN_INSTRUCTION, 0,
1945               0);
1946     ADVANCE_BATCH(batch);
1947 }
1948
1949 static void
1950 gen6_interface_descriptor_load(VADriverContextP ctx,
1951                                struct i965_post_processing_context *pp_context)
1952 {
1953     struct intel_batchbuffer *batch = pp_context->batch;
1954
1955     BEGIN_BATCH(batch, 4);
1956     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
1957     OUT_BATCH(batch, 0);
1958     OUT_BATCH(batch,
1959               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
1960     OUT_RELOC(batch, 
1961               pp_context->idrt.bo,
1962               I915_GEM_DOMAIN_INSTRUCTION, 0,
1963               0);
1964     ADVANCE_BATCH(batch);
1965 }
1966
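     /* Walk the destination: the module callbacks supply the step counts
      * and update the per-block origin/masks, and every step emits a
      * MEDIA_OBJECT carrying the 64-byte inline parameters (grf 5-6). */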
1967 static void
1968 gen6_pp_object_walker(VADriverContextP ctx,
1969                       struct i965_post_processing_context *pp_context)
1970 {
1971     struct intel_batchbuffer *batch = pp_context->batch;
1972     int x, x_steps, y, y_steps;
1973
1974     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
1975     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
1976
1977     for (y = 0; y < y_steps; y++) {
1978         for (x = 0; x < x_steps; x++) {
1979             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
1980                 BEGIN_BATCH(batch, 22);
1981                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 20);
1982                 OUT_BATCH(batch, 0);
1983                 OUT_BATCH(batch, 0); /* no indirect data */
1984                 OUT_BATCH(batch, 0);
1985                 OUT_BATCH(batch, 0); /* scoreboard */
1986                 OUT_BATCH(batch, 0);
1987
1988                 /* inline data grf 5-6 */
1989                 assert(sizeof(pp_inline_parameter) == 64);
1990                 intel_batchbuffer_data(batch, &pp_inline_parameter, sizeof(pp_inline_parameter));
1991
1992                 ADVANCE_BATCH(batch);
1993             }
1994         }
1995     }
1996 }
1997
1998 static void
1999 gen6_pp_pipeline_setup(VADriverContextP ctx,
2000                        struct i965_post_processing_context *pp_context)
2001 {
2002     struct intel_batchbuffer *batch = pp_context->batch;
2003
2004     intel_batchbuffer_start_atomic(batch, 0x1000);
2005     intel_batchbuffer_emit_mi_flush(batch);
2006     gen6_pp_pipeline_select(ctx, pp_context);
2007     gen6_pp_curbe_load(ctx, pp_context);
2008     gen6_interface_descriptor_load(ctx, pp_context);
2009     gen6_pp_state_base_address(ctx, pp_context);
2010     gen6_pp_vfe_state(ctx, pp_context);
2011     gen6_pp_object_walker(ctx, pp_context);
2012     intel_batchbuffer_end_atomic(batch);
2013 }
2014
2015 static void
2016 gen6_post_processing(
2017     VADriverContextP   ctx,
2018     struct i965_post_processing_context *pp_context,
2019     const struct i965_surface *src_surface,
2020     const VARectangle *src_rect,
2021     const struct i965_surface *dst_surface,
2022     const VARectangle *dst_rect,
2023     int                pp_index
2024 )
2025 {
2026     gen6_pp_initialize(ctx, pp_context,
2027                        src_surface,
2028                        src_rect,
2029                        dst_surface,
2030                        dst_rect,
2031                        pp_index);
2032     gen6_pp_states_setup(ctx, pp_context);
2033     gen6_pp_pipeline_setup(ctx, pp_context);
2034 }
2035
2036 static void
2037 i965_post_processing_internal(
2038     VADriverContextP   ctx,
2039     struct i965_post_processing_context *pp_context,
2040     const struct i965_surface *src_surface,
2041     const VARectangle *src_rect,
2042     const struct i965_surface *dst_surface,
2043     const VARectangle *dst_rect,
2044     int                pp_index
2045 )
2046 {
2047     struct i965_driver_data *i965 = i965_driver_data(ctx);
2048
2049     if (IS_GEN6(i965->intel.device_id) ||
2050         IS_GEN7(i965->intel.device_id))
2051         gen6_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index);
2052     else
2053         ironlake_post_processing(ctx, pp_context, src_surface, src_rect, dst_surface, dst_rect, pp_index);
2054 }
2055
2056 VAStatus 
2057 i965_DestroySurfaces(VADriverContextP ctx,
2058                      VASurfaceID *surface_list,
2059                      int num_surfaces);
2060 VAStatus 
2061 i965_CreateSurfaces(VADriverContextP ctx,
2062                     int width,
2063                     int height,
2064                     int format,
2065                     int num_surfaces,
2066                     VASurfaceID *surfaces);
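     /* Post-processing entry used by the render path: optionally
      * deinterlace the NV12 input into a temporary surface, then, if
      * requested, AVS-scale into a surface sized to the destination
      * drawable.  Intermediate surfaces are destroyed once consumed and
      * *has_done_scaling reports whether scaling has already been done. */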
2067 VASurfaceID
2068 i965_post_processing(
2069     VADriverContextP   ctx,
2070     VASurfaceID        surface,
2071     const VARectangle *src_rect,
2072     const VARectangle *dst_rect,
2073     unsigned int       flags,
2074     int               *has_done_scaling  
2075 )
2076 {
2077     struct i965_driver_data *i965 = i965_driver_data(ctx);
2078     VASurfaceID in_surface_id = surface;
2079     VASurfaceID out_surface_id = VA_INVALID_ID;
2080     
2081     *has_done_scaling = 0;
2082
2083     if (HAS_PP(i965)) {
2084         struct object_surface *obj_surface;
2085         VAStatus status;
2086         struct i965_surface src_surface;
2087         struct i965_surface dst_surface;
2088
2089         obj_surface = SURFACE(in_surface_id);
2090
2091         /* Currently post processing is only supported for NV12 surfaces */
2092         if (obj_surface->fourcc != VA_FOURCC('N', 'V', '1', '2'))
2093             return out_surface_id;
2094
2095         if (flags & I965_PP_FLAG_DEINTERLACING) {
2096             status = i965_CreateSurfaces(ctx,
2097                                          obj_surface->orig_width,
2098                                          obj_surface->orig_height,
2099                                          VA_RT_FORMAT_YUV420,
2100                                          1,
2101                                          &out_surface_id);
2102             assert(status == VA_STATUS_SUCCESS);
2103             obj_surface = SURFACE(out_surface_id);
2104             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
2105
2106             src_surface.id = in_surface_id;
2107             src_surface.flag = I965_SURFACE_SURFACE;
2108             dst_surface.id = out_surface_id;
2109             dst_surface.flag = I965_SURFACE_SURFACE;
2110
2111             i965_post_processing_internal(ctx, i965->pp_context,
2112                                           &src_surface,
2113                                           src_rect,
2114                                           &dst_surface,
2115                                           dst_rect,
2116                                           PP_NV12_DNDI);
2117         }
2118
2119         if (flags & I965_PP_FLAG_AVS) {
2120             struct i965_render_state *render_state = &i965->render_state;
2121             struct intel_region *dest_region = render_state->draw_region;
2122
2123             if (out_surface_id != VA_INVALID_ID)
2124                 in_surface_id = out_surface_id;
2125
2126             status = i965_CreateSurfaces(ctx,
2127                                          dest_region->width,
2128                                          dest_region->height,
2129                                          VA_RT_FORMAT_YUV420,
2130                                          1,
2131                                          &out_surface_id);
2132             assert(status == VA_STATUS_SUCCESS);
2133             obj_surface = SURFACE(out_surface_id);
2134             i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
2135
2136             src_surface.id = in_surface_id;
2137             src_surface.flag = I965_SURFACE_SURFACE;
2138             dst_surface.id = out_surface_id;
2139             dst_surface.flag = I965_SURFACE_SURFACE;
2140
2141             i965_post_processing_internal(ctx, i965->pp_context,
2142                                           &src_surface,
2143                                           src_rect,
2144                                           &dst_surface,
2145                                           dst_rect,
2146                                           PP_NV12_AVS);
2147
2148             if (in_surface_id != surface)
2149                 i965_DestroySurfaces(ctx, &in_surface_id, 1);
2150                 
2151             *has_done_scaling = 1;
2152         }
2153     }
2154
2155     return out_surface_id;
2156 }       
2157
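     /* Image load/save helpers: the kernel pair is chosen from the
      * destination surface's fourcc, converting between planar YUV
      * (I420/YV12) and NV12 as required. */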
2158 static VAStatus
2159 i965_image_i420_processing(VADriverContextP ctx,
2160                            const struct i965_surface *src_surface,
2161                            const VARectangle *src_rect,
2162                            const struct i965_surface *dst_surface,
2163                            const VARectangle *dst_rect)
2164 {
2165     struct i965_driver_data *i965 = i965_driver_data(ctx);
2166     struct i965_post_processing_context *pp_context = i965->pp_context;
2167     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
2168
2169     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
2170         i965_post_processing_internal(ctx, i965->pp_context,
2171                                       src_surface,
2172                                       src_rect,
2173                                       dst_surface,
2174                                       dst_rect,
2175                                       PP_PL3_LOAD_SAVE_N12);
2176     } else {
2177         i965_post_processing_internal(ctx, i965->pp_context,
2178                                       src_surface,
2179                                       src_rect,
2180                                       dst_surface,
2181                                       dst_rect,
2182                                       PP_PL3_LOAD_SAVE_PL3);
2183     }
2184
2185     intel_batchbuffer_flush(pp_context->batch);
2186
2187     return VA_STATUS_SUCCESS;
2188 }
2189
2190 static VAStatus
2191 i965_image_nv12_processing(VADriverContextP ctx,
2192                            const struct i965_surface *src_surface,
2193                            const VARectangle *src_rect,
2194                            const struct i965_surface *dst_surface,
2195                            const VARectangle *dst_rect)
2196 {
2197     struct i965_driver_data *i965 = i965_driver_data(ctx);
2198     struct i965_post_processing_context *pp_context = i965->pp_context;
2199     int fourcc = pp_get_surface_fourcc(ctx, dst_surface);
2200
2201     if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
2202         i965_post_processing_internal(ctx, i965->pp_context,
2203                                       src_surface,
2204                                       src_rect,
2205                                       dst_surface,
2206                                       dst_rect,
2207                                       PP_NV12_LOAD_SAVE_N12);
2208     } else {
2209         i965_post_processing_internal(ctx, i965->pp_context,
2210                                       src_surface,
2211                                       src_rect,
2212                                       dst_surface,
2213                                       dst_rect,
2214                                       PP_NV12_LOAD_SAVE_PL3);
2215     }
2216
2217     intel_batchbuffer_flush(pp_context->batch);
2218
2219     return VA_STATUS_SUCCESS;
2220 }
2221
2222 VAStatus
2223 i965_image_processing(VADriverContextP ctx,
2224                       const struct i965_surface *src_surface,
2225                       const VARectangle *src_rect,
2226                       const struct i965_surface *dst_surface,
2227                       const VARectangle *dst_rect)
2228 {
2229     struct i965_driver_data *i965 = i965_driver_data(ctx);
2230     VAStatus status = VA_STATUS_ERROR_UNIMPLEMENTED;
2231
2232     if (HAS_PP(i965)) {
2233         int fourcc = pp_get_surface_fourcc(ctx, src_surface);
2234
2235         switch (fourcc) {
2236         case VA_FOURCC('Y', 'V', '1', '2'):
2237         case VA_FOURCC('I', '4', '2', '0'):
2238             status = i965_image_i420_processing(ctx,
2239                                                 src_surface,
2240                                                 src_rect,
2241                                                 dst_surface,
2242                                                 dst_rect);
2243             break;
2244
2245         case VA_FOURCC('N', 'V', '1', '2'):
2246             status = i965_image_nv12_processing(ctx,
2247                                                 src_surface,
2248                                                 src_rect,
2249                                                 dst_surface,
2250                                                 dst_rect);
2251             break;
2252
2253         default:
2254             status = VA_STATUS_ERROR_UNIMPLEMENTED;
2255             break;
2256         }
2257     }
2258
2259     return status;
2260 }       
2261
2262 static void
2263 i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
2264 {
2265     int i;
2266
2267     dri_bo_unreference(pp_context->curbe.bo);
2268     pp_context->curbe.bo = NULL;
2269
2270     for (i = 0; i < MAX_PP_SURFACES; i++) {
2271         dri_bo_unreference(pp_context->surfaces[i].ss_bo);
2272         pp_context->surfaces[i].ss_bo = NULL;
2273
2274         dri_bo_unreference(pp_context->surfaces[i].s_bo);
2275         pp_context->surfaces[i].s_bo = NULL;
2276     }
2277
2278     dri_bo_unreference(pp_context->sampler_state_table.bo);
2279     pp_context->sampler_state_table.bo = NULL;
2280
2281     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
2282     pp_context->sampler_state_table.bo_8x8 = NULL;
2283
2284     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
2285     pp_context->sampler_state_table.bo_8x8_uv = NULL;
2286
2287     dri_bo_unreference(pp_context->binding_table.bo);
2288     pp_context->binding_table.bo = NULL;
2289
2290     dri_bo_unreference(pp_context->idrt.bo);
2291     pp_context->idrt.bo = NULL;
2292     pp_context->idrt.num_interface_descriptors = 0;
2293
2294     dri_bo_unreference(pp_context->vfe_state.bo);
2295     pp_context->vfe_state.bo = NULL;
2296
2297     dri_bo_unreference(pp_context->stmm.bo);
2298     pp_context->stmm.bo = NULL;
2299
2300     for (i = 0; i < NUM_PP_MODULES; i++) {
2301         struct pp_module *pp_module = &pp_context->pp_modules[i];
2302
2303         dri_bo_unreference(pp_module->kernel.bo);
2304         pp_module->kernel.bo = NULL;
2305     }
2306
2307 }
2308
2309 Bool
2310 i965_post_processing_terminate(VADriverContextP ctx)
2311 {
2312     struct i965_driver_data *i965 = i965_driver_data(ctx);
2313     struct i965_post_processing_context *pp_context = i965->pp_context;
2314
2315     if (pp_context) {
2316         i965_post_processing_context_finalize(pp_context);
2317         free(pp_context);
2318     }
2319
2320     i965->pp_context = NULL;
2321
2322     return True;
2323 }
2324
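     /* One-time context setup: partition the URB between VFE entries and
      * the constant (CS) area, pick the Gen5 or Gen6/Gen7 kernel table and
      * upload each kernel binary into its own BO. */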
2325 static void
2326 i965_post_processing_context_init(VADriverContextP ctx,
2327                                   struct i965_post_processing_context *pp_context,
2328                                   struct intel_batchbuffer *batch)
2329 {
2330     struct i965_driver_data *i965 = i965_driver_data(ctx);
2331     int i;
2332
2333     pp_context->urb.size = URB_SIZE((&i965->intel));
2334     pp_context->urb.num_vfe_entries = 32;
2335     pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
2336     pp_context->urb.num_cs_entries = 1;
2337     pp_context->urb.size_cs_entry = 2;      /* in 512 bits unit */
2338     pp_context->urb.vfe_start = 0;
2339     pp_context->urb.cs_start = pp_context->urb.vfe_start + 
2340         pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
2341     assert(pp_context->urb.cs_start + 
2342            pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
2343
2344     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
2345     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
2346
2347     if (IS_GEN6(i965->intel.device_id) ||
2348         IS_GEN7(i965->intel.device_id))
2349         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
2350     else if (IS_IRONLAKE(i965->intel.device_id))
2351         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
2352
2353     for (i = 0; i < NUM_PP_MODULES; i++) {
2354         struct pp_module *pp_module = &pp_context->pp_modules[i];
2355         dri_bo_unreference(pp_module->kernel.bo);
2356         if (pp_module->kernel.bin) {
2357             pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
2358                                                 pp_module->kernel.name,
2359                                                 pp_module->kernel.size,
2360                                                 4096);
2361             assert(pp_module->kernel.bo);
2362             dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
2363         } else {
2364             pp_module->kernel.bo = NULL;
2365         }
2366     }
2367
2368     pp_context->batch = batch;
2369 }
2370
2371 Bool
2372 i965_post_processing_init(VADriverContextP ctx)
2373 {
2374     struct i965_driver_data *i965 = i965_driver_data(ctx);
2375     struct i965_post_processing_context *pp_context = i965->pp_context;
2376
2377     if (HAS_PP(i965)) {
2378         if (pp_context == NULL) {
2379             pp_context = calloc(1, sizeof(*pp_context));
2380             i965_post_processing_context_init(ctx, pp_context, i965->batch);
2381             i965->pp_context = pp_context;
2382         }
2383     }
2384
2385     return True;
2386 }
2387
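     /* VA video processing path: run the AVS module from the input
      * surface/region described by the pipeline and input parameter
      * buffers into the current render target, which is backed by an NV12
      * BO if it does not have one yet. */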
2388 static void 
2389 i965_proc_picture(VADriverContextP ctx, 
2390                   VAProfile profile, 
2391                   union codec_state *codec_state,
2392                   struct hw_context *hw_context)
2393 {
2394     struct i965_driver_data *i965 = i965_driver_data(ctx);
2395     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
2396     struct proc_state *proc_state = &codec_state->proc;
2397     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
2398     VAProcInputParameterBuffer *input_param = (VAProcInputParameterBuffer *)proc_state->input_param->buffer;
2399     struct object_surface *obj_surface;
2400     struct i965_surface src_surface, dst_surface;
2401
2402     assert(input_param->surface != VA_INVALID_ID);
2403     assert(proc_state->current_render_target != VA_INVALID_ID);
2404
2405     obj_surface = SURFACE(proc_state->current_render_target);
2406     i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
2407
2408     src_surface.id = input_param->surface;
2409     src_surface.flag = I965_SURFACE_SURFACE;
2410     dst_surface.id = proc_state->current_render_target;
2411     dst_surface.flag = I965_SURFACE_SURFACE;
2412
2413     i965_post_processing_internal(ctx, &proc_context->pp_context,
2414                                   &src_surface,
2415                                   &input_param->region,
2416                                   &dst_surface,
2417                                   &pipeline_param->output_region,
2418                                   PP_NV12_AVS);
2419
2420     intel_batchbuffer_flush(hw_context->batch);
2421 }
2422
2423 static void
2424 i965_proc_context_destroy(void *hw_context)
2425 {
2426     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
2427
2428     i965_post_processing_context_finalize(&proc_context->pp_context);
2429     intel_batchbuffer_free(proc_context->base.batch);
2430     free(proc_context);
2431 }
2432
2433 struct hw_context *
2434 i965_proc_context_init(VADriverContextP ctx, VAProfile profile)
2435 {
2436     struct intel_driver_data *intel = intel_driver_data(ctx);
2437     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
2438
2439     proc_context->base.destroy = i965_proc_context_destroy;
2440     proc_context->base.run = i965_proc_picture;
2441     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
2442     i965_post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
2443
2444     return (struct hw_context *)proc_context;
2445 }