i965_drv_video: handle VPP buffers
[profile/ivi/vaapi-intel-driver.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include <va/va_backend.h>
35
36 #include "intel_batchbuffer.h"
37 #include "intel_driver.h"
38 #include "i965_defines.h"
39 #include "i965_structs.h"
40 #include "i965_drv_video.h"
41 #include "i965_post_processing.h"
42 #include "i965_render.h"
43
/* Post-processing is only implemented for Ironlake, Sandy Bridge (gen6)
 * and Ivy Bridge (gen7) devices. */
#define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) ||     \
                     IS_GEN6((ctx)->intel.device_id) ||         \
                     IS_GEN7((ctx)->intel.device_id))
47
/* Pre-compiled gen5 (Ironlake) media kernels; the .g4b.gen5 files are
 * generated from the sources under shaders/post_processing/. */
static const uint32_t pp_null_gen5[][4] = {
#include "shaders/post_processing/null.g4b.gen5"
};

static const uint32_t pp_nv12_load_save_gen5[][4] = {
#include "shaders/post_processing/nv12_load_save_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_scaling_gen5[][4] = {
#include "shaders/post_processing/nv12_scaling_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_avs_gen5[][4] = {
#include "shaders/post_processing/nv12_avs_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_dndi_gen5[][4] = {
#include "shaders/post_processing/nv12_dndi_nv12.g4b.gen5"
};
67
68 static void pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
69                                VASurfaceID in_surface_id, VASurfaceID out_surface_id,
70                                const VARectangle *src_rect, const VARectangle *dst_rect);
71 static void pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
72                                    VASurfaceID in_surface_id, VASurfaceID out_surface_id,
73                                    const VARectangle *src_rect, const VARectangle *dst_rect);
74 static void pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
75                                        VASurfaceID in_surface_id, VASurfaceID out_surface_id,
76                                        const VARectangle *src_rect, const VARectangle *dst_rect);
77 static void pp_nv12_load_save_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
78                                          VASurfaceID in_surface_id, VASurfaceID out_surface_id,
79                                          const VARectangle *src_rect, const VARectangle *dst_rect);
80 static void pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
81                                     VASurfaceID in_surface_id, VASurfaceID out_surface_id,
82                                     const VARectangle *src_rect, const VARectangle *dst_rect);
83
/* Gen5 (Ironlake) dispatch table: binds each PP_* opcode to its kernel
 * binary and the initialize routine that sets up surface states and
 * walker hooks before the pipeline is emitted. */
static struct pp_module pp_modules_gen5[] = {
    {
        {
            "NULL module (for testing)",
            PP_NULL,
            pp_null_gen5,
            sizeof(pp_null_gen5),
            NULL,
        },

        pp_null_initialize,
    },

    {
        {
            "NV12 Load & Save module",
            PP_NV12_LOAD_SAVE,
            pp_nv12_load_save_gen5,
            sizeof(pp_nv12_load_save_gen5),
            NULL,
        },

        pp_nv12_load_save_initialize,
    },

    {
        {
            "NV12 Scaling module",
            PP_NV12_SCALING,
            pp_nv12_scaling_gen5,
            sizeof(pp_nv12_scaling_gen5),
            NULL,
        },

        pp_nv12_scaling_initialize,
    },

    {
        {
            "NV12 AVS module",
            PP_NV12_AVS,
            pp_nv12_avs_gen5,
            sizeof(pp_nv12_avs_gen5),
            NULL,
        },

        pp_nv12_avs_initialize,
    },

    {
        {
            "NV12 DNDI module",
            PP_NV12_DNDI,
            pp_nv12_dndi_gen5,
            sizeof(pp_nv12_dndi_gen5),
            NULL,
        },

        pp_nv12_dndi_initialize,
    },
};
145
/* Pre-compiled gen6 (Sandy Bridge) media kernels (.g6b binaries). */
static const uint32_t pp_null_gen6[][4] = {
#include "shaders/post_processing/null.g6b"
};

static const uint32_t pp_nv12_load_save_gen6[][4] = {
#include "shaders/post_processing/nv12_load_save_nv12.g6b"
};

static const uint32_t pp_nv12_scaling_gen6[][4] = {
#include "shaders/post_processing/nv12_scaling_nv12.g6b"
};

static const uint32_t pp_nv12_avs_gen6[][4] = {
#include "shaders/post_processing/nv12_avs_nv12.g6b"
};

static const uint32_t pp_nv12_dndi_gen6[][4] = {
#include "shaders/post_processing/nv12_dndi_nv12.g6b"
};
165
/* Gen6 (Sandy Bridge) dispatch table; same layout as pp_modules_gen5 but
 * pointing at the gen6 kernel binaries. */
static struct pp_module pp_modules_gen6[] = {
    {
        {
            "NULL module (for testing)",
            PP_NULL,
            pp_null_gen6,
            sizeof(pp_null_gen6),
            NULL,
        },

        pp_null_initialize,
    },

    {
        {
            "NV12 Load & Save module",
            PP_NV12_LOAD_SAVE,
            pp_nv12_load_save_gen6,
            sizeof(pp_nv12_load_save_gen6),
            NULL,
        },

        pp_nv12_load_save_initialize,
    },

    {
        {
            "NV12 Scaling module",
            PP_NV12_SCALING,
            pp_nv12_scaling_gen6,
            sizeof(pp_nv12_scaling_gen6),
            NULL,
        },

        pp_nv12_scaling_initialize,
    },

    {
        {
            "NV12 AVS module",
            PP_NV12_AVS,
            pp_nv12_avs_gen6,
            sizeof(pp_nv12_avs_gen6),
            NULL,
        },

        pp_nv12_avs_initialize,
    },

    {
        {
            "NV12 DNDI module",
            PP_NV12_DNDI,
            pp_nv12_dndi_gen6,
            sizeof(pp_nv12_dndi_gen6),
            NULL,
        },

        pp_nv12_dndi_initialize,
    },
};
227
/* Shorthand accessors for the CURBE (static) and MEDIA_OBJECT inline
 * parameter blocks; both expand through a local `pp_context`. */
#define pp_static_parameter     pp_context->pp_static_parameter
#define pp_inline_parameter     pp_context->pp_inline_parameter
230
231 static void
232 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
233 {
234     switch (tiling) {
235     case I915_TILING_NONE:
236         ss->ss3.tiled_surface = 0;
237         ss->ss3.tile_walk = 0;
238         break;
239     case I915_TILING_X:
240         ss->ss3.tiled_surface = 1;
241         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
242         break;
243     case I915_TILING_Y:
244         ss->ss3.tiled_surface = 1;
245         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
246         break;
247     }
248 }
249
250 static void
251 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
252 {
253     switch (tiling) {
254     case I915_TILING_NONE:
255         ss->ss2.tiled_surface = 0;
256         ss->ss2.tile_walk = 0;
257         break;
258     case I915_TILING_X:
259         ss->ss2.tiled_surface = 1;
260         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
261         break;
262     case I915_TILING_Y:
263         ss->ss2.tiled_surface = 1;
264         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
265         break;
266     }
267 }
268
/*
 * Intentionally empty: the per-surface SURFACE_STATE objects are
 * created by each module's initialize function (pp_*_initialize),
 * not by the generic state setup path.
 */
static void
ironlake_pp_surface_state(struct i965_post_processing_context *pp_context)
{

}
274
/*
 * Build the (single-entry) interface descriptor for the currently
 * selected PP kernel.  The descriptor references the kernel, the
 * sampler state table and the binding table; each pointer field is
 * backed by a relocation so the GPU sees final buffer offsets.
 */
static void
ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
{
    struct i965_interface_descriptor *desc;
    dri_bo *bo;
    int pp_index = pp_context->current_pp;

    bo = pp_context->idrt.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;
    memset(desc, 0, sizeof(*desc));
    desc->desc0.grf_reg_blocks = 10;
    desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
    desc->desc1.const_urb_entry_read_offset = 0;
    desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
    desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
    desc->desc2.sampler_count = 0;
    desc->desc3.binding_table_entry_count = 0;
    desc->desc3.binding_table_pointer = 
        pp_context->binding_table.bo->offset >> 5; /*reloc */

    /* The reloc deltas below re-encode the low bits that share a dword
     * with each relocated pointer (grf_reg_blocks, sampler_count,
     * binding_table_entry_count). */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      desc->desc0.grf_reg_blocks,
                      offsetof(struct i965_interface_descriptor, desc0),
                      pp_context->pp_modules[pp_index].kernel.bo);

    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      desc->desc2.sampler_count << 2,
                      offsetof(struct i965_interface_descriptor, desc2),
                      pp_context->sampler_state_table.bo);

    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      desc->desc3.binding_table_entry_count,
                      offsetof(struct i965_interface_descriptor, desc3),
                      pp_context->binding_table.bo);

    dri_bo_unmap(bo);
    pp_context->idrt.num_interface_descriptors++;
}
318
319 static void
320 ironlake_pp_binding_table(struct i965_post_processing_context *pp_context)
321 {
322     unsigned int *binding_table;
323     dri_bo *bo = pp_context->binding_table.bo;
324     int i;
325
326     dri_bo_map(bo, 1);
327     assert(bo->virtual);
328     binding_table = bo->virtual;
329     memset(binding_table, 0, bo->size);
330
331     for (i = 0; i < MAX_PP_SURFACES; i++) {
332         if (pp_context->surfaces[i].ss_bo) {
333             assert(pp_context->surfaces[i].s_bo);
334
335             binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
336             dri_bo_emit_reloc(bo,
337                               I915_GEM_DOMAIN_INSTRUCTION, 0,
338                               0,
339                               i * sizeof(*binding_table),
340                               pp_context->surfaces[i].ss_bo);
341         }
342     
343     }
344
345     dri_bo_unmap(bo);
346 }
347
/*
 * Fill the VFE (video front-end) fixed-function state: thread count,
 * URB entry allocation, generic mode, and a relocated pointer to the
 * interface descriptor table.
 */
static void
ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
{
    struct i965_vfe_state *vfe_state;
    dri_bo *bo;

    bo = pp_context->vfe_state.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);
    vfe_state = bo->virtual;
    memset(vfe_state, 0, sizeof(*vfe_state));
    vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
    vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
    vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
    vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
    vfe_state->vfe1.children_present = 0;
    vfe_state->vfe2.interface_descriptor_base = 
        pp_context->idrt.bo->offset >> 4; /* reloc */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_vfe_state, vfe2),
                      pp_context->idrt.bo);
    dri_bo_unmap(bo);
}
373
374 static void
375 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
376 {
377     unsigned char *constant_buffer;
378
379     assert(sizeof(pp_static_parameter) == 128);
380     dri_bo_map(pp_context->curbe.bo, 1);
381     assert(pp_context->curbe.bo->virtual);
382     constant_buffer = pp_context->curbe.bo->virtual;
383     memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
384     dri_bo_unmap(pp_context->curbe.bo);
385 }
386
/*
 * Build all indirect media states (binding table, interface
 * descriptors, VFE state, CURBE constants) before the batch commands
 * referencing them are emitted.  The ctx argument is currently unused.
 */
static void
ironlake_pp_states_setup(VADriverContextP ctx,
                         struct i965_post_processing_context *pp_context)
{
    ironlake_pp_surface_state(pp_context);
    ironlake_pp_binding_table(pp_context);
    ironlake_pp_interface_descriptor_table(pp_context);
    ironlake_pp_vfe_state(pp_context);
    ironlake_pp_upload_constants(pp_context);
}
397
/*
 * Switch the GPU to the media pipeline; post-processing runs as media
 * kernels, not through the 3D pipeline.
 */
static void
ironlake_pp_pipeline_select(VADriverContextP ctx,
                            struct i965_post_processing_context *pp_context)
{
    struct intel_batchbuffer *batch = pp_context->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
    ADVANCE_BATCH(batch);
}
408
/*
 * Partition the URB between the VFE and constant (CS) sections with a
 * URB_FENCE command: the VFE fence ends at urb.cs_start and the CS
 * fence at urb.size.
 */
static void
ironlake_pp_urb_layout(VADriverContextP ctx,
                       struct i965_post_processing_context *pp_context)
{
    struct intel_batchbuffer *batch = pp_context->batch;
    unsigned int vfe_fence, cs_fence;

    vfe_fence = pp_context->urb.cs_start;
    cs_fence = pp_context->urb.size;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 
              (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
              (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
    ADVANCE_BATCH(batch);
}
427
428 static void
429 ironlake_pp_state_base_address(VADriverContextP ctx,
430                                struct i965_post_processing_context *pp_context)
431 {
432     struct intel_batchbuffer *batch = pp_context->batch;
433
434     BEGIN_BATCH(batch, 8);
435     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
436     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
437     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
438     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
439     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
440     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
441     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
442     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
443     ADVANCE_BATCH(batch);
444 }
445
/*
 * Point MEDIA_STATE_POINTERS at the VFE state buffer (relocated); the
 * second dword is left zero.
 */
static void
ironlake_pp_state_pointers(VADriverContextP ctx,
                           struct i965_post_processing_context *pp_context)
{
    struct intel_batchbuffer *batch = pp_context->batch;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
    OUT_BATCH(batch, 0);
    OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}
458
/*
 * Describe the constant-buffer URB allocation: entry size (encoded as
 * size - 1) and the number of CS URB entries.
 */
static void 
ironlake_pp_cs_urb_layout(VADriverContextP ctx,
                          struct i965_post_processing_context *pp_context)
{
    struct intel_batchbuffer *batch = pp_context->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
              (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}
472
/*
 * Point CMD_CONSTANT_BUFFER at the CURBE bo holding the static
 * parameters.  Bit 8 marks the buffer valid; the reloc delta
 * (size_cs_entry - 1) carries the length field that shares the dword
 * with the relocated buffer address.
 */
static void
ironlake_pp_constant_buffer(VADriverContextP ctx,
                            struct i965_post_processing_context *pp_context)
{
    struct intel_batchbuffer *batch = pp_context->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(batch, pp_context->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              pp_context->urb.size_cs_entry - 1);
    ADVANCE_BATCH(batch);    
}
486
/*
 * Walk the output in blocks, emitting one MEDIA_OBJECT (with grf 5-6
 * inline data) per block.  The grid dimensions and per-block inline
 * parameters come from the hooks installed by the active module's
 * initialize function; a non-zero return from pp_set_block_parameter
 * skips that block.
 */
static void
ironlake_pp_object_walker(VADriverContextP ctx,
                          struct i965_post_processing_context *pp_context)
{
    struct intel_batchbuffer *batch = pp_context->batch;
    int x, x_steps, y, y_steps;

    /* the step hooks receive the module-private context, not pp_context */
    x_steps = pp_context->pp_x_steps(&pp_context->private_context);
    y_steps = pp_context->pp_y_steps(&pp_context->private_context);

    for (y = 0; y < y_steps; y++) {
        for (x = 0; x < x_steps; x++) {
            if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
                BEGIN_BATCH(batch, 20);
                OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
                OUT_BATCH(batch, 0);
                OUT_BATCH(batch, 0); /* no indirect data */
                OUT_BATCH(batch, 0);

                /* inline data grf 5-6 */
                assert(sizeof(pp_inline_parameter) == 64);
                intel_batchbuffer_data(batch, &pp_inline_parameter, sizeof(pp_inline_parameter));

                ADVANCE_BATCH(batch);
            }
        }
    }
}
515
/*
 * Emit the full Ironlake media pipeline for one PP operation into the
 * batchbuffer: flush, pipeline select, base addresses, indirect state
 * pointers, URB/CURBE layout and finally the MEDIA_OBJECT walker.
 * Wrapped in an atomic batch section so the sequence is not split
 * across batchbuffers.
 */
static void
ironlake_pp_pipeline_setup(VADriverContextP ctx,
                           struct i965_post_processing_context *pp_context)
{
    struct intel_batchbuffer *batch = pp_context->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    ironlake_pp_pipeline_select(ctx, pp_context);
    ironlake_pp_state_base_address(ctx, pp_context);
    ironlake_pp_state_pointers(ctx, pp_context);
    ironlake_pp_urb_layout(ctx, pp_context);
    ironlake_pp_cs_urb_layout(ctx, pp_context);
    ironlake_pp_constant_buffer(ctx, pp_context);
    ironlake_pp_object_walker(ctx, pp_context);
    intel_batchbuffer_end_atomic(batch);
}
533
/* Null module: the walker grid is a single column. */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context;      /* unused */

    return 1;
}
539
/* Null module: the walker grid is a single row. */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context;      /* unused */

    return 1;
}
545
/* Null module: no per-block parameters; never skip the block. */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;           /* unused */
    (void)x;
    (void)y;

    return 0;
}
551
/*
 * Null (pass-through test) module: installs hooks that emit a single
 * MEDIA_OBJECT with untouched inline parameters.  The surface ids and
 * rectangles are intentionally unused.
 */
static void
pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                   VASurfaceID in_surface_id, VASurfaceID out_surface_id,
                   const VARectangle *src_rect, const VARectangle *dst_rect)
{
    /* private function & data */
    pp_context->pp_x_steps = pp_null_x_steps;
    pp_context->pp_y_steps = pp_null_y_steps;
    pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
}
562
/* Load/save: a whole row of blocks is covered per step (1 x N grid). */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context;      /* unused */

    return 1;
}
568
/*
 * One walker row per 8 destination lines.
 * NOTE(review): truncating division — assumes dest_h is a multiple of
 * 8 (surface heights appear to be allocator-aligned); confirm if that
 * invariant ever changes.
 */
static int
pp_load_save_y_steps(void *private_context)
{
    struct pp_load_save_context *pp_load_save_context = private_context;

    return pp_load_save_context->dest_h / 8;
}
576
/*
 * Load/save walker: every MEDIA_OBJECT covers a full 16x8 block, so
 * the masks enable all columns/rows and the destination origin is the
 * block index scaled to pixels.  Returns 0 (never skips a block).
 */
static int
pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    pp_inline_parameter.grf5.block_vertical_mask = 0xff;
    pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
    pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
    pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;

    return 0;
}
587
588 static void
589 pp_nv12_load_save_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
590                              VASurfaceID in_surface_id, VASurfaceID out_surface_id,
591                              const VARectangle *src_rect, const VARectangle *dst_rect)
592 {
593     struct i965_driver_data *i965 = i965_driver_data(ctx);
594     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
595     struct object_surface *obj_surface;
596     struct i965_surface_state *ss;
597     dri_bo *bo;
598     int index, w, h;
599     int orig_w, orig_h;
600     unsigned int tiling, swizzle;
601
602     /* source surface */
603     obj_surface = SURFACE(in_surface_id);
604     orig_w = obj_surface->orig_width;
605     orig_h = obj_surface->orig_height;
606     w = obj_surface->width;
607     h = obj_surface->height;
608     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
609
610     /* source Y surface index 1 */
611     index = 1;
612     pp_context->surfaces[index].s_bo = obj_surface->bo;
613     dri_bo_reference(pp_context->surfaces[index].s_bo);
614     bo = dri_bo_alloc(i965->intel.bufmgr, 
615                       "surface state", 
616                       sizeof(struct i965_surface_state), 
617                       4096);
618     assert(bo);
619     pp_context->surfaces[index].ss_bo = bo;
620     dri_bo_map(bo, True);
621     assert(bo->virtual);
622     ss = bo->virtual;
623     memset(ss, 0, sizeof(*ss));
624     ss->ss0.surface_type = I965_SURFACE_2D;
625     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
626     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
627     ss->ss2.width = orig_w / 4 - 1;
628     ss->ss2.height = orig_h - 1;
629     ss->ss3.pitch = w - 1;
630     pp_set_surface_tiling(ss, tiling);
631     dri_bo_emit_reloc(bo,
632                       I915_GEM_DOMAIN_RENDER, 
633                       0,
634                       0,
635                       offsetof(struct i965_surface_state, ss1),
636                       pp_context->surfaces[index].s_bo);
637     dri_bo_unmap(bo);
638
639     /* source UV surface index 2 */
640     index = 2;
641     pp_context->surfaces[index].s_bo = obj_surface->bo;
642     dri_bo_reference(pp_context->surfaces[index].s_bo);
643     bo = dri_bo_alloc(i965->intel.bufmgr, 
644                       "surface state", 
645                       sizeof(struct i965_surface_state), 
646                       4096);
647     assert(bo);
648     pp_context->surfaces[index].ss_bo = bo;
649     dri_bo_map(bo, True);
650     assert(bo->virtual);
651     ss = bo->virtual;
652     memset(ss, 0, sizeof(*ss));
653     ss->ss0.surface_type = I965_SURFACE_2D;
654     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
655     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
656     ss->ss2.width = orig_w / 4 - 1;
657     ss->ss2.height = orig_h / 2 - 1;
658     ss->ss3.pitch = w - 1;
659     pp_set_surface_tiling(ss, tiling);
660     dri_bo_emit_reloc(bo,
661                       I915_GEM_DOMAIN_RENDER, 
662                       0,
663                       w * h,
664                       offsetof(struct i965_surface_state, ss1),
665                       pp_context->surfaces[index].s_bo);
666     dri_bo_unmap(bo);
667
668     /* destination surface */
669     obj_surface = SURFACE(out_surface_id);
670     orig_w = obj_surface->orig_width;
671     orig_h = obj_surface->orig_height;
672     w = obj_surface->width;
673     h = obj_surface->height;
674     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
675
676     /* destination Y surface index 7 */
677     index = 7;
678     pp_context->surfaces[index].s_bo = obj_surface->bo;
679     dri_bo_reference(pp_context->surfaces[index].s_bo);
680     bo = dri_bo_alloc(i965->intel.bufmgr, 
681                       "surface state", 
682                       sizeof(struct i965_surface_state), 
683                       4096);
684     assert(bo);
685     pp_context->surfaces[index].ss_bo = bo;
686     dri_bo_map(bo, True);
687     assert(bo->virtual);
688     ss = bo->virtual;
689     memset(ss, 0, sizeof(*ss));
690     ss->ss0.surface_type = I965_SURFACE_2D;
691     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
692     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
693     ss->ss2.width = orig_w / 4 - 1;
694     ss->ss2.height = orig_h - 1;
695     ss->ss3.pitch = w - 1;
696     pp_set_surface_tiling(ss, tiling);
697     dri_bo_emit_reloc(bo,
698                       I915_GEM_DOMAIN_RENDER, 
699                       I915_GEM_DOMAIN_RENDER,
700                       0,
701                       offsetof(struct i965_surface_state, ss1),
702                       pp_context->surfaces[index].s_bo);
703     dri_bo_unmap(bo);
704
705     /* destination UV surface index 8 */
706     index = 8;
707     pp_context->surfaces[index].s_bo = obj_surface->bo;
708     dri_bo_reference(pp_context->surfaces[index].s_bo);
709     bo = dri_bo_alloc(i965->intel.bufmgr, 
710                       "surface state", 
711                       sizeof(struct i965_surface_state), 
712                       4096);
713     assert(bo);
714     pp_context->surfaces[index].ss_bo = bo;
715     dri_bo_map(bo, True);
716     assert(bo->virtual);
717     ss = bo->virtual;
718     memset(ss, 0, sizeof(*ss));
719     ss->ss0.surface_type = I965_SURFACE_2D;
720     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
721     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
722     ss->ss2.width = orig_w / 4 - 1;
723     ss->ss2.height = orig_h / 2 - 1;
724     ss->ss3.pitch = w - 1;
725     pp_set_surface_tiling(ss, tiling);
726     dri_bo_emit_reloc(bo,
727                       I915_GEM_DOMAIN_RENDER, 
728                       I915_GEM_DOMAIN_RENDER,
729                       w * h,
730                       offsetof(struct i965_surface_state, ss1),
731                       pp_context->surfaces[index].s_bo);
732     dri_bo_unmap(bo);
733
734     /* private function & data */
735     pp_context->pp_x_steps = pp_load_save_x_steps;
736     pp_context->pp_y_steps = pp_load_save_y_steps;
737     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
738     pp_load_save_context->dest_h = h;
739     pp_load_save_context->dest_w = w;
740
741     pp_inline_parameter.grf5.block_count_x = w / 16;   /* 1 x N */
742     pp_inline_parameter.grf5.number_blocks = w / 16;
743 }
744
/* Scaling: a whole row of blocks is covered per step (1 x N grid). */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context;      /* unused */

    return 1;
}
750
/*
 * One walker row per 8 destination lines.
 * NOTE(review): truncating division — assumes dest_h is a multiple of
 * 8; confirm against how dest_h is set by the initialize function.
 */
static int
pp_scaling_y_steps(void *private_context)
{
    struct pp_scaling_context *pp_scaling_context = private_context;

    return pp_scaling_context->dest_h / 8;
}
758
/*
 * Scaling walker: source block origins advance in normalized scaling
 * steps (step * block index, offset by the normalized source rect
 * origin); destination origins advance in pixels (offset by the
 * destination rect origin).  Returns 0 (never skips a block).
 */
static int
pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
    float src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
    float src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;

    pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
    pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
    pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
    pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
    
    return 0;
}
773
774 static void
775 pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
776                            VASurfaceID in_surface_id, VASurfaceID out_surface_id,
777                            const VARectangle *src_rect, const VARectangle *dst_rect)
778 {
779     struct i965_driver_data *i965 = i965_driver_data(ctx);
780     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
781     struct object_surface *obj_surface;
782     struct i965_sampler_state *sampler_state;
783     struct i965_surface_state *ss;
784     dri_bo *bo;
785     int index;
786     int in_w, in_h, in_wpitch, in_hpitch;
787     int out_w, out_h, out_wpitch, out_hpitch;
788     unsigned int tiling, swizzle;
789
790     /* source surface */
791     obj_surface = SURFACE(in_surface_id);
792     in_w = obj_surface->orig_width;
793     in_h = obj_surface->orig_height;
794     in_wpitch = obj_surface->width;
795     in_hpitch = obj_surface->height;
796     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
797
798     /* source Y surface index 1 */
799     index = 1;
800     pp_context->surfaces[index].s_bo = obj_surface->bo;
801     dri_bo_reference(pp_context->surfaces[index].s_bo);
802     bo = dri_bo_alloc(i965->intel.bufmgr, 
803                       "surface state", 
804                       sizeof(struct i965_surface_state), 
805                       4096);
806     assert(bo);
807     pp_context->surfaces[index].ss_bo = bo;
808     dri_bo_map(bo, True);
809     assert(bo->virtual);
810     ss = bo->virtual;
811     memset(ss, 0, sizeof(*ss));
812     ss->ss0.surface_type = I965_SURFACE_2D;
813     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
814     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
815     ss->ss2.width = in_w - 1;
816     ss->ss2.height = in_h - 1;
817     ss->ss3.pitch = in_wpitch - 1;
818     pp_set_surface_tiling(ss, tiling);
819     dri_bo_emit_reloc(bo,
820                       I915_GEM_DOMAIN_RENDER, 
821                       0,
822                       0,
823                       offsetof(struct i965_surface_state, ss1),
824                       pp_context->surfaces[index].s_bo);
825     dri_bo_unmap(bo);
826
827     /* source UV surface index 2 */
828     index = 2;
829     pp_context->surfaces[index].s_bo = obj_surface->bo;
830     dri_bo_reference(pp_context->surfaces[index].s_bo);
831     bo = dri_bo_alloc(i965->intel.bufmgr, 
832                       "surface state", 
833                       sizeof(struct i965_surface_state), 
834                       4096);
835     assert(bo);
836     pp_context->surfaces[index].ss_bo = bo;
837     dri_bo_map(bo, True);
838     assert(bo->virtual);
839     ss = bo->virtual;
840     memset(ss, 0, sizeof(*ss));
841     ss->ss0.surface_type = I965_SURFACE_2D;
842     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
843     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + in_wpitch * in_hpitch;
844     ss->ss2.width = in_w / 2 - 1;
845     ss->ss2.height = in_h / 2 - 1;
846     ss->ss3.pitch = in_wpitch - 1;
847     pp_set_surface_tiling(ss, tiling);
848     dri_bo_emit_reloc(bo,
849                       I915_GEM_DOMAIN_RENDER, 
850                       0,
851                       in_wpitch * in_hpitch,
852                       offsetof(struct i965_surface_state, ss1),
853                       pp_context->surfaces[index].s_bo);
854     dri_bo_unmap(bo);
855
856     /* destination surface */
857     obj_surface = SURFACE(out_surface_id);
858     out_w = obj_surface->orig_width;
859     out_h = obj_surface->orig_height;
860     out_wpitch = obj_surface->width;
861     out_hpitch = obj_surface->height;
862     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
863
864     /* destination Y surface index 7 */
865     index = 7;
866     pp_context->surfaces[index].s_bo = obj_surface->bo;
867     dri_bo_reference(pp_context->surfaces[index].s_bo);
868     bo = dri_bo_alloc(i965->intel.bufmgr, 
869                       "surface state", 
870                       sizeof(struct i965_surface_state), 
871                       4096);
872     assert(bo);
873     pp_context->surfaces[index].ss_bo = bo;
874     dri_bo_map(bo, True);
875     assert(bo->virtual);
876     ss = bo->virtual;
877     memset(ss, 0, sizeof(*ss));
878     ss->ss0.surface_type = I965_SURFACE_2D;
879     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
880     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
881     ss->ss2.width = out_w / 4 - 1;
882     ss->ss2.height = out_h - 1;
883     ss->ss3.pitch = out_wpitch - 1;
884     pp_set_surface_tiling(ss, tiling);
885     dri_bo_emit_reloc(bo,
886                       I915_GEM_DOMAIN_RENDER, 
887                       I915_GEM_DOMAIN_RENDER,
888                       0,
889                       offsetof(struct i965_surface_state, ss1),
890                       pp_context->surfaces[index].s_bo);
891     dri_bo_unmap(bo);
892
893     /* destination UV surface index 8 */
894     index = 8;
895     pp_context->surfaces[index].s_bo = obj_surface->bo;
896     dri_bo_reference(pp_context->surfaces[index].s_bo);
897     bo = dri_bo_alloc(i965->intel.bufmgr, 
898                       "surface state", 
899                       sizeof(struct i965_surface_state), 
900                       4096);
901     assert(bo);
902     pp_context->surfaces[index].ss_bo = bo;
903     dri_bo_map(bo, True);
904     assert(bo->virtual);
905     ss = bo->virtual;
906     memset(ss, 0, sizeof(*ss));
907     ss->ss0.surface_type = I965_SURFACE_2D;
908     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
909     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + out_wpitch * out_hpitch;
910     ss->ss2.width = out_w / 4 - 1;
911     ss->ss2.height = out_h / 2 - 1;
912     ss->ss3.pitch = out_wpitch - 1;
913     pp_set_surface_tiling(ss, tiling);
914     dri_bo_emit_reloc(bo,
915                       I915_GEM_DOMAIN_RENDER, 
916                       I915_GEM_DOMAIN_RENDER,
917                       out_wpitch * out_hpitch,
918                       offsetof(struct i965_surface_state, ss1),
919                       pp_context->surfaces[index].s_bo);
920     dri_bo_unmap(bo);
921
922     /* sampler state */
923     dri_bo_map(pp_context->sampler_state_table.bo, True);
924     assert(pp_context->sampler_state_table.bo->virtual);
925     sampler_state = pp_context->sampler_state_table.bo->virtual;
926
927     /* SIMD16 Y index 1 */
928     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
929     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
930     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
931     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
932     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
933
934     /* SIMD16 UV index 2 */
935     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
936     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
937     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
938     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
939     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
940
941     dri_bo_unmap(pp_context->sampler_state_table.bo);
942
943     /* private function & data */
944     pp_context->pp_x_steps = pp_scaling_x_steps;
945     pp_context->pp_y_steps = pp_scaling_y_steps;
946     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
947
948     pp_scaling_context->dest_x = dst_rect->x;
949     pp_scaling_context->dest_y = dst_rect->y;
950     pp_scaling_context->dest_w = ALIGN(dst_rect->width, 16);
951     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 16);
952     pp_scaling_context->src_normalized_x = (float)src_rect->x / in_w / out_w;
953     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h / out_h;
954
955     pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / out_h;
956
957     pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / out_w;
958     pp_inline_parameter.grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
959     pp_inline_parameter.grf5.number_blocks = pp_scaling_context->dest_w / 16;
960     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
961     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
962 }
963
964 static int
965 pp_avs_x_steps(void *private_context)
966 {
967     struct pp_avs_context *pp_avs_context = private_context;
968
969     return pp_avs_context->dest_w / 16;
970 }
971
/* Number of vertical kernel steps for AVS: the kernel walks the full
 * destination height itself, so a single Y step is issued. */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;      /* unused */

    return 1;
}
977
/*
 * Per-block parameter setup for the AVS (adaptive video scaling) kernel.
 *
 * Computes the horizontal scaling step and the normalized source origin for
 * the 16x8 destination block at grid position (x, y), writing them into the
 * global pp_inline_parameter (and reading the Y step from
 * pp_static_parameter).
 *
 * Non-linear anamorphic scaling (NLAS): when the aspect-preserving width
 * (tmp_w below) is narrower than the destination, the left and right edge
 * regions are scaled with a per-pixel step that changes linearly
 * (video_step_delta) while the center is scaled uniformly, so the picture
 * fills the destination without uniformly distorting it.
 *
 * NOTE(review): the horizontal origin for x > 0 is accumulated from the
 * values programmed for the previous block, so this function relies on
 * being called with x increasing from 0 within each pass — confirm against
 * the ips_rsc/ips walker in i965_post_processing().
 */
static int
pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
    float src_x_steping, src_y_steping, video_step_delta;
    /* Destination width that would preserve the source aspect ratio at
     * dest_h, rounded up to a whole number of 16-pixel blocks. */
    int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);

    if (tmp_w >= pp_avs_context->dest_w) {
        /* Aspect-preserving width covers the destination: scale uniformly
         * and crop; the step never changes across the row. */
        pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
        pp_inline_parameter.grf6.video_step_delta = 0;
        
        if (x == 0) {
            /* Center the visible window: skip half of the overshoot
             * (tmp_w - dest_w) on the left, in normalized units. */
            pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
                pp_avs_context->src_normalized_x;
        } else {
            /* Advance the origin by the 16 per-pixel steps of the previous
             * block; 16*15/2 * delta is the arithmetic-series correction
             * for a linearly changing step (0 here, kept for symmetry). */
            src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
            video_step_delta = pp_inline_parameter.grf6.video_step_delta;
            pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                16 * 15 * video_step_delta / 2;
        }
    } else {
        /* NLAS case: dest is wider than the aspect-preserving width.
         * n0/n1: extra blocks to distribute left/right of the center;
         * n2: blocks borrowed from the center into each edge region;
         * nls_left/nls_right: non-linearly scaled block counts per edge;
         * f: normalized source width consumed by each edge region. */
        int n0, n1, n2, nls_left, nls_right;
        int factor_a = 5, factor_b = 4;
        float f;

        n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
        n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
        n2 = tmp_w / (16 * factor_a);
        nls_left = n0 + n2;
        nls_right = n1 + n2;
        f = (float) n2 * 16 / tmp_w;
        
        if (n0 < 5) {
            /* Too little overshoot to bother with NLAS: stretch the whole
             * source linearly across the destination. */
            pp_inline_parameter.grf6.video_step_delta = 0.0;

            if (x == 0) {
                pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
                pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
            } else {
                src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
                video_step_delta = pp_inline_parameter.grf6.video_step_delta;
                pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                    16 * 15 * video_step_delta / 2;
            }
        } else {
            if (x < nls_left) {
                /* Left edge region: step starts at a and grows by b per
                 * pixel so that the region consumes exactly f of the
                 * normalized source width. */
                /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
                float a = f / (nls_left * 16 * factor_b);
                float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
                
                pp_inline_parameter.grf6.video_step_delta = b;

                if (x == 0) {
                    pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
                    pp_inline_parameter.grf5.normalized_video_x_scaling_step = a;
                } else {
                    src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
                    video_step_delta = pp_inline_parameter.grf6.video_step_delta;
                    pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                        16 * 15 * video_step_delta / 2;
                    /* Step grew by b on each of the previous 16 pixels. */
                    pp_inline_parameter.grf5.normalized_video_x_scaling_step += 16 * b;
                }
            } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
                /* scale the center linearly */
                src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
                video_step_delta = pp_inline_parameter.grf6.video_step_delta;
                pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                    16 * 15 * video_step_delta / 2;
                pp_inline_parameter.grf6.video_step_delta = 0.0;
                pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
            } else {
                /* Right edge region: mirror of the left — step starts high
                 * and shrinks by b per pixel (hence delta = -b). */
                float a = f / (nls_right * 16 * factor_b);
                float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));

                src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
                video_step_delta = pp_inline_parameter.grf6.video_step_delta;
                pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                    16 * 15 * video_step_delta / 2;
                pp_inline_parameter.grf6.video_step_delta = -b;

                if (x == (pp_avs_context->dest_w / 16 - nls_right))
                    pp_inline_parameter.grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
                else
                    pp_inline_parameter.grf5.normalized_video_x_scaling_step -= b * 16;
            }
        }
    }

    /* Vertical origin is simply y blocks of 8 rows at the static Y step;
     * destination origins are in destination pixel coordinates. */
    src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
    pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
    pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
    pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;

    return 0;
}
1073
/*
 * Set up all GPU state for NV12 -> NV12 scaling through the AVS
 * (sample_8x8) pipeline:
 *   - source Y and UV planes bound as SURFACE_STATE2 entries for the 8x8
 *     sampler (binding table indices 1 and 2),
 *   - destination Y and UV planes bound as ordinary render surfaces
 *     (indices 7 and 8),
 *   - SAMPLER_8X8 state (adaptive 8-tap filter + IEF sharpening) for Y,
 *     and a nearest-filter variant for UV,
 *   - private walker callbacks and static/inline kernel parameters.
 *
 * Interface: both surfaces must be NV12; src_rect/dst_rect select the
 * regions to scale between.
 */
static void
pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                       VASurfaceID in_surface_id, VASurfaceID out_surface_id,
                       const VARectangle *src_rect, const VARectangle *dst_rect)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
    struct object_surface *obj_surface;
    struct i965_surface_state *ss;
    struct i965_sampler_8x8 *sampler_8x8;
    struct i965_sampler_8x8_state *sampler_8x8_state;
    struct i965_surface_state2 *ss_8x8;
    dri_bo *bo;
    int index;
    int in_w, in_h, in_wpitch, in_hpitch;       /* source: visible size / allocated pitch */
    int out_w, out_h, out_wpitch, out_hpitch;   /* destination: visible size / allocated pitch */
    unsigned int tiling, swizzle;

    /* surface */
    obj_surface = SURFACE(in_surface_id);
    in_w = obj_surface->orig_width;
    in_h = obj_surface->orig_height;
    in_wpitch = obj_surface->width;
    in_hpitch = obj_surface->height;
    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);

    /* source Y surface index 1 */
    index = 1;
    pp_context->surfaces[index].s_bo = obj_surface->bo;
    dri_bo_reference(pp_context->surfaces[index].s_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr, 
                      "Y surface state for sample_8x8", 
                      sizeof(struct i965_surface_state2), 
                      4096);
    assert(bo);
    pp_context->surfaces[index].ss_bo = bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);
    ss_8x8 = bo->virtual;
    memset(ss_8x8, 0, sizeof(*ss_8x8));
    /* Y plane starts at offset 0 of the NV12 bo. */
    ss_8x8->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset;
    ss_8x8->ss1.cbcr_pixel_offset_v_direction = 0;
    ss_8x8->ss1.width = in_w - 1;
    ss_8x8->ss1.height = in_h - 1;
    ss_8x8->ss2.half_pitch_for_chroma = 0;
    ss_8x8->ss2.pitch = in_wpitch - 1;
    ss_8x8->ss2.interleave_chroma = 0;
    ss_8x8->ss2.surface_format = SURFACE_FORMAT_Y8_UNORM;
    ss_8x8->ss3.x_offset_for_cb = 0;
    ss_8x8->ss3.y_offset_for_cb = 0;
    pp_set_surface2_tiling(ss_8x8, tiling);
    /* Relocation so ss0's base address tracks the bo; sampler reads only,
     * hence write domain 0. */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 
                      0,
                      0,
                      offsetof(struct i965_surface_state2, ss0),
                      pp_context->surfaces[index].s_bo);
    dri_bo_unmap(bo);

    /* source UV surface index 2 */
    index = 2;
    pp_context->surfaces[index].s_bo = obj_surface->bo;
    dri_bo_reference(pp_context->surfaces[index].s_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr, 
                      "UV surface state for sample_8x8", 
                      sizeof(struct i965_surface_state2), 
                      4096);
    assert(bo);
    pp_context->surfaces[index].ss_bo = bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);
    ss_8x8 = bo->virtual;
    memset(ss_8x8, 0, sizeof(*ss_8x8));
    /* UV plane follows the full Y plane (in_wpitch * in_hpitch bytes).
     * PLANAR_420_8 with interleave_chroma=1 is programmed with full-frame
     * width/height; the hardware derives the chroma subsampling. */
    ss_8x8->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset + in_wpitch * in_hpitch;
    ss_8x8->ss1.cbcr_pixel_offset_v_direction = 0;
    ss_8x8->ss1.width = in_w - 1;
    ss_8x8->ss1.height = in_h - 1;
    ss_8x8->ss2.half_pitch_for_chroma = 0;
    ss_8x8->ss2.pitch = in_wpitch - 1;
    ss_8x8->ss2.interleave_chroma = 1;
    ss_8x8->ss2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
    ss_8x8->ss3.x_offset_for_cb = 0;
    ss_8x8->ss3.y_offset_for_cb = 0;
    pp_set_surface2_tiling(ss_8x8, tiling);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 
                      0,
                      in_wpitch * in_hpitch,
                      offsetof(struct i965_surface_state2, ss0),
                      pp_context->surfaces[index].s_bo);
    dri_bo_unmap(bo);

    /* destination surface */
    obj_surface = SURFACE(out_surface_id);
    out_w = obj_surface->orig_width;
    out_h = obj_surface->orig_height;
    out_wpitch = obj_surface->width;
    out_hpitch = obj_surface->height;
    assert(out_w <= out_wpitch && out_h <= out_hpitch);
    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);

    /* destination Y surface index 7 */
    index = 7;
    pp_context->surfaces[index].s_bo = obj_surface->bo;
    dri_bo_reference(pp_context->surfaces[index].s_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr, 
                      "surface state", 
                      sizeof(struct i965_surface_state), 
                      4096);
    assert(bo);
    pp_context->surfaces[index].ss_bo = bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);
    ss = bo->virtual;
    memset(ss, 0, sizeof(*ss));
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
    /* Width in units of 4 pixels — presumably the kernel writes 4 bytes
     * per R8 element; TODO confirm against the AVS shader. */
    ss->ss2.width = out_w / 4 - 1;
    ss->ss2.height = out_h - 1;
    ss->ss3.pitch = out_wpitch - 1;
    pp_set_surface_tiling(ss, tiling);
    /* Destination is written, hence RENDER in the write domain too. */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 
                      I915_GEM_DOMAIN_RENDER,
                      0,
                      offsetof(struct i965_surface_state, ss1),
                      pp_context->surfaces[index].s_bo);
    dri_bo_unmap(bo);

    /* destination UV surface index 8 */
    index = 8;
    pp_context->surfaces[index].s_bo = obj_surface->bo;
    dri_bo_reference(pp_context->surfaces[index].s_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr, 
                      "surface state", 
                      sizeof(struct i965_surface_state), 
                      4096);
    assert(bo);
    pp_context->surfaces[index].ss_bo = bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);
    ss = bo->virtual;
    memset(ss, 0, sizeof(*ss));
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
    /* UV plane follows the full Y plane in the destination bo. */
    ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + out_wpitch * out_hpitch;
    ss->ss2.width = out_w / 4 - 1;
    ss->ss2.height = out_h / 2 - 1;
    ss->ss3.pitch = out_wpitch - 1;
    pp_set_surface_tiling(ss, tiling);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 
                      I915_GEM_DOMAIN_RENDER,
                      out_wpitch * out_hpitch,
                      offsetof(struct i965_surface_state, ss1),
                      pp_context->surfaces[index].s_bo);
    dri_bo_unmap(bo);

    /* sampler 8x8 state (Y): adaptive filtering bypassed per axis,
     * default sharpness. */
    dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
    assert(pp_context->sampler_state_table.bo_8x8->virtual);
    assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
    sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
    memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
    sampler_8x8_state->dw136.default_sharpness_level = 0;
    sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
    sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
    sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
    dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);

    /* sampler 8x8 */
    dri_bo_map(pp_context->sampler_state_table.bo, True);
    assert(pp_context->sampler_state_table.bo->virtual);
    assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
    sampler_8x8 = pp_context->sampler_state_table.bo->virtual;

    /* sample_8x8 Y index 1: adaptive 8-tap scaler with IEF detail
     * (sharpening) filter; the dw2..dw14 values are fixed IEF tuning
     * tables (thresholds, piece-wise-linear gain curves, limiters). */
    index = 1;
    memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
    sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
    sampler_8x8[index].dw0.ief_bypass = 0;
    sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
    sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
    /* 32-byte-aligned pointer to the sampler_8x8 state programmed above. */
    sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
    sampler_8x8[index].dw2.global_noise_estimation = 22;
    sampler_8x8[index].dw2.strong_edge_threshold = 8;
    sampler_8x8[index].dw2.weak_edge_threshold = 1;
    sampler_8x8[index].dw3.strong_edge_weight = 7;
    sampler_8x8[index].dw3.regular_weight = 2;
    sampler_8x8[index].dw3.non_edge_weight = 0;
    sampler_8x8[index].dw3.gain_factor = 40;
    sampler_8x8[index].dw4.steepness_boost = 0;
    sampler_8x8[index].dw4.steepness_threshold = 0;
    sampler_8x8[index].dw4.mr_boost = 0;
    sampler_8x8[index].dw4.mr_threshold = 5;
    sampler_8x8[index].dw5.pwl1_point_1 = 4;
    sampler_8x8[index].dw5.pwl1_point_2 = 12;
    sampler_8x8[index].dw5.pwl1_point_3 = 16;
    sampler_8x8[index].dw5.pwl1_point_4 = 26;
    sampler_8x8[index].dw6.pwl1_point_5 = 40;
    sampler_8x8[index].dw6.pwl1_point_6 = 160;
    sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
    sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
    sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
    sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
    sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
    sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
    sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
    sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
    sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
    sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
    sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
    sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
    sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
    sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
    sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
    sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
    sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
    sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
    sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
    sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
    sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
    sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
    sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
    sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
    sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
    sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
    sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
    sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
    sampler_8x8[index].dw13.limiter_boost = 0;
    sampler_8x8[index].dw13.minimum_limiter = 10;
    sampler_8x8[index].dw13.maximum_limiter = 11;
    sampler_8x8[index].dw14.clip_limiter = 130;
    /* Patch dw1's state pointer with a relocation against bo_8x8. */
    dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
                      I915_GEM_DOMAIN_RENDER, 
                      0,
                      0,
                      sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
                      pp_context->sampler_state_table.bo_8x8);

    /* sampler 8x8 state (UV): adaptive filtering disabled for chroma. */
    dri_bo_map(pp_context->sampler_state_table.bo_8x8_uv, True);
    assert(pp_context->sampler_state_table.bo_8x8_uv->virtual);
    assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
    sampler_8x8_state = pp_context->sampler_state_table.bo_8x8_uv->virtual;
    memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
    sampler_8x8_state->dw136.default_sharpness_level = 0;
    sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 0;
    sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
    sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
    dri_bo_unmap(pp_context->sampler_state_table.bo_8x8_uv);

    /* sample_8x8 UV index 2: same IEF tuning as Y but a nearest AVS
     * filter for the subsampled chroma. */
    index = 2;
    memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
    sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_NEAREST;
    sampler_8x8[index].dw0.ief_bypass = 0;
    sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
    sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
    sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8_uv->offset >> 5;
    sampler_8x8[index].dw2.global_noise_estimation = 22;
    sampler_8x8[index].dw2.strong_edge_threshold = 8;
    sampler_8x8[index].dw2.weak_edge_threshold = 1;
    sampler_8x8[index].dw3.strong_edge_weight = 7;
    sampler_8x8[index].dw3.regular_weight = 2;
    sampler_8x8[index].dw3.non_edge_weight = 0;
    sampler_8x8[index].dw3.gain_factor = 40;
    sampler_8x8[index].dw4.steepness_boost = 0;
    sampler_8x8[index].dw4.steepness_threshold = 0;
    sampler_8x8[index].dw4.mr_boost = 0;
    sampler_8x8[index].dw4.mr_threshold = 5;
    sampler_8x8[index].dw5.pwl1_point_1 = 4;
    sampler_8x8[index].dw5.pwl1_point_2 = 12;
    sampler_8x8[index].dw5.pwl1_point_3 = 16;
    sampler_8x8[index].dw5.pwl1_point_4 = 26;
    sampler_8x8[index].dw6.pwl1_point_5 = 40;
    sampler_8x8[index].dw6.pwl1_point_6 = 160;
    sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
    sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
    sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
    sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
    sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
    sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
    sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
    sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
    sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
    sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
    sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
    sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
    sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
    sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
    sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
    sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
    sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
    sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
    sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
    sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
    sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
    sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
    sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
    sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
    sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
    sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
    sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
    sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
    sampler_8x8[index].dw13.limiter_boost = 0;
    sampler_8x8[index].dw13.minimum_limiter = 10;
    sampler_8x8[index].dw13.maximum_limiter = 11;
    sampler_8x8[index].dw14.clip_limiter = 130;
    dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
                      I915_GEM_DOMAIN_RENDER, 
                      0,
                      0,
                      sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
                      pp_context->sampler_state_table.bo_8x8_uv);

    dri_bo_unmap(pp_context->sampler_state_table.bo);

    /* private function & data */
    pp_context->pp_x_steps = pp_avs_x_steps;
    pp_context->pp_y_steps = pp_avs_y_steps;
    pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;

    pp_avs_context->dest_x = dst_rect->x;
    pp_avs_context->dest_y = dst_rect->y;
    /* Kernel walks 16x8 blocks, so round the destination up to block
     * multiples. */
    pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
    pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
    /* NOTE(review): dividing by both in_w and out_w (resp. in_h/out_h)
     * for a "normalized" source coordinate and scaling step looks
     * suspicious — a normalized origin would usually be x/in_w only.
     * Verify against the AVS kernel's expected parameter encoding. */
    pp_avs_context->src_normalized_x = (float)src_rect->x / in_w / out_w;
    pp_avs_context->src_normalized_y = (float)src_rect->y / in_h / out_h;
    pp_avs_context->src_w = src_rect->width;
    pp_avs_context->src_h = src_rect->height;

    /* Enable non-linear anamorphic scaling in the kernel. */
    pp_static_parameter.grf4.r4_2.avs.nlas = 1;
    pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / out_h;

    pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / out_w;
    pp_inline_parameter.grf5.block_count_x = 1;        /* M x 1 */
    pp_inline_parameter.grf5.number_blocks = pp_avs_context->dest_h / 8;
    pp_inline_parameter.grf5.block_vertical_mask = 0xff;
    pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
    pp_inline_parameter.grf6.video_step_delta = 0.0;
}
1416
/* Horizontal step count for the DN/DI (denoise/deinterlace) kernel:
 * always a single pass in X. */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context;      /* unused */

    return 1;
}
1422
1423 static int
1424 pp_dndi_y_steps(void *private_context)
1425 {
1426     struct pp_dndi_context *pp_dndi_context = private_context;
1427
1428     return pp_dndi_context->dest_h / 4;
1429 }
1430
1431 static int
1432 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1433 {
1434     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
1435     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 4;
1436
1437     return 0;
1438 }
1439
1440 static 
1441 void pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1442                              VASurfaceID in_surface_id, VASurfaceID out_surface_id,
1443                              const VARectangle *src_rect, const VARectangle *dst_rect)
1444 {
1445     struct i965_driver_data *i965 = i965_driver_data(ctx);
1446     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
1447     struct object_surface *obj_surface;
1448     struct i965_surface_state *ss;
1449     struct i965_surface_state2 *ss_dndi;
1450     struct i965_sampler_dndi *sampler_dndi;
1451     dri_bo *bo;
1452     int index;
1453     int w, h;
1454     int orig_w, orig_h;
1455     unsigned int tiling, swizzle;
1456
1457     /* surface */
1458     obj_surface = SURFACE(in_surface_id);
1459     orig_w = obj_surface->orig_width;
1460     orig_h = obj_surface->orig_height;
1461     w = obj_surface->width;
1462     h = obj_surface->height;
1463     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
1464
1465     if (pp_context->stmm.bo == NULL) {
1466         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
1467                                            "STMM surface",
1468                                            w * h,
1469                                            4096);
1470         assert(pp_context->stmm.bo);
1471     }
1472
1473     /* source UV surface index 2 */
1474     index = 2;
1475     pp_context->surfaces[index].s_bo = obj_surface->bo;
1476     dri_bo_reference(pp_context->surfaces[index].s_bo);
1477     bo = dri_bo_alloc(i965->intel.bufmgr, 
1478                       "surface state", 
1479                       sizeof(struct i965_surface_state), 
1480                       4096);
1481     assert(bo);
1482     pp_context->surfaces[index].ss_bo = bo;
1483     dri_bo_map(bo, True);
1484     assert(bo->virtual);
1485     ss = bo->virtual;
1486     memset(ss, 0, sizeof(*ss));
1487     ss->ss0.surface_type = I965_SURFACE_2D;
1488     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1489     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
1490     ss->ss2.width = orig_w / 4 - 1;
1491     ss->ss2.height = orig_h / 2 - 1;
1492     ss->ss3.pitch = w - 1;
1493     pp_set_surface_tiling(ss, tiling);
1494     dri_bo_emit_reloc(bo,
1495                       I915_GEM_DOMAIN_RENDER, 
1496                       0,
1497                       w * h,
1498                       offsetof(struct i965_surface_state, ss1),
1499                       pp_context->surfaces[index].s_bo);
1500     dri_bo_unmap(bo);
1501
1502     /* source YUV surface index 4 */
1503     index = 4;
1504     pp_context->surfaces[index].s_bo = obj_surface->bo;
1505     dri_bo_reference(pp_context->surfaces[index].s_bo);
1506     bo = dri_bo_alloc(i965->intel.bufmgr, 
1507                       "YUV surface state for deinterlace ", 
1508                       sizeof(struct i965_surface_state2), 
1509                       4096);
1510     assert(bo);
1511     pp_context->surfaces[index].ss_bo = bo;
1512     dri_bo_map(bo, True);
1513     assert(bo->virtual);
1514     ss_dndi = bo->virtual;
1515     memset(ss_dndi, 0, sizeof(*ss_dndi));
1516     ss_dndi->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset;
1517     ss_dndi->ss1.cbcr_pixel_offset_v_direction = 0;
1518     ss_dndi->ss1.width = w - 1;
1519     ss_dndi->ss1.height = h - 1;
1520     ss_dndi->ss1.cbcr_pixel_offset_v_direction = 1;
1521     ss_dndi->ss2.half_pitch_for_chroma = 0;
1522     ss_dndi->ss2.pitch = w - 1;
1523     ss_dndi->ss2.interleave_chroma = 1;
1524     ss_dndi->ss2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
1525     ss_dndi->ss2.half_pitch_for_chroma = 0;
1526     ss_dndi->ss2.tiled_surface = 0;
1527     ss_dndi->ss3.x_offset_for_cb = 0;
1528     ss_dndi->ss3.y_offset_for_cb = h;
1529     pp_set_surface2_tiling(ss_dndi, tiling);
1530     dri_bo_emit_reloc(bo,
1531                       I915_GEM_DOMAIN_RENDER, 
1532                       0,
1533                       0,
1534                       offsetof(struct i965_surface_state2, ss0),
1535                       pp_context->surfaces[index].s_bo);
1536     dri_bo_unmap(bo);
1537
1538     /* source STMM surface index 20 */
1539     index = 20;
1540     pp_context->surfaces[index].s_bo = pp_context->stmm.bo;
1541     dri_bo_reference(pp_context->surfaces[index].s_bo);
1542     bo = dri_bo_alloc(i965->intel.bufmgr, 
1543                       "STMM surface state for deinterlace ", 
1544                       sizeof(struct i965_surface_state2), 
1545                       4096);
1546     assert(bo);
1547     pp_context->surfaces[index].ss_bo = bo;
1548     dri_bo_map(bo, True);
1549     assert(bo->virtual);
1550     ss = bo->virtual;
1551     memset(ss, 0, sizeof(*ss));
1552     ss->ss0.surface_type = I965_SURFACE_2D;
1553     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1554     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1555     ss->ss2.width = w - 1;
1556     ss->ss2.height = h - 1;
1557     ss->ss3.pitch = w - 1;
1558     dri_bo_emit_reloc(bo,
1559                       I915_GEM_DOMAIN_RENDER, 
1560                       I915_GEM_DOMAIN_RENDER,
1561                       0,
1562                       offsetof(struct i965_surface_state, ss1),
1563                       pp_context->surfaces[index].s_bo);
1564     dri_bo_unmap(bo);
1565
1566     /* destination surface */
1567     obj_surface = SURFACE(out_surface_id);
1568     orig_w = obj_surface->orig_width;
1569     orig_h = obj_surface->orig_height;
1570     w = obj_surface->width;
1571     h = obj_surface->height;
1572     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
1573
1574     /* destination Y surface index 7 */
1575     index = 7;
1576     pp_context->surfaces[index].s_bo = obj_surface->bo;
1577     dri_bo_reference(pp_context->surfaces[index].s_bo);
1578     bo = dri_bo_alloc(i965->intel.bufmgr, 
1579                       "surface state", 
1580                       sizeof(struct i965_surface_state), 
1581                       4096);
1582     assert(bo);
1583     pp_context->surfaces[index].ss_bo = bo;
1584     dri_bo_map(bo, True);
1585     assert(bo->virtual);
1586     ss = bo->virtual;
1587     memset(ss, 0, sizeof(*ss));
1588     ss->ss0.surface_type = I965_SURFACE_2D;
1589     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1590     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1591     ss->ss2.width = orig_w / 4 - 1;
1592     ss->ss2.height = orig_h - 1;
1593     ss->ss3.pitch = w - 1;
1594     pp_set_surface_tiling(ss, tiling);
1595     dri_bo_emit_reloc(bo,
1596                       I915_GEM_DOMAIN_RENDER, 
1597                       I915_GEM_DOMAIN_RENDER,
1598                       0,
1599                       offsetof(struct i965_surface_state, ss1),
1600                       pp_context->surfaces[index].s_bo);
1601     dri_bo_unmap(bo);
1602
1603     /* destination UV surface index 8 */
1604     index = 8;
1605     pp_context->surfaces[index].s_bo = obj_surface->bo;
1606     dri_bo_reference(pp_context->surfaces[index].s_bo);
1607     bo = dri_bo_alloc(i965->intel.bufmgr, 
1608                       "surface state", 
1609                       sizeof(struct i965_surface_state), 
1610                       4096);
1611     assert(bo);
1612     pp_context->surfaces[index].ss_bo = bo;
1613     dri_bo_map(bo, True);
1614     assert(bo->virtual);
1615     ss = bo->virtual;
1616     memset(ss, 0, sizeof(*ss));
1617     ss->ss0.surface_type = I965_SURFACE_2D;
1618     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1619     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
1620     ss->ss2.width = orig_w / 4 - 1;
1621     ss->ss2.height = orig_h / 2 - 1;
1622     ss->ss3.pitch = w - 1;
1623     pp_set_surface_tiling(ss, tiling);
1624     dri_bo_emit_reloc(bo,
1625                       I915_GEM_DOMAIN_RENDER, 
1626                       I915_GEM_DOMAIN_RENDER,
1627                       w * h,
1628                       offsetof(struct i965_surface_state, ss1),
1629                       pp_context->surfaces[index].s_bo);
1630     dri_bo_unmap(bo);
1631
1632     /* sampler dndi */
1633     dri_bo_map(pp_context->sampler_state_table.bo, True);
1634     assert(pp_context->sampler_state_table.bo->virtual);
1635     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
1636     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
1637
1638     /* sample dndi index 1 */
1639     index = 0;
1640     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
1641     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
1642     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
1643     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
1644
1645     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
1646     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
1647     sampler_dndi[index].dw1.stmm_c2 = 0;
1648     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
1649     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
1650
1651     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
1652     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
1653     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
1654     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
1655
1656     sampler_dndi[index].dw3.maximum_stmm = 128;
1657     sampler_dndi[index].dw3.multipler_for_vecm = 2;
1658     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
1659     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
1660     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
1661
1662     sampler_dndi[index].dw4.sdi_delta = 8;
1663     sampler_dndi[index].dw4.sdi_threshold = 128;
1664     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
1665     sampler_dndi[index].dw4.stmm_shift_up = 0;
1666     sampler_dndi[index].dw4.stmm_shift_down = 0;
1667     sampler_dndi[index].dw4.minimum_stmm = 0;
1668
1669     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
1670     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
1671     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
1672     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
1673
1674     sampler_dndi[index].dw6.dn_enable = 1;
1675     sampler_dndi[index].dw6.di_enable = 1;
1676     sampler_dndi[index].dw6.di_partial = 0;
1677     sampler_dndi[index].dw6.dndi_top_first = 1;
1678     sampler_dndi[index].dw6.dndi_stream_id = 1;
1679     sampler_dndi[index].dw6.dndi_first_frame = 1;
1680     sampler_dndi[index].dw6.progressive_dn = 0;
1681     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
1682     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
1683     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
1684
1685     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
1686     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
1687     sampler_dndi[index].dw7.vdi_walker_enable = 0;
1688     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
1689
1690     dri_bo_unmap(pp_context->sampler_state_table.bo);
1691
1692     /* private function & data */
1693     pp_context->pp_x_steps = pp_dndi_x_steps;
1694     pp_context->pp_y_steps = pp_dndi_y_steps;
1695     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
1696
1697     pp_static_parameter.grf1.statistics_surface_picth = w / 2;
1698     pp_static_parameter.grf1.r1_6.di.top_field_first = 0;
1699     pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m2 = 64;
1700     pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m1 = 192;
1701
1702     pp_inline_parameter.grf5.block_count_x = w / 16;   /* 1 x N */
1703     pp_inline_parameter.grf5.number_blocks = w / 16;
1704     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1705     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1706
1707     pp_dndi_context->dest_w = w;
1708     pp_dndi_context->dest_h = h;
1709 }
1710
1711 static void
1712 ironlake_pp_initialize(
1713     VADriverContextP   ctx,
1714     struct i965_post_processing_context *pp_context,
1715     VASurfaceID        in_surface_id,
1716     VASurfaceID        out_surface_id,
1717     const VARectangle *src_rect,
1718     const VARectangle *dst_rect,
1719     int                pp_index
1720 )
1721 {
1722     struct i965_driver_data *i965 = i965_driver_data(ctx);
1723     struct pp_module *pp_module;
1724     dri_bo *bo;
1725     int i;
1726
1727     dri_bo_unreference(pp_context->curbe.bo);
1728     bo = dri_bo_alloc(i965->intel.bufmgr,
1729                       "constant buffer",
1730                       4096, 
1731                       4096);
1732     assert(bo);
1733     pp_context->curbe.bo = bo;
1734
1735     dri_bo_unreference(pp_context->binding_table.bo);
1736     bo = dri_bo_alloc(i965->intel.bufmgr, 
1737                       "binding table",
1738                       sizeof(unsigned int), 
1739                       4096);
1740     assert(bo);
1741     pp_context->binding_table.bo = bo;
1742
1743     dri_bo_unreference(pp_context->idrt.bo);
1744     bo = dri_bo_alloc(i965->intel.bufmgr, 
1745                       "interface discriptor", 
1746                       sizeof(struct i965_interface_descriptor), 
1747                       4096);
1748     assert(bo);
1749     pp_context->idrt.bo = bo;
1750     pp_context->idrt.num_interface_descriptors = 0;
1751
1752     dri_bo_unreference(pp_context->sampler_state_table.bo);
1753     bo = dri_bo_alloc(i965->intel.bufmgr, 
1754                       "sampler state table", 
1755                       4096,
1756                       4096);
1757     assert(bo);
1758     dri_bo_map(bo, True);
1759     memset(bo->virtual, 0, bo->size);
1760     dri_bo_unmap(bo);
1761     pp_context->sampler_state_table.bo = bo;
1762
1763     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
1764     bo = dri_bo_alloc(i965->intel.bufmgr, 
1765                       "sampler 8x8 state ",
1766                       4096,
1767                       4096);
1768     assert(bo);
1769     pp_context->sampler_state_table.bo_8x8 = bo;
1770
1771     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
1772     bo = dri_bo_alloc(i965->intel.bufmgr, 
1773                       "sampler 8x8 state ",
1774                       4096,
1775                       4096);
1776     assert(bo);
1777     pp_context->sampler_state_table.bo_8x8_uv = bo;
1778
1779     dri_bo_unreference(pp_context->vfe_state.bo);
1780     bo = dri_bo_alloc(i965->intel.bufmgr, 
1781                       "vfe state", 
1782                       sizeof(struct i965_vfe_state), 
1783                       4096);
1784     assert(bo);
1785     pp_context->vfe_state.bo = bo;
1786     
1787     for (i = 0; i < MAX_PP_SURFACES; i++) {
1788         dri_bo_unreference(pp_context->surfaces[i].ss_bo);
1789         pp_context->surfaces[i].ss_bo = NULL;
1790
1791         dri_bo_unreference(pp_context->surfaces[i].s_bo);
1792         pp_context->surfaces[i].s_bo = NULL;
1793     }
1794
1795     memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
1796     memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
1797     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
1798     pp_context->current_pp = pp_index;
1799     pp_module = &pp_context->pp_modules[pp_index];
1800     
1801     if (pp_module->initialize)
1802         pp_module->initialize(ctx, pp_context,
1803                               in_surface_id, out_surface_id,
1804                               src_rect, dst_rect);
1805 }
1806
/*
 * Run one post-processing pass (module selected by pp_index) from
 * in_surface_id to out_surface_id on Ironlake (Gen5): allocate and
 * reset the per-pass state, upload the indirect state objects, then
 * emit the media pipeline commands into the batch.
 */
static void
ironlake_post_processing(
    VADriverContextP   ctx,
    struct i965_post_processing_context *pp_context,
    VASurfaceID        in_surface_id,
    VASurfaceID        out_surface_id,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    int                pp_index
)
{
    ironlake_pp_initialize(ctx, pp_context, in_surface_id, out_surface_id, src_rect, dst_rect, pp_index);
    ironlake_pp_states_setup(ctx, pp_context);
    ironlake_pp_pipeline_setup(ctx, pp_context);
}
1822
1823 static void
1824 gen6_pp_initialize(
1825     VADriverContextP   ctx,
1826     struct i965_post_processing_context *pp_context,
1827     VASurfaceID        in_surface_id,
1828     VASurfaceID        out_surface_id,
1829     const VARectangle *src_rect,
1830     const VARectangle *dst_rect,
1831     int                pp_index
1832 )
1833 {
1834     struct i965_driver_data *i965 = i965_driver_data(ctx);
1835     struct pp_module *pp_module;
1836     dri_bo *bo;
1837     int i;
1838
1839     dri_bo_unreference(pp_context->curbe.bo);
1840     bo = dri_bo_alloc(i965->intel.bufmgr,
1841                       "constant buffer",
1842                       4096, 
1843                       4096);
1844     assert(bo);
1845     pp_context->curbe.bo = bo;
1846
1847     dri_bo_unreference(pp_context->binding_table.bo);
1848     bo = dri_bo_alloc(i965->intel.bufmgr, 
1849                       "binding table",
1850                       sizeof(unsigned int), 
1851                       4096);
1852     assert(bo);
1853     pp_context->binding_table.bo = bo;
1854
1855     dri_bo_unreference(pp_context->idrt.bo);
1856     bo = dri_bo_alloc(i965->intel.bufmgr, 
1857                       "interface discriptor", 
1858                       sizeof(struct gen6_interface_descriptor_data), 
1859                       4096);
1860     assert(bo);
1861     pp_context->idrt.bo = bo;
1862     pp_context->idrt.num_interface_descriptors = 0;
1863
1864     dri_bo_unreference(pp_context->sampler_state_table.bo);
1865     bo = dri_bo_alloc(i965->intel.bufmgr, 
1866                       "sampler state table", 
1867                       4096,
1868                       4096);
1869     assert(bo);
1870     dri_bo_map(bo, True);
1871     memset(bo->virtual, 0, bo->size);
1872     dri_bo_unmap(bo);
1873     pp_context->sampler_state_table.bo = bo;
1874
1875     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
1876     bo = dri_bo_alloc(i965->intel.bufmgr, 
1877                       "sampler 8x8 state ",
1878                       4096,
1879                       4096);
1880     assert(bo);
1881     pp_context->sampler_state_table.bo_8x8 = bo;
1882
1883     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
1884     bo = dri_bo_alloc(i965->intel.bufmgr, 
1885                       "sampler 8x8 state ",
1886                       4096,
1887                       4096);
1888     assert(bo);
1889     pp_context->sampler_state_table.bo_8x8_uv = bo;
1890
1891     dri_bo_unreference(pp_context->vfe_state.bo);
1892     bo = dri_bo_alloc(i965->intel.bufmgr, 
1893                       "vfe state", 
1894                       sizeof(struct i965_vfe_state), 
1895                       4096);
1896     assert(bo);
1897     pp_context->vfe_state.bo = bo;
1898     
1899     for (i = 0; i < MAX_PP_SURFACES; i++) {
1900         dri_bo_unreference(pp_context->surfaces[i].ss_bo);
1901         pp_context->surfaces[i].ss_bo = NULL;
1902
1903         dri_bo_unreference(pp_context->surfaces[i].s_bo);
1904         pp_context->surfaces[i].s_bo = NULL;
1905     }
1906
1907     memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
1908     memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
1909     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
1910     pp_context->current_pp = pp_index;
1911     pp_module = &pp_context->pp_modules[pp_index];
1912     
1913     if (pp_module->initialize)
1914         pp_module->initialize(ctx, pp_context,
1915                               in_surface_id, out_surface_id,
1916                               src_rect, dst_rect);
1917 }
1918
1919 static void
1920 gen6_pp_binding_table(struct i965_post_processing_context *pp_context)
1921 {
1922     unsigned int *binding_table;
1923     dri_bo *bo = pp_context->binding_table.bo;
1924     int i;
1925
1926     dri_bo_map(bo, 1);
1927     assert(bo->virtual);
1928     binding_table = bo->virtual;
1929     memset(binding_table, 0, bo->size);
1930
1931     for (i = 0; i < MAX_PP_SURFACES; i++) {
1932         if (pp_context->surfaces[i].ss_bo) {
1933             assert(pp_context->surfaces[i].s_bo);
1934
1935             binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
1936             dri_bo_emit_reloc(bo,
1937                               I915_GEM_DOMAIN_INSTRUCTION, 0,
1938                               0,
1939                               i * sizeof(*binding_table),
1940                               pp_context->surfaces[i].ss_bo);
1941         }
1942     
1943     }
1944
1945     dri_bo_unmap(bo);
1946 }
1947
/*
 * Build the single Gen6 interface descriptor for the current pp module:
 * kernel entry point, sampler state pointer, binding table pointer, and
 * CURBE read setup.  The relocations must be emitted after the
 * corresponding desc fields are written, because the reloc deltas read
 * those field values back out of the mapped buffer.
 */
static void
gen6_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
{
    struct gen6_interface_descriptor_data *desc;
    dri_bo *bo;
    int pp_index = pp_context->current_pp;

    bo = pp_context->idrt.bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);
    desc = bo->virtual;
    memset(desc, 0, sizeof(*desc));
    /* Kernel start pointer is in 64-byte units, hence the >> 6. */
    desc->desc0.kernel_start_pointer = 
        pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
    desc->desc1.single_program_flow = 1;
    desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
    desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
    /* Sampler state and binding table pointers are in 32-byte units. */
    desc->desc2.sampler_state_pointer = 
        pp_context->sampler_state_table.bo->offset >> 5;
    desc->desc3.binding_table_entry_count = 0;
    desc->desc3.binding_table_pointer = 
        pp_context->binding_table.bo->offset >> 5; /*reloc */
    desc->desc4.constant_urb_entry_read_offset = 0;
    desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */

    /* Fix up desc0 (kernel start pointer) against the kernel bo. */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct gen6_interface_descriptor_data, desc0),
                      pp_context->pp_modules[pp_index].kernel.bo);

    /* Fix up desc2; the delta preserves the low bits already written
     * (sampler_count occupies the bits below the pointer field). */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      desc->desc2.sampler_count << 2,
                      offsetof(struct gen6_interface_descriptor_data, desc2),
                      pp_context->sampler_state_table.bo);

    /* Fix up desc3; delta carries the binding table entry count. */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      desc->desc3.binding_table_entry_count,
                      offsetof(struct gen6_interface_descriptor_data, desc3),
                      pp_context->binding_table.bo);

    dri_bo_unmap(bo);
    pp_context->idrt.num_interface_descriptors++;
}
1994
1995 static void
1996 gen6_pp_upload_constants(struct i965_post_processing_context *pp_context)
1997 {
1998     unsigned char *constant_buffer;
1999
2000     assert(sizeof(pp_static_parameter) == 128);
2001     dri_bo_map(pp_context->curbe.bo, 1);
2002     assert(pp_context->curbe.bo->virtual);
2003     constant_buffer = pp_context->curbe.bo->virtual;
2004     memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
2005     dri_bo_unmap(pp_context->curbe.bo);
2006 }
2007
/*
 * Upload all indirect state a Gen6+ post-processing pass needs: the
 * surface binding table, the interface descriptor, and the CURBE
 * constants.  Must run after gen6_pp_initialize() has allocated the
 * backing buffers.  ctx is currently unused.
 */
static void
gen6_pp_states_setup(VADriverContextP ctx,
                     struct i965_post_processing_context *pp_context)
{
    gen6_pp_binding_table(pp_context);
    gen6_pp_interface_descriptor_table(pp_context);
    gen6_pp_upload_constants(pp_context);
}
2016
/*
 * Switch the command streamer to the media pipeline; the MEDIA_*
 * commands that follow in this batch are only valid after this.
 */
static void
gen6_pp_pipeline_select(VADriverContextP ctx,
                        struct i965_post_processing_context *pp_context)
{
    struct intel_batchbuffer *batch = pp_context->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
    ADVANCE_BATCH(batch);
}
2027
2028 static void
2029 gen6_pp_state_base_address(VADriverContextP ctx,
2030                            struct i965_post_processing_context *pp_context)
2031 {
2032     struct intel_batchbuffer *batch = pp_context->batch;
2033
2034     BEGIN_BATCH(batch, 10);
2035     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
2036     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2037     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2038     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2039     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2040     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2041     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2042     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2043     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2044     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2045     ADVANCE_BATCH(batch);
2046 }
2047
/*
 * Program MEDIA_VFE_STATE: number of URB entries for the VFE unit and
 * the URB/CURBE allocation sizes, using the values computed in
 * i965_post_processing_context_init().
 */
static void
gen6_pp_vfe_state(VADriverContextP ctx,
                  struct i965_post_processing_context *pp_context)
{
    struct intel_batchbuffer *batch = pp_context->batch;

    BEGIN_BATCH(batch, 8);
    OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
    OUT_BATCH(batch, 0);
    /* Max threads (entries - 1) and number of URB entries. */
    OUT_BATCH(batch,
              (pp_context->urb.num_vfe_entries - 1) << 16 |
              pp_context->urb.num_vfe_entries << 8);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch,
              (pp_context->urb.size_vfe_entry * 2) << 16 |  /* in 256 bits unit */
              (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 - 1));            /* in 256 bits unit */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
2069
/*
 * Emit MEDIA_CURBE_LOAD pointing the hardware at the constant buffer
 * filled by gen6_pp_upload_constants().
 */
static void
gen6_pp_curbe_load(VADriverContextP ctx,
                   struct i965_post_processing_context *pp_context)
{
    struct intel_batchbuffer *batch = pp_context->batch;

    /* The CURBE length programmed below must fit in the allocated bo.
     * NOTE(review): entries are bookkept in 512-bit units elsewhere;
     * confirm the *512 scale here matches that convention. */
    assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512 <= pp_context->curbe.bo->size);

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch,
              pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512);
    OUT_RELOC(batch, 
              pp_context->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    ADVANCE_BATCH(batch);
}
2089
/*
 * Emit MEDIA_INTERFACE_DESCRIPTOR_LOAD with the total byte size of the
 * descriptor table and a relocation to the idrt buffer built by
 * gen6_pp_interface_descriptor_table().
 */
static void
gen6_interface_descriptor_load(VADriverContextP ctx,
                               struct i965_post_processing_context *pp_context)
{
    struct intel_batchbuffer *batch = pp_context->batch;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch,
              pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
    OUT_RELOC(batch, 
              pp_context->idrt.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    ADVANCE_BATCH(batch);
}
2107
2108 static void
2109 gen6_pp_object_walker(VADriverContextP ctx,
2110                       struct i965_post_processing_context *pp_context)
2111 {
2112     struct intel_batchbuffer *batch = pp_context->batch;
2113     int x, x_steps, y, y_steps;
2114
2115     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
2116     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
2117
2118     for (y = 0; y < y_steps; y++) {
2119         for (x = 0; x < x_steps; x++) {
2120             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
2121                 BEGIN_BATCH(batch, 22);
2122                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 20);
2123                 OUT_BATCH(batch, 0);
2124                 OUT_BATCH(batch, 0); /* no indirect data */
2125                 OUT_BATCH(batch, 0);
2126                 OUT_BATCH(batch, 0); /* scoreboard */
2127                 OUT_BATCH(batch, 0);
2128
2129                 /* inline data grf 5-6 */
2130                 assert(sizeof(pp_inline_parameter) == 64);
2131                 intel_batchbuffer_data(batch, &pp_inline_parameter, sizeof(pp_inline_parameter));
2132
2133                 ADVANCE_BATCH(batch);
2134             }
2135         }
2136     }
2137 }
2138
/*
 * Emit the complete Gen6+ media pipeline for one post-processing pass,
 * in the required order: flush, pipeline select, CURBE load, interface
 * descriptor load, base addresses, VFE state, then the per-block
 * MEDIA_OBJECT walk.  The whole sequence is wrapped in an atomic batch
 * section so it is never split across batch buffers.
 */
static void
gen6_pp_pipeline_setup(VADriverContextP ctx,
                       struct i965_post_processing_context *pp_context)
{
    struct intel_batchbuffer *batch = pp_context->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_pp_pipeline_select(ctx, pp_context);
    gen6_pp_curbe_load(ctx, pp_context);
    gen6_interface_descriptor_load(ctx, pp_context);
    gen6_pp_state_base_address(ctx, pp_context);
    gen6_pp_vfe_state(ctx, pp_context);
    gen6_pp_object_walker(ctx, pp_context);
    intel_batchbuffer_end_atomic(batch);
}
2155
/*
 * Run one post-processing pass (module selected by pp_index) from
 * in_surface_id to out_surface_id on Gen6/Gen7: allocate and reset the
 * per-pass state, upload the indirect state objects, then emit the
 * media pipeline commands into the batch.
 */
static void
gen6_post_processing(
    VADriverContextP   ctx,
    struct i965_post_processing_context *pp_context,
    VASurfaceID        in_surface_id,
    VASurfaceID        out_surface_id,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    int                pp_index
)
{
    gen6_pp_initialize(ctx, pp_context, in_surface_id, out_surface_id, src_rect, dst_rect, pp_index);
    gen6_pp_states_setup(ctx, pp_context);
    gen6_pp_pipeline_setup(ctx, pp_context);
}
2171
2172 static void
2173 i965_post_processing_internal(
2174     VADriverContextP   ctx,
2175     struct i965_post_processing_context *pp_context,
2176     VASurfaceID        in_surface_id,
2177     VASurfaceID        out_surface_id,
2178     const VARectangle *src_rect,
2179     const VARectangle *dst_rect,
2180     int                pp_index
2181 )
2182 {
2183     struct i965_driver_data *i965 = i965_driver_data(ctx);
2184
2185     if (IS_GEN6(i965->intel.device_id) ||
2186         IS_GEN7(i965->intel.device_id))
2187         gen6_post_processing(ctx, pp_context, in_surface_id, out_surface_id, src_rect, dst_rect, pp_index);
2188     else
2189         ironlake_post_processing(ctx, pp_context, in_surface_id, out_surface_id, src_rect, dst_rect, pp_index);
2190 }
2191
2192 VAStatus 
2193 i965_DestroySurfaces(VADriverContextP ctx,
2194                      VASurfaceID *surface_list,
2195                      int num_surfaces);
2196 VAStatus 
2197 i965_CreateSurfaces(VADriverContextP ctx,
2198                     int width,
2199                     int height,
2200                     int format,
2201                     int num_surfaces,
2202                     VASurfaceID *surfaces);
2203 VASurfaceID
2204 i965_post_processing(
2205     VADriverContextP   ctx,
2206     VASurfaceID        surface,
2207     const VARectangle *src_rect,
2208     const VARectangle *dst_rect,
2209     unsigned int       flags,
2210     int               *has_done_scaling  
2211 )
2212 {
2213     struct i965_driver_data *i965 = i965_driver_data(ctx);
2214     VASurfaceID in_surface_id = surface;
2215     VASurfaceID out_surface_id = VA_INVALID_ID;
2216
2217     if (HAS_PP(i965)) {
2218         /* Currently only support post processing for NV12 surface */
2219         if (i965->render_state.interleaved_uv) {
2220             struct object_surface *obj_surface;
2221             VAStatus status;
2222
2223             if (flags & I965_PP_FLAG_DEINTERLACING) {
2224                 obj_surface = SURFACE(in_surface_id);
2225                 status = i965_CreateSurfaces(ctx,
2226                                              obj_surface->orig_width,
2227                                              obj_surface->orig_height,
2228                                              VA_RT_FORMAT_YUV420,
2229                                              1,
2230                                              &out_surface_id);
2231                 assert(status == VA_STATUS_SUCCESS);
2232                 obj_surface = SURFACE(out_surface_id);
2233                 i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
2234                 i965_post_processing_internal(ctx, i965->pp_context,
2235                                               in_surface_id, out_surface_id,
2236                                               src_rect, dst_rect,
2237                                               PP_NV12_DNDI);
2238             }
2239
2240             if (flags & I965_PP_FLAG_AVS) {
2241                 struct i965_render_state *render_state = &i965->render_state;
2242                 struct intel_region *dest_region = render_state->draw_region;
2243
2244                 if (out_surface_id != VA_INVALID_ID)
2245                     in_surface_id = out_surface_id;
2246
2247                 status = i965_CreateSurfaces(ctx,
2248                                              dest_region->width,
2249                                              dest_region->height,
2250                                              VA_RT_FORMAT_YUV420,
2251                                              1,
2252                                              &out_surface_id);
2253                 assert(status == VA_STATUS_SUCCESS);
2254                 obj_surface = SURFACE(out_surface_id);
2255                 i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
2256                 i965_post_processing_internal(ctx, i965->pp_context,
2257                                               in_surface_id, out_surface_id,
2258                                               src_rect, dst_rect,
2259                                               PP_NV12_AVS);
2260
2261                 if (in_surface_id != surface)
2262                     i965_DestroySurfaces(ctx, &in_surface_id, 1);
2263                 
2264                 *has_done_scaling = 1;
2265             }
2266         }
2267     }
2268
2269     return out_surface_id;
2270 }       
2271
2272 static void
2273 i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
2274 {
2275     int i;
2276
2277     dri_bo_unreference(pp_context->curbe.bo);
2278     pp_context->curbe.bo = NULL;
2279
2280     for (i = 0; i < MAX_PP_SURFACES; i++) {
2281         dri_bo_unreference(pp_context->surfaces[i].ss_bo);
2282         pp_context->surfaces[i].ss_bo = NULL;
2283
2284         dri_bo_unreference(pp_context->surfaces[i].s_bo);
2285         pp_context->surfaces[i].s_bo = NULL;
2286     }
2287
2288     dri_bo_unreference(pp_context->sampler_state_table.bo);
2289     pp_context->sampler_state_table.bo = NULL;
2290
2291     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
2292     pp_context->sampler_state_table.bo_8x8 = NULL;
2293
2294     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
2295     pp_context->sampler_state_table.bo_8x8_uv = NULL;
2296
2297     dri_bo_unreference(pp_context->binding_table.bo);
2298     pp_context->binding_table.bo = NULL;
2299
2300     dri_bo_unreference(pp_context->idrt.bo);
2301     pp_context->idrt.bo = NULL;
2302     pp_context->idrt.num_interface_descriptors = 0;
2303
2304     dri_bo_unreference(pp_context->vfe_state.bo);
2305     pp_context->vfe_state.bo = NULL;
2306
2307     dri_bo_unreference(pp_context->stmm.bo);
2308     pp_context->stmm.bo = NULL;
2309
2310     for (i = 0; i < NUM_PP_MODULES; i++) {
2311         struct pp_module *pp_module = &pp_context->pp_modules[i];
2312
2313         dri_bo_unreference(pp_module->kernel.bo);
2314         pp_module->kernel.bo = NULL;
2315     }
2316
2317 }
2318
2319 Bool
2320 i965_post_processing_terminate(VADriverContextP ctx)
2321 {
2322     struct i965_driver_data *i965 = i965_driver_data(ctx);
2323     struct i965_post_processing_context *pp_context = i965->pp_context;
2324
2325     if (pp_context) {
2326         i965_post_processing_context_finalize(pp_context);
2327         free(pp_context);
2328     }
2329
2330     i965->pp_context = NULL;
2331
2332     return True;
2333 }
2334
2335 static void
2336 i965_post_processing_context_init(VADriverContextP ctx,
2337                                   struct i965_post_processing_context *pp_context,
2338                                   struct intel_batchbuffer *batch)
2339 {
2340     struct i965_driver_data *i965 = i965_driver_data(ctx);
2341     int i;
2342
2343     pp_context->urb.size = URB_SIZE((&i965->intel));
2344     pp_context->urb.num_vfe_entries = 32;
2345     pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
2346     pp_context->urb.num_cs_entries = 1;
2347     pp_context->urb.size_cs_entry = 2;      /* in 512 bits unit */
2348     pp_context->urb.vfe_start = 0;
2349     pp_context->urb.cs_start = pp_context->urb.vfe_start + 
2350         pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
2351     assert(pp_context->urb.cs_start + 
2352            pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
2353
2354     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
2355     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
2356
2357     if (IS_GEN6(i965->intel.device_id) ||
2358         IS_GEN7(i965->intel.device_id))
2359         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
2360     else if (IS_IRONLAKE(i965->intel.device_id))
2361         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
2362
2363     for (i = 0; i < NUM_PP_MODULES; i++) {
2364         struct pp_module *pp_module = &pp_context->pp_modules[i];
2365         dri_bo_unreference(pp_module->kernel.bo);
2366         pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
2367                                             pp_module->kernel.name,
2368                                             pp_module->kernel.size,
2369                                             4096);
2370         assert(pp_module->kernel.bo);
2371         dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
2372     }
2373
2374     pp_context->batch = batch;
2375 }
2376
2377 Bool
2378 i965_post_processing_init(VADriverContextP ctx)
2379 {
2380     struct i965_driver_data *i965 = i965_driver_data(ctx);
2381     struct i965_post_processing_context *pp_context = i965->pp_context;
2382
2383     if (HAS_PP(i965)) {
2384         if (pp_context == NULL) {
2385             pp_context = calloc(1, sizeof(*pp_context));
2386             i965_post_processing_context_init(ctx, pp_context, i965->batch);
2387             i965->pp_context = pp_context;
2388         }
2389     }
2390
2391     return True;
2392 }
2393
/*
 * Execute one VA video-processing (vaapi "proc") picture: scale/convert the
 * input surface named in the input parameter buffer onto the context's
 * current render target using the NV12 scaling pipeline, then flush.
 *
 * NOTE(review): errors (missing surfaces) are handled only via assert();
 * in a release build an invalid surface id would proceed unchecked.
 */
static void 
i965_proc_picture(VADriverContextP ctx, 
                  VAProfile profile, 
                  union codec_state *codec_state,
                  struct hw_context *hw_context)
{
    /* `i965` is referenced implicitly by the SURFACE() macro below. */
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
    struct proc_state *proc_state = &codec_state->proc;
    /* Pipeline parameters carry the output region; input parameters carry
     * the source surface and its source region. */
    VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
    VAProcInputParameterBuffer *input_param = (VAProcInputParameterBuffer *)proc_state->input_param->buffer;
    struct object_surface *obj_surface;

    assert(input_param->surface != VA_INVALID_ID);
    assert(proc_state->current_render_target != VA_INVALID_ID);

    /* Make sure the destination surface has an NV12 backing BO before the
     * PP kernels write into it. */
    obj_surface = SURFACE(proc_state->current_render_target);
    i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
    i965_post_processing_internal(ctx, &proc_context->pp_context,
                                  input_param->surface, proc_state->current_render_target,
                                  &input_param->region, &pipeline_param->output_region,
                                  PP_NV12_SCALING);

    /* Submit the accumulated PP commands to the GPU. */
    intel_batchbuffer_flush(hw_context->batch);
}
2419
2420 static void
2421 i965_proc_context_destroy(void *hw_context)
2422 {
2423     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
2424
2425     i965_post_processing_context_finalize(&proc_context->pp_context);
2426     intel_batchbuffer_free(proc_context->base.batch);
2427     free(proc_context);
2428 }
2429
2430 struct hw_context *
2431 i965_proc_context_init(VADriverContextP ctx, VAProfile profile)
2432 {
2433     struct intel_driver_data *intel = intel_driver_data(ctx);
2434     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
2435
2436     proc_context->base.destroy = i965_proc_context_destroy;
2437     proc_context->base.run = i965_proc_picture;
2438     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
2439     i965_post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
2440
2441     return (struct hw_context *)proc_context;
2442 }