Avoid depending on va_backend.h for some files
[platform/upstream/libva-intel-driver.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_post_processing.h"
40 #include "i965_render.h"
41
/* True if this device generation supports the post-processing pipeline
 * (Ironlake, Gen6 or Gen7). */
#define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) ||     \
                     IS_GEN6((ctx)->intel.device_id) ||         \
                     IS_GEN7((ctx)->intel.device_id))
45
/* Pre-compiled Gen5 (Ironlake) media kernels for the post-processing
 * modules; each included .g4b.gen5 file is generated shader binary data
 * (one 4-dword instruction per row). */
static const uint32_t pp_null_gen5[][4] = {
#include "shaders/post_processing/null.g4b.gen5"
};

/* NV12 -> NV12 load/save (copy) kernel */
static const uint32_t pp_nv12_load_save_gen5[][4] = {
#include "shaders/post_processing/nv12_load_save_nv12.g4b.gen5"
};

/* NV12 -> NV12 scaling kernel */
static const uint32_t pp_nv12_scaling_gen5[][4] = {
#include "shaders/post_processing/nv12_scaling_nv12.g4b.gen5"
};

/* NV12 -> NV12 AVS (advanced scaling) kernel */
static const uint32_t pp_nv12_avs_gen5[][4] = {
#include "shaders/post_processing/nv12_avs_nv12.g4b.gen5"
};

/* NV12 -> NV12 DNDI kernel -- presumably denoise/de-interlace; confirm
 * against the shader sources. */
static const uint32_t pp_nv12_dndi_gen5[][4] = {
#include "shaders/post_processing/nv12_dndi_nv12.g4b.gen5"
};
65
/* Per-module setup callbacks (defined later in this file); each one
 * prepares the surface states and walker parameters for a single
 * post-processing operation before the pipeline is emitted. */
static void pp_null_initialize(VADriverContextP ctx,
                               VASurfaceID in_surface_id, VASurfaceID out_surface_id,
                               const VARectangle *src_rect, const VARectangle *dst_rect);
static void pp_nv12_avs_initialize(VADriverContextP ctx,
                                   VASurfaceID in_surface_id, VASurfaceID out_surface_id,
                                   const VARectangle *src_rect, const VARectangle *dst_rect);
static void pp_nv12_scaling_initialize(VADriverContextP ctx,
                                       VASurfaceID in_surface_id, VASurfaceID out_surface_id,
                                       const VARectangle *src_rect, const VARectangle *dst_rect);
static void pp_nv12_load_save_initialize(VADriverContextP ctx,
                                         VASurfaceID in_surface_id, VASurfaceID out_surface_id,
                                         const VARectangle *src_rect, const VARectangle *dst_rect);
static void pp_nv12_dndi_initialize(VADriverContextP ctx,
                                    VASurfaceID in_surface_id, VASurfaceID out_surface_id,
                                    const VARectangle *src_rect, const VARectangle *dst_rect);
81
/*
 * Gen5 (Ironlake) post-processing module table.  Each entry holds the
 * kernel description (name, PP_* id, binary, binary size, bo -- NULL
 * here, presumably filled in at context setup) plus the module's
 * initialize callback.
 */
static struct pp_module pp_modules_gen5[] = {
    {
        {
            "NULL module (for testing)",
            PP_NULL,
            pp_null_gen5,
            sizeof(pp_null_gen5),
            NULL,
        },

        pp_null_initialize,
    },

    {
        {
            "NV12 Load & Save module",
            PP_NV12_LOAD_SAVE,
            pp_nv12_load_save_gen5,
            sizeof(pp_nv12_load_save_gen5),
            NULL,
        },

        pp_nv12_load_save_initialize,
    },

    {
        {
            "NV12 Scaling module",
            PP_NV12_SCALING,
            pp_nv12_scaling_gen5,
            sizeof(pp_nv12_scaling_gen5),
            NULL,
        },

        pp_nv12_scaling_initialize,
    },

    {
        {
            "NV12 AVS module",
            PP_NV12_AVS,
            pp_nv12_avs_gen5,
            sizeof(pp_nv12_avs_gen5),
            NULL,
        },

        pp_nv12_avs_initialize,
    },

    {
        {
            "NV12 DNDI module",
            PP_NV12_DNDI,
            pp_nv12_dndi_gen5,
            sizeof(pp_nv12_dndi_gen5),
            NULL,
        },

        pp_nv12_dndi_initialize,
    },
};
143
/* Pre-compiled Gen6 media kernels; same set of operations as the Gen5
 * kernels above, built from the .g6b shader binaries. */
static const uint32_t pp_null_gen6[][4] = {
#include "shaders/post_processing/null.g6b"
};

static const uint32_t pp_nv12_load_save_gen6[][4] = {
#include "shaders/post_processing/nv12_load_save_nv12.g6b"
};

static const uint32_t pp_nv12_scaling_gen6[][4] = {
#include "shaders/post_processing/nv12_scaling_nv12.g6b"
};

static const uint32_t pp_nv12_avs_gen6[][4] = {
#include "shaders/post_processing/nv12_avs_nv12.g6b"
};

static const uint32_t pp_nv12_dndi_gen6[][4] = {
#include "shaders/post_processing/nv12_dndi_nv12.g6b"
};
163
/*
 * Gen6 post-processing module table; mirrors pp_modules_gen5 but with
 * the Gen6 kernel binaries.  The initialize callbacks are shared
 * between generations.
 */
static struct pp_module pp_modules_gen6[] = {
    {
        {
            "NULL module (for testing)",
            PP_NULL,
            pp_null_gen6,
            sizeof(pp_null_gen6),
            NULL,
        },

        pp_null_initialize,
    },

    {
        {
            "NV12 Load & Save module",
            PP_NV12_LOAD_SAVE,
            pp_nv12_load_save_gen6,
            sizeof(pp_nv12_load_save_gen6),
            NULL,
        },

        pp_nv12_load_save_initialize,
    },

    {
        {
            "NV12 Scaling module",
            PP_NV12_SCALING,
            pp_nv12_scaling_gen6,
            sizeof(pp_nv12_scaling_gen6),
            NULL,
        },

        pp_nv12_scaling_initialize,
    },

    {
        {
            "NV12 AVS module",
            PP_NV12_AVS,
            pp_nv12_avs_gen6,
            sizeof(pp_nv12_avs_gen6),
            NULL,
        },

        pp_nv12_avs_initialize,
    },

    {
        {
            "NV12 DNDI module",
            PP_NV12_DNDI,
            pp_nv12_dndi_gen6,
            sizeof(pp_nv12_dndi_gen6),
            NULL,
        },

        pp_nv12_dndi_initialize,
    },
};
225
/* Shorthand for the parameter blocks stored in the current context;
 * both macros require a local variable named `pp_context` in scope. */
#define pp_static_parameter     pp_context->pp_static_parameter
#define pp_inline_parameter     pp_context->pp_inline_parameter
228
229 static void
230 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
231 {
232     switch (tiling) {
233     case I915_TILING_NONE:
234         ss->ss3.tiled_surface = 0;
235         ss->ss3.tile_walk = 0;
236         break;
237     case I915_TILING_X:
238         ss->ss3.tiled_surface = 1;
239         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
240         break;
241     case I915_TILING_Y:
242         ss->ss3.tiled_surface = 1;
243         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
244         break;
245     }
246 }
247
248 static void
249 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
250 {
251     switch (tiling) {
252     case I915_TILING_NONE:
253         ss->ss2.tiled_surface = 0;
254         ss->ss2.tile_walk = 0;
255         break;
256     case I915_TILING_X:
257         ss->ss2.tiled_surface = 1;
258         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
259         break;
260     case I915_TILING_Y:
261         ss->ss2.tiled_surface = 1;
262         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
263         break;
264     }
265 }
266
/*
 * Intentionally empty: surface states are created per-module by the
 * pp_*_initialize() callbacks, so there is nothing generic to do here.
 */
static void
ironlake_pp_surface_state(struct i965_post_processing_context *pp_context)
{

}
272
/*
 * Build the interface descriptor for the currently selected module:
 * kernel entry point, CURBE read layout, sampler state and binding
 * table pointers.  The three pointer fields are patched with
 * relocations, with the non-address low bits carried in the
 * relocation delta.
 */
static void
ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
{
    struct i965_interface_descriptor *desc;
    dri_bo *bo;
    int pp_index = pp_context->current_pp;

    bo = pp_context->idrt.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;
    memset(desc, 0, sizeof(*desc));
    desc->desc0.grf_reg_blocks = 10;
    desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
    desc->desc1.const_urb_entry_read_offset = 0;
    desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
    desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
    desc->desc2.sampler_count = 0;
    desc->desc3.binding_table_entry_count = 0;
    desc->desc3.binding_table_pointer = 
        pp_context->binding_table.bo->offset >> 5; /*reloc */

    /* kernel start pointer; grf_reg_blocks rides in the reloc delta */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      desc->desc0.grf_reg_blocks,
                      offsetof(struct i965_interface_descriptor, desc0),
                      pp_context->pp_modules[pp_index].kernel.bo);

    /* sampler state table pointer */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      desc->desc2.sampler_count << 2,
                      offsetof(struct i965_interface_descriptor, desc2),
                      pp_context->sampler_state_table.bo);

    /* binding table pointer */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      desc->desc3.binding_table_entry_count,
                      offsetof(struct i965_interface_descriptor, desc3),
                      pp_context->binding_table.bo);

    dri_bo_unmap(bo);
    pp_context->idrt.num_interface_descriptors++;
}
316
317 static void
318 ironlake_pp_binding_table(struct i965_post_processing_context *pp_context)
319 {
320     unsigned int *binding_table;
321     dri_bo *bo = pp_context->binding_table.bo;
322     int i;
323
324     dri_bo_map(bo, 1);
325     assert(bo->virtual);
326     binding_table = bo->virtual;
327     memset(binding_table, 0, bo->size);
328
329     for (i = 0; i < MAX_PP_SURFACES; i++) {
330         if (pp_context->surfaces[i].ss_bo) {
331             assert(pp_context->surfaces[i].s_bo);
332
333             binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
334             dri_bo_emit_reloc(bo,
335                               I915_GEM_DOMAIN_INSTRUCTION, 0,
336                               0,
337                               i * sizeof(*binding_table),
338                               pp_context->surfaces[i].ss_bo);
339         }
340     
341     }
342
343     dri_bo_unmap(bo);
344 }
345
/*
 * Fill the VFE (video front end) state buffer: thread limit, URB entry
 * layout, generic (non-child) mode, and the interface descriptor base
 * address, which is patched with a relocation.
 */
static void
ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
{
    struct i965_vfe_state *vfe_state;
    dri_bo *bo;

    bo = pp_context->vfe_state.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);
    vfe_state = bo->virtual;
    memset(vfe_state, 0, sizeof(*vfe_state));
    vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
    vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
    vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
    vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
    vfe_state->vfe1.children_present = 0;
    vfe_state->vfe2.interface_descriptor_base = 
        pp_context->idrt.bo->offset >> 4; /* reloc */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_vfe_state, vfe2),
                      pp_context->idrt.bo);
    dri_bo_unmap(bo);
}
371
/*
 * Copy the 128-byte static parameter block into the CURBE buffer; the
 * kernel reads it as constant data (grf 1-4, per the interface
 * descriptor's const_urb_entry_read_len).
 */
static void
ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
{
    unsigned char *constant_buffer;

    /* the layout is fixed at 128 bytes; catch struct drift early */
    assert(sizeof(pp_static_parameter) == 128);
    dri_bo_map(pp_context->curbe.bo, 1);
    assert(pp_context->curbe.bo->virtual);
    constant_buffer = pp_context->curbe.bo->virtual;
    memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
    dri_bo_unmap(pp_context->curbe.bo);
}
384
/*
 * Build all indirect state buffers for one Ironlake pp run.  The
 * binding table is written before the interface descriptor, which
 * references it.
 */
static void
ironlake_pp_states_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_post_processing_context *pp_context = i965->pp_context;

    ironlake_pp_surface_state(pp_context);
    ironlake_pp_binding_table(pp_context);
    ironlake_pp_interface_descriptor_table(pp_context);
    ironlake_pp_vfe_state(pp_context);
    ironlake_pp_upload_constants(pp_context);
}
397
/* Switch the GPU to the media pipeline before any media state/commands
 * are emitted. */
static void
ironlake_pp_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
    ADVANCE_BATCH(batch);
}
408
/*
 * Emit URB_FENCE to partition the URB: the VFE region ends at
 * urb.cs_start and the constant (CS) region ends at the total URB
 * size, reallocating both sections.
 */
static void
ironlake_pp_urb_layout(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int vfe_fence, cs_fence;

    vfe_fence = pp_context->urb.cs_start;
    cs_fence = pp_context->urb.size;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 
              (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
              (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
    ADVANCE_BATCH(batch);
}
427
428 static void
429 ironlake_pp_state_base_address(VADriverContextP ctx)
430 {
431     struct i965_driver_data *i965 = i965_driver_data(ctx);
432     struct intel_batchbuffer *batch = i965->batch;
433
434     BEGIN_BATCH(batch, 8);
435     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
436     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
437     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
438     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
439     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
440     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
441     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
442     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
443     ADVANCE_BATCH(batch);
444 }
445
/* Point the media pipeline at the VFE state buffer via
 * MEDIA_STATE_POINTERS (second pointer dword unused/zero). */
static void
ironlake_pp_state_pointers(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
    OUT_BATCH(batch, 0);
    OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}
458
/* Program CS_URB_STATE: per-entry allocation size (encoded minus one)
 * and the number of constant URB entries. */
static void 
ironlake_pp_cs_urb_layout(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
              (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}
472
/*
 * Bind the CURBE buffer as the constant buffer.  The buffer length
 * (size_cs_entry - 1) rides in the relocation delta alongside the
 * address -- NOTE(review): length units assumed to be URB rows; confirm
 * against the PRM.
 */
static void
ironlake_pp_constant_buffer(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(batch, pp_context->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              pp_context->urb.size_cs_entry - 1);
    ADVANCE_BATCH(batch);    
}
486
/*
 * Walk the output in blocks: the module's x/y step callbacks define the
 * grid, and set_block_parameter() fills the inline parameters for each
 * (x, y) block.  A MEDIA_OBJECT is emitted only when the callback
 * returns 0; a non-zero return skips the block.
 */
static void
ironlake_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int x, x_steps, y, y_steps;

    x_steps = pp_context->pp_x_steps(&pp_context->private_context);
    y_steps = pp_context->pp_y_steps(&pp_context->private_context);

    for (y = 0; y < y_steps; y++) {
        for (x = 0; x < x_steps; x++) {
            if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
                BEGIN_BATCH(batch, 20);
                OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
                OUT_BATCH(batch, 0);
                OUT_BATCH(batch, 0); /* no indirect data */
                OUT_BATCH(batch, 0);

                /* inline data grf 5-6 */
                assert(sizeof(pp_inline_parameter) == 64);
                intel_batchbuffer_data(batch, &pp_inline_parameter, sizeof(pp_inline_parameter));

                ADVANCE_BATCH(batch);
            }
        }
    }
}
515
/*
 * Emit the complete Ironlake media pipeline for one post-processing
 * run: flush, pipeline select, base addresses, state pointers, URB
 * setup, constants, then the per-block object walker -- all inside one
 * atomic batch section.
 */
static void
ironlake_pp_pipeline_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_post_processing_context *pp_context = i965->pp_context;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    ironlake_pp_pipeline_select(ctx);
    ironlake_pp_state_base_address(ctx);
    ironlake_pp_state_pointers(ctx, pp_context);
    ironlake_pp_urb_layout(ctx, pp_context);
    ironlake_pp_cs_urb_layout(ctx, pp_context);
    ironlake_pp_constant_buffer(ctx, pp_context);
    ironlake_pp_object_walker(ctx, pp_context);
    intel_batchbuffer_end_atomic(batch);
}
534
/* The NULL module always walks a single horizontal step. */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context;  /* unused */

    return 1;
}
540
/* The NULL module always walks a single vertical step. */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context;  /* unused */

    return 1;
}
546
/* No per-block parameters for the NULL module; always emit the block. */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;
    (void)x;
    (void)y;

    return 0;
}
552
553 static void
554 pp_null_initialize(VADriverContextP ctx, 
555                    VASurfaceID in_surface_id, VASurfaceID out_surface_id,
556                    const VARectangle *src_rect, const VARectangle *dst_rect)
557 {
558     struct i965_driver_data *i965 = i965_driver_data(ctx);
559     struct i965_post_processing_context *pp_context = i965->pp_context;
560
561     /* private function & data */
562     pp_context->pp_x_steps = pp_null_x_steps;
563     pp_context->pp_y_steps = pp_null_y_steps;
564     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
565 }
566
/* Load/save walks the full width in one pass (1 x N blocks). */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context;  /* unused */

    return 1;
}
572
573 static int
574 pp_load_save_y_steps(void *private_context)
575 {
576     struct pp_load_save_context *pp_load_save_context = private_context;
577
578     return pp_load_save_context->dest_h / 8;
579 }
580
581 static int
582 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
583 {
584     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
585     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
586     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
587     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
588
589     return 0;
590 }
591
592 static void
593 pp_nv12_load_save_initialize(VADriverContextP ctx,
594                              VASurfaceID in_surface_id, VASurfaceID out_surface_id,
595                              const VARectangle *src_rect, const VARectangle *dst_rect)
596 {
597     struct i965_driver_data *i965 = i965_driver_data(ctx);
598     struct i965_post_processing_context *pp_context = i965->pp_context;
599     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
600     struct object_surface *obj_surface;
601     struct i965_surface_state *ss;
602     dri_bo *bo;
603     int index, w, h;
604     int orig_w, orig_h;
605     unsigned int tiling, swizzle;
606
607     /* source surface */
608     obj_surface = SURFACE(in_surface_id);
609     orig_w = obj_surface->orig_width;
610     orig_h = obj_surface->orig_height;
611     w = obj_surface->width;
612     h = obj_surface->height;
613     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
614
615     /* source Y surface index 1 */
616     index = 1;
617     pp_context->surfaces[index].s_bo = obj_surface->bo;
618     dri_bo_reference(pp_context->surfaces[index].s_bo);
619     bo = dri_bo_alloc(i965->intel.bufmgr, 
620                       "surface state", 
621                       sizeof(struct i965_surface_state), 
622                       4096);
623     assert(bo);
624     pp_context->surfaces[index].ss_bo = bo;
625     dri_bo_map(bo, True);
626     assert(bo->virtual);
627     ss = bo->virtual;
628     memset(ss, 0, sizeof(*ss));
629     ss->ss0.surface_type = I965_SURFACE_2D;
630     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
631     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
632     ss->ss2.width = orig_w / 4 - 1;
633     ss->ss2.height = orig_h - 1;
634     ss->ss3.pitch = w - 1;
635     pp_set_surface_tiling(ss, tiling);
636     dri_bo_emit_reloc(bo,
637                       I915_GEM_DOMAIN_RENDER, 
638                       0,
639                       0,
640                       offsetof(struct i965_surface_state, ss1),
641                       pp_context->surfaces[index].s_bo);
642     dri_bo_unmap(bo);
643
644     /* source UV surface index 2 */
645     index = 2;
646     pp_context->surfaces[index].s_bo = obj_surface->bo;
647     dri_bo_reference(pp_context->surfaces[index].s_bo);
648     bo = dri_bo_alloc(i965->intel.bufmgr, 
649                       "surface state", 
650                       sizeof(struct i965_surface_state), 
651                       4096);
652     assert(bo);
653     pp_context->surfaces[index].ss_bo = bo;
654     dri_bo_map(bo, True);
655     assert(bo->virtual);
656     ss = bo->virtual;
657     memset(ss, 0, sizeof(*ss));
658     ss->ss0.surface_type = I965_SURFACE_2D;
659     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
660     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
661     ss->ss2.width = orig_w / 4 - 1;
662     ss->ss2.height = orig_h / 2 - 1;
663     ss->ss3.pitch = w - 1;
664     pp_set_surface_tiling(ss, tiling);
665     dri_bo_emit_reloc(bo,
666                       I915_GEM_DOMAIN_RENDER, 
667                       0,
668                       w * h,
669                       offsetof(struct i965_surface_state, ss1),
670                       pp_context->surfaces[index].s_bo);
671     dri_bo_unmap(bo);
672
673     /* destination surface */
674     obj_surface = SURFACE(out_surface_id);
675     orig_w = obj_surface->orig_width;
676     orig_h = obj_surface->orig_height;
677     w = obj_surface->width;
678     h = obj_surface->height;
679     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
680
681     /* destination Y surface index 7 */
682     index = 7;
683     pp_context->surfaces[index].s_bo = obj_surface->bo;
684     dri_bo_reference(pp_context->surfaces[index].s_bo);
685     bo = dri_bo_alloc(i965->intel.bufmgr, 
686                       "surface state", 
687                       sizeof(struct i965_surface_state), 
688                       4096);
689     assert(bo);
690     pp_context->surfaces[index].ss_bo = bo;
691     dri_bo_map(bo, True);
692     assert(bo->virtual);
693     ss = bo->virtual;
694     memset(ss, 0, sizeof(*ss));
695     ss->ss0.surface_type = I965_SURFACE_2D;
696     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
697     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
698     ss->ss2.width = orig_w / 4 - 1;
699     ss->ss2.height = orig_h - 1;
700     ss->ss3.pitch = w - 1;
701     pp_set_surface_tiling(ss, tiling);
702     dri_bo_emit_reloc(bo,
703                       I915_GEM_DOMAIN_RENDER, 
704                       I915_GEM_DOMAIN_RENDER,
705                       0,
706                       offsetof(struct i965_surface_state, ss1),
707                       pp_context->surfaces[index].s_bo);
708     dri_bo_unmap(bo);
709
710     /* destination UV surface index 8 */
711     index = 8;
712     pp_context->surfaces[index].s_bo = obj_surface->bo;
713     dri_bo_reference(pp_context->surfaces[index].s_bo);
714     bo = dri_bo_alloc(i965->intel.bufmgr, 
715                       "surface state", 
716                       sizeof(struct i965_surface_state), 
717                       4096);
718     assert(bo);
719     pp_context->surfaces[index].ss_bo = bo;
720     dri_bo_map(bo, True);
721     assert(bo->virtual);
722     ss = bo->virtual;
723     memset(ss, 0, sizeof(*ss));
724     ss->ss0.surface_type = I965_SURFACE_2D;
725     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
726     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
727     ss->ss2.width = orig_w / 4 - 1;
728     ss->ss2.height = orig_h / 2 - 1;
729     ss->ss3.pitch = w - 1;
730     pp_set_surface_tiling(ss, tiling);
731     dri_bo_emit_reloc(bo,
732                       I915_GEM_DOMAIN_RENDER, 
733                       I915_GEM_DOMAIN_RENDER,
734                       w * h,
735                       offsetof(struct i965_surface_state, ss1),
736                       pp_context->surfaces[index].s_bo);
737     dri_bo_unmap(bo);
738
739     /* private function & data */
740     pp_context->pp_x_steps = pp_load_save_x_steps;
741     pp_context->pp_y_steps = pp_load_save_y_steps;
742     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
743     pp_load_save_context->dest_h = h;
744     pp_load_save_context->dest_w = w;
745
746     pp_inline_parameter.grf5.block_count_x = w / 16;   /* 1 x N */
747     pp_inline_parameter.grf5.number_blocks = w / 16;
748 }
749
/* Scaling walks the full width in one pass (1 x N blocks). */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context;  /* unused */

    return 1;
}
755
756 static int
757 pp_scaling_y_steps(void *private_context)
758 {
759     struct pp_scaling_context *pp_scaling_context = private_context;
760
761     return pp_scaling_context->dest_h / 8;
762 }
763
764 static int
765 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
766 {
767     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
768     float src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
769     float src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
770
771     pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
772     pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
773     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
774     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
775     
776     return 0;
777 }
778
779 static void
780 pp_nv12_scaling_initialize(VADriverContextP ctx,
781                            VASurfaceID in_surface_id, VASurfaceID out_surface_id,
782                            const VARectangle *src_rect, const VARectangle *dst_rect)
783 {
784     struct i965_driver_data *i965 = i965_driver_data(ctx);
785     struct i965_post_processing_context *pp_context = i965->pp_context;
786     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
787     struct object_surface *obj_surface;
788     struct i965_sampler_state *sampler_state;
789     struct i965_surface_state *ss;
790     dri_bo *bo;
791     int index;
792     int in_w, in_h, in_wpitch, in_hpitch;
793     int out_w, out_h, out_wpitch, out_hpitch;
794     unsigned int tiling, swizzle;
795
796     /* source surface */
797     obj_surface = SURFACE(in_surface_id);
798     in_w = obj_surface->orig_width;
799     in_h = obj_surface->orig_height;
800     in_wpitch = obj_surface->width;
801     in_hpitch = obj_surface->height;
802     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
803
804     /* source Y surface index 1 */
805     index = 1;
806     pp_context->surfaces[index].s_bo = obj_surface->bo;
807     dri_bo_reference(pp_context->surfaces[index].s_bo);
808     bo = dri_bo_alloc(i965->intel.bufmgr, 
809                       "surface state", 
810                       sizeof(struct i965_surface_state), 
811                       4096);
812     assert(bo);
813     pp_context->surfaces[index].ss_bo = bo;
814     dri_bo_map(bo, True);
815     assert(bo->virtual);
816     ss = bo->virtual;
817     memset(ss, 0, sizeof(*ss));
818     ss->ss0.surface_type = I965_SURFACE_2D;
819     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
820     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
821     ss->ss2.width = in_w - 1;
822     ss->ss2.height = in_h - 1;
823     ss->ss3.pitch = in_wpitch - 1;
824     pp_set_surface_tiling(ss, tiling);
825     dri_bo_emit_reloc(bo,
826                       I915_GEM_DOMAIN_RENDER, 
827                       0,
828                       0,
829                       offsetof(struct i965_surface_state, ss1),
830                       pp_context->surfaces[index].s_bo);
831     dri_bo_unmap(bo);
832
833     /* source UV surface index 2 */
834     index = 2;
835     pp_context->surfaces[index].s_bo = obj_surface->bo;
836     dri_bo_reference(pp_context->surfaces[index].s_bo);
837     bo = dri_bo_alloc(i965->intel.bufmgr, 
838                       "surface state", 
839                       sizeof(struct i965_surface_state), 
840                       4096);
841     assert(bo);
842     pp_context->surfaces[index].ss_bo = bo;
843     dri_bo_map(bo, True);
844     assert(bo->virtual);
845     ss = bo->virtual;
846     memset(ss, 0, sizeof(*ss));
847     ss->ss0.surface_type = I965_SURFACE_2D;
848     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
849     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + in_wpitch * in_hpitch;
850     ss->ss2.width = in_w / 2 - 1;
851     ss->ss2.height = in_h / 2 - 1;
852     ss->ss3.pitch = in_wpitch - 1;
853     pp_set_surface_tiling(ss, tiling);
854     dri_bo_emit_reloc(bo,
855                       I915_GEM_DOMAIN_RENDER, 
856                       0,
857                       in_wpitch * in_hpitch,
858                       offsetof(struct i965_surface_state, ss1),
859                       pp_context->surfaces[index].s_bo);
860     dri_bo_unmap(bo);
861
862     /* destination surface */
863     obj_surface = SURFACE(out_surface_id);
864     out_w = obj_surface->orig_width;
865     out_h = obj_surface->orig_height;
866     out_wpitch = obj_surface->width;
867     out_hpitch = obj_surface->height;
868     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
869
870     /* destination Y surface index 7 */
871     index = 7;
872     pp_context->surfaces[index].s_bo = obj_surface->bo;
873     dri_bo_reference(pp_context->surfaces[index].s_bo);
874     bo = dri_bo_alloc(i965->intel.bufmgr, 
875                       "surface state", 
876                       sizeof(struct i965_surface_state), 
877                       4096);
878     assert(bo);
879     pp_context->surfaces[index].ss_bo = bo;
880     dri_bo_map(bo, True);
881     assert(bo->virtual);
882     ss = bo->virtual;
883     memset(ss, 0, sizeof(*ss));
884     ss->ss0.surface_type = I965_SURFACE_2D;
885     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
886     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
887     ss->ss2.width = out_w / 4 - 1;
888     ss->ss2.height = out_h - 1;
889     ss->ss3.pitch = out_wpitch - 1;
890     pp_set_surface_tiling(ss, tiling);
891     dri_bo_emit_reloc(bo,
892                       I915_GEM_DOMAIN_RENDER, 
893                       I915_GEM_DOMAIN_RENDER,
894                       0,
895                       offsetof(struct i965_surface_state, ss1),
896                       pp_context->surfaces[index].s_bo);
897     dri_bo_unmap(bo);
898
899     /* destination UV surface index 8 */
900     index = 8;
901     pp_context->surfaces[index].s_bo = obj_surface->bo;
902     dri_bo_reference(pp_context->surfaces[index].s_bo);
903     bo = dri_bo_alloc(i965->intel.bufmgr, 
904                       "surface state", 
905                       sizeof(struct i965_surface_state), 
906                       4096);
907     assert(bo);
908     pp_context->surfaces[index].ss_bo = bo;
909     dri_bo_map(bo, True);
910     assert(bo->virtual);
911     ss = bo->virtual;
912     memset(ss, 0, sizeof(*ss));
913     ss->ss0.surface_type = I965_SURFACE_2D;
914     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
915     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + out_wpitch * out_hpitch;
916     ss->ss2.width = out_w / 4 - 1;
917     ss->ss2.height = out_h / 2 - 1;
918     ss->ss3.pitch = out_wpitch - 1;
919     pp_set_surface_tiling(ss, tiling);
920     dri_bo_emit_reloc(bo,
921                       I915_GEM_DOMAIN_RENDER, 
922                       I915_GEM_DOMAIN_RENDER,
923                       out_wpitch * out_hpitch,
924                       offsetof(struct i965_surface_state, ss1),
925                       pp_context->surfaces[index].s_bo);
926     dri_bo_unmap(bo);
927
928     /* sampler state */
929     dri_bo_map(pp_context->sampler_state_table.bo, True);
930     assert(pp_context->sampler_state_table.bo->virtual);
931     sampler_state = pp_context->sampler_state_table.bo->virtual;
932
933     /* SIMD16 Y index 1 */
934     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
935     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
936     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
937     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
938     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
939
940     /* SIMD16 UV index 2 */
941     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
942     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
943     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
944     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
945     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
946
947     dri_bo_unmap(pp_context->sampler_state_table.bo);
948
949     /* private function & data */
950     pp_context->pp_x_steps = pp_scaling_x_steps;
951     pp_context->pp_y_steps = pp_scaling_y_steps;
952     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
953
954     pp_scaling_context->dest_x = dst_rect->x;
955     pp_scaling_context->dest_y = dst_rect->y;
956     pp_scaling_context->dest_w = ALIGN(dst_rect->width, 16);
957     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 16);
958     pp_scaling_context->src_normalized_x = (float)src_rect->x / in_w / out_w;
959     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h / out_h;
960
961     pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / out_h;
962
963     pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / out_w;
964     pp_inline_parameter.grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
965     pp_inline_parameter.grf5.number_blocks = pp_scaling_context->dest_w / 16;
966     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
967     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
968 }
969
970 static int
971 pp_avs_x_steps(void *private_context)
972 {
973     struct pp_avs_context *pp_avs_context = private_context;
974
975     return pp_avs_context->dest_w / 16;
976 }
977
/* AVS scaling walks the destination in full-height vertical strips,
 * so exactly one Y step is issued regardless of the context. */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;      /* unused */

    return 1;
}
983
/*
 * Per-block parameter setup for NV12 AVS (Adaptive Video Scaler) runs.
 *
 * Called once for each 16x8 destination block at grid position (x, y);
 * it programs the global inline/static GRF parameter structures
 * (pp_inline_parameter / pp_static_parameter) with the normalized source
 * origin and the horizontal scaling step for that block.  Blocks within a
 * row accumulate state left to right, so x must advance monotonically.
 * Always returns 0.
 *
 * tmp_w is the destination width that would preserve the source aspect
 * ratio at the requested destination height, rounded up to the 16-pixel
 * block size.  When tmp_w >= dest_w the whole row is scaled with a
 * constant step; otherwise a non-linear scheme stretches the left/right
 * margins with a per-pixel step delta while the center stays linear
 * (presumably the NLAS mode enabled by the caller via
 * grf4.r4_2.avs.nlas — confirm against the AVS kernel source).
 */
static int
pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
    float src_x_steping, src_y_steping, video_step_delta;
    /* Aspect-preserving width for dest_h, aligned to the 16-pixel block size. */
    int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);

    if (tmp_w >= pp_avs_context->dest_w) {
        /* Linear scaling: constant per-pixel step, no step delta. */
        pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
        pp_inline_parameter.grf6.video_step_delta = 0;

        if (x == 0) {
            /* First block of a row: center the dest_w window inside tmp_w
             * and add the caller-supplied normalized source X offset. */
            pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
                pp_avs_context->src_normalized_x;
        } else {
            /* Advance the origin by 16 steps of the previous block plus the
             * accumulated delta (arithmetic series: 16 * 15 / 2 terms). */
            src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
            video_step_delta = pp_inline_parameter.grf6.video_step_delta;
            pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                16 * 15 * video_step_delta / 2;
        }
    } else {
        /* Non-linear scaling: the row is split into a left ramp of
         * nls_left blocks, a linear center, and a right ramp of
         * nls_right blocks. */
        int n0, n1, n2, nls_left, nls_right;
        int factor_a = 5, factor_b = 4;
        float f;

        n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);   /* extra blocks assigned to the left side */
        n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;    /* remaining extra blocks on the right side */
        n2 = tmp_w / (16 * factor_a);                       /* center blocks folded into each ramp */
        nls_left = n0 + n2;
        nls_right = n1 + n2;
        f = (float) n2 * 16 / tmp_w;                        /* normalized source span covered by each ramp */

        if (n0 < 5) {
            /* Margins too small to build a ramp: fall back to plain
             * linear scaling across the full destination width. */
            pp_inline_parameter.grf6.video_step_delta = 0.0;

            if (x == 0) {
                pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
                pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
            } else {
                src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
                video_step_delta = pp_inline_parameter.grf6.video_step_delta;
                pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                    16 * 15 * video_step_delta / 2;
            }
        } else {
            if (x < nls_left) {
                /* Left ramp: step grows by b per pixel, starting at a,
                 * chosen so the ramp consumes exactly f of the source:
                 * f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
                float a = f / (nls_left * 16 * factor_b);
                float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));

                pp_inline_parameter.grf6.video_step_delta = b;

                if (x == 0) {
                    pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
                    pp_inline_parameter.grf5.normalized_video_x_scaling_step = a;
                } else {
                    src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
                    video_step_delta = pp_inline_parameter.grf6.video_step_delta;
                    pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                        16 * 15 * video_step_delta / 2;
                    pp_inline_parameter.grf5.normalized_video_x_scaling_step += 16 * b;
                }
            } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
                /* scale the center linearly */
                src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
                video_step_delta = pp_inline_parameter.grf6.video_step_delta;
                pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                    16 * 15 * video_step_delta / 2;
                pp_inline_parameter.grf6.video_step_delta = 0.0;
                pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
            } else {
                /* Right ramp: mirror of the left ramp — the step shrinks
                 * by b per pixel (delta is negative). */
                float a = f / (nls_right * 16 * factor_b);
                float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));

                src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
                video_step_delta = pp_inline_parameter.grf6.video_step_delta;
                pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                    16 * 15 * video_step_delta / 2;
                pp_inline_parameter.grf6.video_step_delta = -b;

                /* First block of the ramp starts at the largest step and
                 * walks back down; later blocks just keep shrinking. */
                if (x == (pp_avs_context->dest_w / 16 - nls_right))
                    pp_inline_parameter.grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
                else
                    pp_inline_parameter.grf5.normalized_video_x_scaling_step -= b * 16;
            }
        }
    }

    /* Vertical origin advances linearly: each block covers 8 output rows. */
    src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
    pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
    pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
    pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;

    return 0;
}
1079
/*
 * Set up GPU state for NV12 -> NV12 scaling via the sample_8x8
 * AVS (Adaptive Video Scaler) engine.
 *
 * Builds SURFACE_STATE for the source Y and UV planes (read through
 * sample_8x8, binding table indices 1 and 2), the destination Y and UV
 * planes (written as plain 2D render targets, indices 7 and 8),
 * programs the 8x8 AVS/IEF sampler state for both channels, and
 * installs the per-block callbacks and the scaling parameters consumed
 * by pp_avs_set_block_parameter() at render time.
 *
 * src_rect/dst_rect select the regions to scale between; src_rect
 * coordinates are normalized against both surface sizes below.
 */
static void
pp_nv12_avs_initialize(VADriverContextP ctx,
                       VASurfaceID in_surface_id, VASurfaceID out_surface_id,
                       const VARectangle *src_rect, const VARectangle *dst_rect)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_post_processing_context *pp_context = i965->pp_context;
    struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
    struct object_surface *obj_surface;
    struct i965_surface_state *ss;
    struct i965_sampler_8x8 *sampler_8x8;
    struct i965_sampler_8x8_state *sampler_8x8_state;
    struct i965_surface_state2 *ss_8x8;
    dri_bo *bo;
    int index;
    int in_w, in_h, in_wpitch, in_hpitch;       /* source: visible size and allocated pitch */
    int out_w, out_h, out_wpitch, out_hpitch;   /* destination: visible size and allocated pitch */
    unsigned int tiling, swizzle;

    /* source surface */
    obj_surface = SURFACE(in_surface_id);
    in_w = obj_surface->orig_width;
    in_h = obj_surface->orig_height;
    in_wpitch = obj_surface->width;
    in_hpitch = obj_surface->height;
    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);

    /* source Y surface index 1 */
    index = 1;
    pp_context->surfaces[index].s_bo = obj_surface->bo;
    dri_bo_reference(pp_context->surfaces[index].s_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Y surface state for sample_8x8",
                      sizeof(struct i965_surface_state2),
                      4096);
    assert(bo);
    pp_context->surfaces[index].ss_bo = bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);
    ss_8x8 = bo->virtual;
    memset(ss_8x8, 0, sizeof(*ss_8x8));
    ss_8x8->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset;
    ss_8x8->ss1.cbcr_pixel_offset_v_direction = 0;
    ss_8x8->ss1.width = in_w - 1;               /* hardware fields are size-minus-one */
    ss_8x8->ss1.height = in_h - 1;
    ss_8x8->ss2.half_pitch_for_chroma = 0;
    ss_8x8->ss2.pitch = in_wpitch - 1;
    ss_8x8->ss2.interleave_chroma = 0;          /* Y plane only */
    ss_8x8->ss2.surface_format = SURFACE_FORMAT_Y8_UNORM;
    ss_8x8->ss3.x_offset_for_cb = 0;
    ss_8x8->ss3.y_offset_for_cb = 0;
    pp_set_surface2_tiling(ss_8x8, tiling);
    /* Patch ss0 with the real GPU address of the source bo at exec time;
     * read-only (write domain 0). */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER,
                      0,
                      0,
                      offsetof(struct i965_surface_state2, ss0),
                      pp_context->surfaces[index].s_bo);
    dri_bo_unmap(bo);

    /* source UV surface index 2 */
    index = 2;
    pp_context->surfaces[index].s_bo = obj_surface->bo;
    dri_bo_reference(pp_context->surfaces[index].s_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "UV surface state for sample_8x8",
                      sizeof(struct i965_surface_state2),
                      4096);
    assert(bo);
    pp_context->surfaces[index].ss_bo = bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);
    ss_8x8 = bo->virtual;
    memset(ss_8x8, 0, sizeof(*ss_8x8));
    /* NV12: the interleaved UV plane starts right after the Y plane. */
    ss_8x8->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset + in_wpitch * in_hpitch;
    ss_8x8->ss1.cbcr_pixel_offset_v_direction = 0;
    ss_8x8->ss1.width = in_w - 1;
    ss_8x8->ss1.height = in_h - 1;
    ss_8x8->ss2.half_pitch_for_chroma = 0;
    ss_8x8->ss2.pitch = in_wpitch - 1;
    ss_8x8->ss2.interleave_chroma = 1;          /* CbCr interleaved (NV12) */
    ss_8x8->ss2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
    ss_8x8->ss3.x_offset_for_cb = 0;
    ss_8x8->ss3.y_offset_for_cb = 0;
    pp_set_surface2_tiling(ss_8x8, tiling);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER,
                      0,
                      in_wpitch * in_hpitch,    /* relocation delta: UV plane offset */
                      offsetof(struct i965_surface_state2, ss0),
                      pp_context->surfaces[index].s_bo);
    dri_bo_unmap(bo);

    /* destination surface */
    obj_surface = SURFACE(out_surface_id);
    out_w = obj_surface->orig_width;
    out_h = obj_surface->orig_height;
    out_wpitch = obj_surface->width;
    out_hpitch = obj_surface->height;
    assert(out_w <= out_wpitch && out_h <= out_hpitch);
    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);

    /* destination Y surface index 7 */
    index = 7;
    pp_context->surfaces[index].s_bo = obj_surface->bo;
    dri_bo_reference(pp_context->surfaces[index].s_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state",
                      sizeof(struct i965_surface_state),
                      4096);
    assert(bo);
    pp_context->surfaces[index].ss_bo = bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);
    ss = bo->virtual;
    memset(ss, 0, sizeof(*ss));
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
    /* Width programmed in quarters — presumably the kernel writes 4
     * pixels per element; confirm against the PP kernel source. */
    ss->ss2.width = out_w / 4 - 1;
    ss->ss2.height = out_h - 1;
    ss->ss3.pitch = out_wpitch - 1;
    pp_set_surface_tiling(ss, tiling);
    /* Destination is written by the render engine: read+write domains. */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER,
                      I915_GEM_DOMAIN_RENDER,
                      0,
                      offsetof(struct i965_surface_state, ss1),
                      pp_context->surfaces[index].s_bo);
    dri_bo_unmap(bo);

    /* destination UV surface index 8 */
    index = 8;
    pp_context->surfaces[index].s_bo = obj_surface->bo;
    dri_bo_reference(pp_context->surfaces[index].s_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state",
                      sizeof(struct i965_surface_state),
                      4096);
    assert(bo);
    pp_context->surfaces[index].ss_bo = bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);
    ss = bo->virtual;
    memset(ss, 0, sizeof(*ss));
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
    /* UV plane follows the Y plane in the NV12 destination bo. */
    ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + out_wpitch * out_hpitch;
    ss->ss2.width = out_w / 4 - 1;
    ss->ss2.height = out_h / 2 - 1;             /* 4:2:0 chroma: half vertical resolution */
    ss->ss3.pitch = out_wpitch - 1;
    pp_set_surface_tiling(ss, tiling);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER,
                      I915_GEM_DOMAIN_RENDER,
                      out_wpitch * out_hpitch,
                      offsetof(struct i965_surface_state, ss1),
                      pp_context->surfaces[index].s_bo);
    dri_bo_unmap(bo);

    /* sampler 8x8 state (AVS coefficient/control block) for the Y channel */
    dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
    assert(pp_context->sampler_state_table.bo_8x8->virtual);
    assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
    sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
    memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
    sampler_8x8_state->dw136.default_sharpness_level = 0;
    sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
    sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
    sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
    dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);

    /* sampler 8x8 descriptors */
    dri_bo_map(pp_context->sampler_state_table.bo, True);
    assert(pp_context->sampler_state_table.bo->virtual);
    assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
    sampler_8x8 = pp_context->sampler_state_table.bo->virtual;

    /* sample_8x8 Y index 1 */
    index = 1;
    memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
    sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
    sampler_8x8[index].dw0.ief_bypass = 0;      /* image-enhancement filter enabled */
    sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
    sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
    /* Pointer to the 8x8 state block above, in 32-byte units. */
    sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
    /* IEF tuning constants: noise/edge thresholds, weights and the
     * piecewise-linear (PWL) gain curves.  Hardware tuning values; the
     * same table is programmed for the UV channel below. */
    sampler_8x8[index].dw2.global_noise_estimation = 22;
    sampler_8x8[index].dw2.strong_edge_threshold = 8;
    sampler_8x8[index].dw2.weak_edge_threshold = 1;
    sampler_8x8[index].dw3.strong_edge_weight = 7;
    sampler_8x8[index].dw3.regular_weight = 2;
    sampler_8x8[index].dw3.non_edge_weight = 0;
    sampler_8x8[index].dw3.gain_factor = 40;
    sampler_8x8[index].dw4.steepness_boost = 0;
    sampler_8x8[index].dw4.steepness_threshold = 0;
    sampler_8x8[index].dw4.mr_boost = 0;
    sampler_8x8[index].dw4.mr_threshold = 5;
    sampler_8x8[index].dw5.pwl1_point_1 = 4;
    sampler_8x8[index].dw5.pwl1_point_2 = 12;
    sampler_8x8[index].dw5.pwl1_point_3 = 16;
    sampler_8x8[index].dw5.pwl1_point_4 = 26;
    sampler_8x8[index].dw6.pwl1_point_5 = 40;
    sampler_8x8[index].dw6.pwl1_point_6 = 160;
    sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
    sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
    sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
    sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
    sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
    sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
    sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
    sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
    sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
    sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
    sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
    sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
    sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
    sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
    sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
    sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
    sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
    sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
    sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
    sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
    sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
    sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
    sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
    sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
    sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
    sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
    sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
    sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
    sampler_8x8[index].dw13.limiter_boost = 0;
    sampler_8x8[index].dw13.minimum_limiter = 10;
    sampler_8x8[index].dw13.maximum_limiter = 11;
    sampler_8x8[index].dw14.clip_limiter = 130;
    /* dw1 holds a GPU address: relocate it against the Y 8x8 state bo. */
    dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
                      I915_GEM_DOMAIN_RENDER,
                      0,
                      0,
                      sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
                      pp_context->sampler_state_table.bo_8x8);

    /* sampler 8x8 state for the UV channel (adaptive filtering off) */
    dri_bo_map(pp_context->sampler_state_table.bo_8x8_uv, True);
    assert(pp_context->sampler_state_table.bo_8x8_uv->virtual);
    assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
    sampler_8x8_state = pp_context->sampler_state_table.bo_8x8_uv->virtual;
    memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
    sampler_8x8_state->dw136.default_sharpness_level = 0;
    sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 0;
    sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
    sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
    dri_bo_unmap(pp_context->sampler_state_table.bo_8x8_uv);

    /* sample_8x8 UV index 2 — same IEF table as Y, but nearest filtering */
    index = 2;
    memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
    sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_NEAREST;
    sampler_8x8[index].dw0.ief_bypass = 0;
    sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
    sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
    sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8_uv->offset >> 5;
    sampler_8x8[index].dw2.global_noise_estimation = 22;
    sampler_8x8[index].dw2.strong_edge_threshold = 8;
    sampler_8x8[index].dw2.weak_edge_threshold = 1;
    sampler_8x8[index].dw3.strong_edge_weight = 7;
    sampler_8x8[index].dw3.regular_weight = 2;
    sampler_8x8[index].dw3.non_edge_weight = 0;
    sampler_8x8[index].dw3.gain_factor = 40;
    sampler_8x8[index].dw4.steepness_boost = 0;
    sampler_8x8[index].dw4.steepness_threshold = 0;
    sampler_8x8[index].dw4.mr_boost = 0;
    sampler_8x8[index].dw4.mr_threshold = 5;
    sampler_8x8[index].dw5.pwl1_point_1 = 4;
    sampler_8x8[index].dw5.pwl1_point_2 = 12;
    sampler_8x8[index].dw5.pwl1_point_3 = 16;
    sampler_8x8[index].dw5.pwl1_point_4 = 26;
    sampler_8x8[index].dw6.pwl1_point_5 = 40;
    sampler_8x8[index].dw6.pwl1_point_6 = 160;
    sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
    sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
    sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
    sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
    sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
    sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
    sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
    sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
    sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
    sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
    sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
    sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
    sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
    sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
    sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
    sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
    sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
    sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
    sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
    sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
    sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
    sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
    sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
    sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
    sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
    sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
    sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
    sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
    sampler_8x8[index].dw13.limiter_boost = 0;
    sampler_8x8[index].dw13.minimum_limiter = 10;
    sampler_8x8[index].dw13.maximum_limiter = 11;
    sampler_8x8[index].dw14.clip_limiter = 130;
    dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
                      I915_GEM_DOMAIN_RENDER,
                      0,
                      0,
                      sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
                      pp_context->sampler_state_table.bo_8x8_uv);

    dri_bo_unmap(pp_context->sampler_state_table.bo);

    /* private function & data */
    pp_context->pp_x_steps = pp_avs_x_steps;
    pp_context->pp_y_steps = pp_avs_y_steps;
    pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;

    /* Destination rectangle, padded to whole 16x8 blocks. */
    pp_avs_context->dest_x = dst_rect->x;
    pp_avs_context->dest_y = dst_rect->y;
    pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
    pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
    /* Source origin normalized against both surface sizes — matches the
     * step computation below (step * dest pixels sums to rect/in ratio). */
    pp_avs_context->src_normalized_x = (float)src_rect->x / in_w / out_w;
    pp_avs_context->src_normalized_y = (float)src_rect->y / in_h / out_h;
    pp_avs_context->src_w = src_rect->width;
    pp_avs_context->src_h = src_rect->height;

    pp_static_parameter.grf4.r4_2.avs.nlas = 1;     /* enable non-linear anamorphic scaling */
    pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / out_h;

    pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / out_w;
    pp_inline_parameter.grf5.block_count_x = 1;        /* M x 1 */
    pp_inline_parameter.grf5.number_blocks = pp_avs_context->dest_h / 8;
    pp_inline_parameter.grf5.block_vertical_mask = 0xff;
    pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
    pp_inline_parameter.grf6.video_step_delta = 0.0;
}
1423
/* DN/DI processes the frame in a single horizontal pass, so only one
 * X step is dispatched. */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context;      /* unused */

    return 1;
}
1429
1430 static int
1431 pp_dndi_y_steps(void *private_context)
1432 {
1433     struct pp_dndi_context *pp_dndi_context = private_context;
1434
1435     return pp_dndi_context->dest_h / 4;
1436 }
1437
1438 static int
1439 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1440 {
1441     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
1442     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 4;
1443
1444     return 0;
1445 }
1446
1447 static 
1448 void pp_nv12_dndi_initialize(VADriverContextP ctx,
1449                              VASurfaceID in_surface_id, VASurfaceID out_surface_id,
1450                              const VARectangle *src_rect, const VARectangle *dst_rect)
1451 {
1452     struct i965_driver_data *i965 = i965_driver_data(ctx);
1453     struct i965_post_processing_context *pp_context = i965->pp_context;
1454     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
1455     struct object_surface *obj_surface;
1456     struct i965_surface_state *ss;
1457     struct i965_surface_state2 *ss_dndi;
1458     struct i965_sampler_dndi *sampler_dndi;
1459     dri_bo *bo;
1460     int index;
1461     int w, h;
1462     int orig_w, orig_h;
1463     unsigned int tiling, swizzle;
1464
1465     /* surface */
1466     obj_surface = SURFACE(in_surface_id);
1467     orig_w = obj_surface->orig_width;
1468     orig_h = obj_surface->orig_height;
1469     w = obj_surface->width;
1470     h = obj_surface->height;
1471     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
1472
1473     if (pp_context->stmm.bo == NULL) {
1474         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
1475                                            "STMM surface",
1476                                            w * h,
1477                                            4096);
1478         assert(pp_context->stmm.bo);
1479     }
1480
1481     /* source UV surface index 2 */
1482     index = 2;
1483     pp_context->surfaces[index].s_bo = obj_surface->bo;
1484     dri_bo_reference(pp_context->surfaces[index].s_bo);
1485     bo = dri_bo_alloc(i965->intel.bufmgr, 
1486                       "surface state", 
1487                       sizeof(struct i965_surface_state), 
1488                       4096);
1489     assert(bo);
1490     pp_context->surfaces[index].ss_bo = bo;
1491     dri_bo_map(bo, True);
1492     assert(bo->virtual);
1493     ss = bo->virtual;
1494     memset(ss, 0, sizeof(*ss));
1495     ss->ss0.surface_type = I965_SURFACE_2D;
1496     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1497     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
1498     ss->ss2.width = orig_w / 4 - 1;
1499     ss->ss2.height = orig_h / 2 - 1;
1500     ss->ss3.pitch = w - 1;
1501     pp_set_surface_tiling(ss, tiling);
1502     dri_bo_emit_reloc(bo,
1503                       I915_GEM_DOMAIN_RENDER, 
1504                       0,
1505                       w * h,
1506                       offsetof(struct i965_surface_state, ss1),
1507                       pp_context->surfaces[index].s_bo);
1508     dri_bo_unmap(bo);
1509
1510     /* source YUV surface index 4 */
1511     index = 4;
1512     pp_context->surfaces[index].s_bo = obj_surface->bo;
1513     dri_bo_reference(pp_context->surfaces[index].s_bo);
1514     bo = dri_bo_alloc(i965->intel.bufmgr, 
1515                       "YUV surface state for deinterlace ", 
1516                       sizeof(struct i965_surface_state2), 
1517                       4096);
1518     assert(bo);
1519     pp_context->surfaces[index].ss_bo = bo;
1520     dri_bo_map(bo, True);
1521     assert(bo->virtual);
1522     ss_dndi = bo->virtual;
1523     memset(ss_dndi, 0, sizeof(*ss_dndi));
1524     ss_dndi->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset;
1525     ss_dndi->ss1.cbcr_pixel_offset_v_direction = 0;
1526     ss_dndi->ss1.width = w - 1;
1527     ss_dndi->ss1.height = h - 1;
1528     ss_dndi->ss1.cbcr_pixel_offset_v_direction = 1;
1529     ss_dndi->ss2.half_pitch_for_chroma = 0;
1530     ss_dndi->ss2.pitch = w - 1;
1531     ss_dndi->ss2.interleave_chroma = 1;
1532     ss_dndi->ss2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
1533     ss_dndi->ss2.half_pitch_for_chroma = 0;
1534     ss_dndi->ss2.tiled_surface = 0;
1535     ss_dndi->ss3.x_offset_for_cb = 0;
1536     ss_dndi->ss3.y_offset_for_cb = h;
1537     pp_set_surface2_tiling(ss_dndi, tiling);
1538     dri_bo_emit_reloc(bo,
1539                       I915_GEM_DOMAIN_RENDER, 
1540                       0,
1541                       0,
1542                       offsetof(struct i965_surface_state2, ss0),
1543                       pp_context->surfaces[index].s_bo);
1544     dri_bo_unmap(bo);
1545
1546     /* source STMM surface index 20 */
1547     index = 20;
1548     pp_context->surfaces[index].s_bo = pp_context->stmm.bo;
1549     dri_bo_reference(pp_context->surfaces[index].s_bo);
1550     bo = dri_bo_alloc(i965->intel.bufmgr, 
1551                       "STMM surface state for deinterlace ", 
1552                       sizeof(struct i965_surface_state2), 
1553                       4096);
1554     assert(bo);
1555     pp_context->surfaces[index].ss_bo = bo;
1556     dri_bo_map(bo, True);
1557     assert(bo->virtual);
1558     ss = bo->virtual;
1559     memset(ss, 0, sizeof(*ss));
1560     ss->ss0.surface_type = I965_SURFACE_2D;
1561     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1562     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1563     ss->ss2.width = w - 1;
1564     ss->ss2.height = h - 1;
1565     ss->ss3.pitch = w - 1;
1566     dri_bo_emit_reloc(bo,
1567                       I915_GEM_DOMAIN_RENDER, 
1568                       I915_GEM_DOMAIN_RENDER,
1569                       0,
1570                       offsetof(struct i965_surface_state, ss1),
1571                       pp_context->surfaces[index].s_bo);
1572     dri_bo_unmap(bo);
1573
1574     /* destination surface */
1575     obj_surface = SURFACE(out_surface_id);
1576     orig_w = obj_surface->orig_width;
1577     orig_h = obj_surface->orig_height;
1578     w = obj_surface->width;
1579     h = obj_surface->height;
1580     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
1581
1582     /* destination Y surface index 7 */
1583     index = 7;
1584     pp_context->surfaces[index].s_bo = obj_surface->bo;
1585     dri_bo_reference(pp_context->surfaces[index].s_bo);
1586     bo = dri_bo_alloc(i965->intel.bufmgr, 
1587                       "surface state", 
1588                       sizeof(struct i965_surface_state), 
1589                       4096);
1590     assert(bo);
1591     pp_context->surfaces[index].ss_bo = bo;
1592     dri_bo_map(bo, True);
1593     assert(bo->virtual);
1594     ss = bo->virtual;
1595     memset(ss, 0, sizeof(*ss));
1596     ss->ss0.surface_type = I965_SURFACE_2D;
1597     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1598     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1599     ss->ss2.width = orig_w / 4 - 1;
1600     ss->ss2.height = orig_h - 1;
1601     ss->ss3.pitch = w - 1;
1602     pp_set_surface_tiling(ss, tiling);
1603     dri_bo_emit_reloc(bo,
1604                       I915_GEM_DOMAIN_RENDER, 
1605                       I915_GEM_DOMAIN_RENDER,
1606                       0,
1607                       offsetof(struct i965_surface_state, ss1),
1608                       pp_context->surfaces[index].s_bo);
1609     dri_bo_unmap(bo);
1610
1611     /* destination UV surface index 8 */
1612     index = 8;
1613     pp_context->surfaces[index].s_bo = obj_surface->bo;
1614     dri_bo_reference(pp_context->surfaces[index].s_bo);
1615     bo = dri_bo_alloc(i965->intel.bufmgr, 
1616                       "surface state", 
1617                       sizeof(struct i965_surface_state), 
1618                       4096);
1619     assert(bo);
1620     pp_context->surfaces[index].ss_bo = bo;
1621     dri_bo_map(bo, True);
1622     assert(bo->virtual);
1623     ss = bo->virtual;
1624     memset(ss, 0, sizeof(*ss));
1625     ss->ss0.surface_type = I965_SURFACE_2D;
1626     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1627     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
1628     ss->ss2.width = orig_w / 4 - 1;
1629     ss->ss2.height = orig_h / 2 - 1;
1630     ss->ss3.pitch = w - 1;
1631     pp_set_surface_tiling(ss, tiling);
1632     dri_bo_emit_reloc(bo,
1633                       I915_GEM_DOMAIN_RENDER, 
1634                       I915_GEM_DOMAIN_RENDER,
1635                       w * h,
1636                       offsetof(struct i965_surface_state, ss1),
1637                       pp_context->surfaces[index].s_bo);
1638     dri_bo_unmap(bo);
1639
1640     /* sampler dndi */
1641     dri_bo_map(pp_context->sampler_state_table.bo, True);
1642     assert(pp_context->sampler_state_table.bo->virtual);
1643     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
1644     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
1645
1646     /* sample dndi index 1 */
1647     index = 0;
1648     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
1649     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
1650     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
1651     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
1652
1653     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
1654     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
1655     sampler_dndi[index].dw1.stmm_c2 = 0;
1656     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
1657     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
1658
1659     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
1660     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
1661     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
1662     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
1663
1664     sampler_dndi[index].dw3.maximum_stmm = 128;
1665     sampler_dndi[index].dw3.multipler_for_vecm = 2;
1666     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
1667     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
1668     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
1669
1670     sampler_dndi[index].dw4.sdi_delta = 8;
1671     sampler_dndi[index].dw4.sdi_threshold = 128;
1672     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
1673     sampler_dndi[index].dw4.stmm_shift_up = 0;
1674     sampler_dndi[index].dw4.stmm_shift_down = 0;
1675     sampler_dndi[index].dw4.minimum_stmm = 0;
1676
1677     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
1678     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
1679     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
1680     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
1681
1682     sampler_dndi[index].dw6.dn_enable = 1;
1683     sampler_dndi[index].dw6.di_enable = 1;
1684     sampler_dndi[index].dw6.di_partial = 0;
1685     sampler_dndi[index].dw6.dndi_top_first = 1;
1686     sampler_dndi[index].dw6.dndi_stream_id = 1;
1687     sampler_dndi[index].dw6.dndi_first_frame = 1;
1688     sampler_dndi[index].dw6.progressive_dn = 0;
1689     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
1690     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
1691     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
1692
1693     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
1694     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
1695     sampler_dndi[index].dw7.vdi_walker_enable = 0;
1696     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
1697
1698     dri_bo_unmap(pp_context->sampler_state_table.bo);
1699
1700     /* private function & data */
1701     pp_context->pp_x_steps = pp_dndi_x_steps;
1702     pp_context->pp_y_steps = pp_dndi_y_steps;
1703     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
1704
1705     pp_static_parameter.grf1.statistics_surface_picth = w / 2;
1706     pp_static_parameter.grf1.r1_6.di.top_field_first = 0;
1707     pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m2 = 64;
1708     pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m1 = 192;
1709
1710     pp_inline_parameter.grf5.block_count_x = w / 16;   /* 1 x N */
1711     pp_inline_parameter.grf5.number_blocks = w / 16;
1712     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1713     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1714
1715     pp_dndi_context->dest_w = w;
1716     pp_dndi_context->dest_h = h;
1717 }
1718
/*
 * Prepare the Ironlake (Gen5) post-processing context for one run:
 * (re)allocate all GPU state buffer objects, drop references to the
 * surfaces used by the previous run, reset the CURBE parameter blocks,
 * and invoke the selected pp module's initialize() hook.
 *
 * NOTE(review): dri_bo_alloc() results are only checked with assert(),
 * so allocation failure is unhandled in NDEBUG builds.
 */
static void
ironlake_pp_initialize(
    VADriverContextP   ctx,
    VASurfaceID        in_surface_id,
    VASurfaceID        out_surface_id,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    int                pp_index        /* index into pp_context->pp_modules */
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_post_processing_context *pp_context = i965->pp_context;
    struct pp_module *pp_module;
    dri_bo *bo;
    int i;

    /* Constant buffer (CURBE) backing store. */
    dri_bo_unreference(pp_context->curbe.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "constant buffer",
                      4096, 
                      4096);
    assert(bo);
    pp_context->curbe.bo = bo;

    /* Binding table: one dword entry per surface state. */
    dri_bo_unreference(pp_context->binding_table.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr, 
                      "binding table",
                      sizeof(unsigned int), 
                      4096);
    assert(bo);
    pp_context->binding_table.bo = bo;

    /* Interface descriptor table (Gen5 descriptor layout). */
    dri_bo_unreference(pp_context->idrt.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr, 
                      "interface discriptor", 
                      sizeof(struct i965_interface_descriptor), 
                      4096);
    assert(bo);
    pp_context->idrt.bo = bo;
    pp_context->idrt.num_interface_descriptors = 0;

    /* Sampler state table, zeroed up front because a pp module may
     * fill in only the samplers it uses. */
    dri_bo_unreference(pp_context->sampler_state_table.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr, 
                      "sampler state table", 
                      4096,
                      4096);
    assert(bo);
    dri_bo_map(bo, True);
    memset(bo->virtual, 0, bo->size);
    dri_bo_unmap(bo);
    pp_context->sampler_state_table.bo = bo;

    /* 8x8 (AVS) sampler state. */
    dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
    bo = dri_bo_alloc(i965->intel.bufmgr, 
                      "sampler 8x8 state ",
                      4096,
                      4096);
    assert(bo);
    pp_context->sampler_state_table.bo_8x8 = bo;

    /* 8x8 (AVS) sampler state for the chroma plane. */
    dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
    bo = dri_bo_alloc(i965->intel.bufmgr, 
                      "sampler 8x8 state ",
                      4096,
                      4096);
    assert(bo);
    pp_context->sampler_state_table.bo_8x8_uv = bo;

    /* Media VFE pipeline state. */
    dri_bo_unreference(pp_context->vfe_state.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr, 
                      "vfe state", 
                      sizeof(struct i965_vfe_state), 
                      4096);
    assert(bo);
    pp_context->vfe_state.bo = bo;
    
    /* Release surface slots from the previous run; the module's
     * initialize() hook repopulates the slots it needs. */
    for (i = 0; i < MAX_PP_SURFACES; i++) {
        dri_bo_unreference(pp_context->surfaces[i].ss_bo);
        pp_context->surfaces[i].ss_bo = NULL;

        dri_bo_unreference(pp_context->surfaces[i].s_bo);
        pp_context->surfaces[i].s_bo = NULL;
    }

    /* Start from clean kernel parameters; initialize() fills them in. */
    memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
    memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
    assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
    pp_context->current_pp = pp_index;
    pp_module = &pp_context->pp_modules[pp_index];
    
    if (pp_module->initialize)
        pp_module->initialize(ctx, in_surface_id, out_surface_id,
                              src_rect, dst_rect);
}
1813
1814 static void
1815 ironlake_post_processing(
1816     VADriverContextP   ctx,
1817     VASurfaceID        in_surface_id,
1818     VASurfaceID        out_surface_id,
1819     const VARectangle *src_rect,
1820     const VARectangle *dst_rect,
1821     int                pp_index
1822 )
1823 {
1824     ironlake_pp_initialize(ctx, in_surface_id, out_surface_id, src_rect, dst_rect, pp_index);
1825     ironlake_pp_states_setup(ctx);
1826     ironlake_pp_pipeline_setup(ctx);
1827 }
1828
/*
 * Prepare the Gen6/Gen7 post-processing context for one run:
 * (re)allocate all GPU state buffer objects, drop references to the
 * surfaces used by the previous run, reset the CURBE parameter blocks,
 * and invoke the selected pp module's initialize() hook.  Mirrors
 * ironlake_pp_initialize() except for the gen6 interface descriptor
 * layout.
 *
 * NOTE(review): dri_bo_alloc() results are only checked with assert(),
 * so allocation failure is unhandled in NDEBUG builds.
 */
static void
gen6_pp_initialize(
    VADriverContextP   ctx,
    VASurfaceID        in_surface_id,
    VASurfaceID        out_surface_id,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    int                pp_index        /* index into pp_context->pp_modules */
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_post_processing_context *pp_context = i965->pp_context;
    struct pp_module *pp_module;
    dri_bo *bo;
    int i;

    /* Constant buffer (CURBE) backing store. */
    dri_bo_unreference(pp_context->curbe.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "constant buffer",
                      4096, 
                      4096);
    assert(bo);
    pp_context->curbe.bo = bo;

    /* Binding table: one dword entry per surface state. */
    dri_bo_unreference(pp_context->binding_table.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr, 
                      "binding table",
                      sizeof(unsigned int), 
                      4096);
    assert(bo);
    pp_context->binding_table.bo = bo;

    /* Interface descriptor table (gen6 descriptor layout). */
    dri_bo_unreference(pp_context->idrt.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr, 
                      "interface discriptor", 
                      sizeof(struct gen6_interface_descriptor_data), 
                      4096);
    assert(bo);
    pp_context->idrt.bo = bo;
    pp_context->idrt.num_interface_descriptors = 0;

    /* Sampler state table, zeroed up front because a pp module may
     * fill in only the samplers it uses. */
    dri_bo_unreference(pp_context->sampler_state_table.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr, 
                      "sampler state table", 
                      4096,
                      4096);
    assert(bo);
    dri_bo_map(bo, True);
    memset(bo->virtual, 0, bo->size);
    dri_bo_unmap(bo);
    pp_context->sampler_state_table.bo = bo;

    /* 8x8 (AVS) sampler state. */
    dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
    bo = dri_bo_alloc(i965->intel.bufmgr, 
                      "sampler 8x8 state ",
                      4096,
                      4096);
    assert(bo);
    pp_context->sampler_state_table.bo_8x8 = bo;

    /* 8x8 (AVS) sampler state for the chroma plane. */
    dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
    bo = dri_bo_alloc(i965->intel.bufmgr, 
                      "sampler 8x8 state ",
                      4096,
                      4096);
    assert(bo);
    pp_context->sampler_state_table.bo_8x8_uv = bo;

    /* Media VFE pipeline state. */
    dri_bo_unreference(pp_context->vfe_state.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr, 
                      "vfe state", 
                      sizeof(struct i965_vfe_state), 
                      4096);
    assert(bo);
    pp_context->vfe_state.bo = bo;
    
    /* Release surface slots from the previous run; the module's
     * initialize() hook repopulates the slots it needs. */
    for (i = 0; i < MAX_PP_SURFACES; i++) {
        dri_bo_unreference(pp_context->surfaces[i].ss_bo);
        pp_context->surfaces[i].ss_bo = NULL;

        dri_bo_unreference(pp_context->surfaces[i].s_bo);
        pp_context->surfaces[i].s_bo = NULL;
    }

    /* Start from clean kernel parameters; initialize() fills them in. */
    memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
    memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
    assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
    pp_context->current_pp = pp_index;
    pp_module = &pp_context->pp_modules[pp_index];
    
    if (pp_module->initialize)
        pp_module->initialize(ctx, in_surface_id, out_surface_id,
                              src_rect, dst_rect);
}
1923
1924 static void
1925 gen6_pp_binding_table(struct i965_post_processing_context *pp_context)
1926 {
1927     unsigned int *binding_table;
1928     dri_bo *bo = pp_context->binding_table.bo;
1929     int i;
1930
1931     dri_bo_map(bo, 1);
1932     assert(bo->virtual);
1933     binding_table = bo->virtual;
1934     memset(binding_table, 0, bo->size);
1935
1936     for (i = 0; i < MAX_PP_SURFACES; i++) {
1937         if (pp_context->surfaces[i].ss_bo) {
1938             assert(pp_context->surfaces[i].s_bo);
1939
1940             binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
1941             dri_bo_emit_reloc(bo,
1942                               I915_GEM_DOMAIN_INSTRUCTION, 0,
1943                               0,
1944                               i * sizeof(*binding_table),
1945                               pp_context->surfaces[i].ss_bo);
1946         }
1947     
1948     }
1949
1950     dri_bo_unmap(bo);
1951 }
1952
/*
 * Write a single gen6 interface descriptor pointing at the current pp
 * module's kernel, the sampler state table and the binding table, then
 * emit relocations for the three embedded GPU addresses.  Each reloc
 * delta carries the non-address bits packed into the same dword.
 */
static void
gen6_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
{
    struct gen6_interface_descriptor_data *desc;
    dri_bo *bo;
    int pp_index = pp_context->current_pp;

    bo = pp_context->idrt.bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);
    desc = bo->virtual;
    memset(desc, 0, sizeof(*desc));
    desc->desc0.kernel_start_pointer = 
        pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
    desc->desc1.single_program_flow = 1;
    desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
    desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
    desc->desc2.sampler_state_pointer = 
        pp_context->sampler_state_table.bo->offset >> 5;
    desc->desc3.binding_table_entry_count = 0;
    desc->desc3.binding_table_pointer = 
        pp_context->binding_table.bo->offset >> 5; /*reloc */
    desc->desc4.constant_urb_entry_read_offset = 0;
    desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */

    /* desc0: kernel start pointer. */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct gen6_interface_descriptor_data, desc0),
                      pp_context->pp_modules[pp_index].kernel.bo);

    /* desc2: sampler state pointer; delta preserves the sampler_count
     * bits sharing the dword. */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      desc->desc2.sampler_count << 2,
                      offsetof(struct gen6_interface_descriptor_data, desc2),
                      pp_context->sampler_state_table.bo);

    /* desc3: binding table pointer; delta preserves the entry count. */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      desc->desc3.binding_table_entry_count,
                      offsetof(struct gen6_interface_descriptor_data, desc3),
                      pp_context->binding_table.bo);

    dri_bo_unmap(bo);
    pp_context->idrt.num_interface_descriptors++;
}
1999
2000 static void
2001 gen6_pp_upload_constants(struct i965_post_processing_context *pp_context)
2002 {
2003     unsigned char *constant_buffer;
2004
2005     assert(sizeof(pp_static_parameter) == 128);
2006     dri_bo_map(pp_context->curbe.bo, 1);
2007     assert(pp_context->curbe.bo->virtual);
2008     constant_buffer = pp_context->curbe.bo->virtual;
2009     memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
2010     dri_bo_unmap(pp_context->curbe.bo);
2011 }
2012
2013 static void
2014 gen6_pp_states_setup(VADriverContextP ctx)
2015 {
2016     struct i965_driver_data *i965 = i965_driver_data(ctx);
2017     struct i965_post_processing_context *pp_context = i965->pp_context;
2018
2019     gen6_pp_binding_table(pp_context);
2020     gen6_pp_interface_descriptor_table(pp_context);
2021     gen6_pp_upload_constants(pp_context);
2022 }
2023
2024 static void
2025 gen6_pp_pipeline_select(VADriverContextP ctx)
2026 {
2027     struct i965_driver_data *i965 = i965_driver_data(ctx);
2028     struct intel_batchbuffer *batch = i965->batch;
2029
2030     BEGIN_BATCH(batch, 1);
2031     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
2032     ADVANCE_BATCH(batch);
2033 }
2034
2035 static void
2036 gen6_pp_state_base_address(VADriverContextP ctx)
2037 {
2038     struct i965_driver_data *i965 = i965_driver_data(ctx);
2039     struct intel_batchbuffer *batch = i965->batch;
2040
2041     BEGIN_BATCH(batch, 10);
2042     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
2043     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2044     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2045     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2046     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2047     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2048     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2049     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2050     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2051     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2052     ADVANCE_BATCH(batch);
2053 }
2054
/*
 * Emit CMD_MEDIA_VFE_STATE configuring the URB for the media pipeline:
 * maximum thread count, number of VFE URB entries, and the VFE/CURBE
 * allocation sizes (converted from the context's 512-bit units to the
 * 256-bit units the command expects).
 */
static void
gen6_pp_vfe_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 8);
    OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
    OUT_BATCH(batch, 0);
    /* max threads (num_vfe_entries - 1) | URB entry count */
    OUT_BATCH(batch,
              (pp_context->urb.num_vfe_entries - 1) << 16 |
              pp_context->urb.num_vfe_entries << 8);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch,
              (pp_context->urb.size_vfe_entry * 2) << 16 |  /* in 256 bits unit */
              (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 - 1));            /* in 256 bits unit */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
2076
/*
 * Emit CMD_MEDIA_CURBE_LOAD pointing the pipeline at the constant
 * buffer uploaded by gen6_pp_upload_constants().
 */
static void
gen6_pp_curbe_load(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* The programmed CURBE length must fit in the allocated buffer. */
    assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512 <= pp_context->curbe.bo->size);

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    /* CURBE total data length, derived from the CS URB allocation
     * (size_cs_entry/num_cs_entries are kept in 512-bit units). */
    OUT_BATCH(batch,
              pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512);
    OUT_RELOC(batch, 
              pp_context->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    ADVANCE_BATCH(batch);
}
2096
2097 static void
2098 gen6_interface_descriptor_load(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2099 {
2100     struct i965_driver_data *i965 = i965_driver_data(ctx);
2101     struct intel_batchbuffer *batch = i965->batch;
2102
2103     BEGIN_BATCH(batch, 4);
2104     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
2105     OUT_BATCH(batch, 0);
2106     OUT_BATCH(batch,
2107               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
2108     OUT_RELOC(batch, 
2109               pp_context->idrt.bo,
2110               I915_GEM_DOMAIN_INSTRUCTION, 0,
2111               0);
2112     ADVANCE_BATCH(batch);
2113 }
2114
/*
 * Emit one CMD_MEDIA_OBJECT per kernel block.  The pp module's
 * pp_x_steps/pp_y_steps callbacks give the walk dimensions, and
 * pp_set_block_parameter updates the global pp_inline_parameter for
 * block (x, y); a non-zero return from it skips that block.  Each
 * MEDIA_OBJECT carries the 64-byte inline parameter (grf 5-6).
 */
static void
gen6_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int x, x_steps, y, y_steps;

    x_steps = pp_context->pp_x_steps(&pp_context->private_context);
    y_steps = pp_context->pp_y_steps(&pp_context->private_context);

    for (y = 0; y < y_steps; y++) {
        for (x = 0; x < x_steps; x++) {
            if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
                BEGIN_BATCH(batch, 22);
                OUT_BATCH(batch, CMD_MEDIA_OBJECT | 20); /* 6 fixed + 16 inline dwords */
                OUT_BATCH(batch, 0);
                OUT_BATCH(batch, 0); /* no indirect data */
                OUT_BATCH(batch, 0);
                OUT_BATCH(batch, 0); /* scoreboard */
                OUT_BATCH(batch, 0);

                /* inline data grf 5-6 */
                assert(sizeof(pp_inline_parameter) == 64);
                intel_batchbuffer_data(batch, &pp_inline_parameter, sizeof(pp_inline_parameter));

                ADVANCE_BATCH(batch);
            }
        }
    }
}
2145
2146 static void
2147 gen6_pp_pipeline_setup(VADriverContextP ctx)
2148 {
2149     struct i965_driver_data *i965 = i965_driver_data(ctx);
2150     struct intel_batchbuffer *batch = i965->batch;
2151     struct i965_post_processing_context *pp_context = i965->pp_context;
2152
2153     intel_batchbuffer_start_atomic(batch, 0x1000);
2154     intel_batchbuffer_emit_mi_flush(batch);
2155     gen6_pp_pipeline_select(ctx);
2156     gen6_pp_curbe_load(ctx, pp_context);
2157     gen6_interface_descriptor_load(ctx, pp_context);
2158     gen6_pp_state_base_address(ctx);
2159     gen6_pp_vfe_state(ctx, pp_context);
2160     gen6_pp_object_walker(ctx, pp_context);
2161     intel_batchbuffer_end_atomic(batch);
2162 }
2163
2164 static void
2165 gen6_post_processing(
2166     VADriverContextP   ctx,
2167     VASurfaceID        in_surface_id,
2168     VASurfaceID        out_surface_id,
2169     const VARectangle *src_rect,
2170     const VARectangle *dst_rect,
2171     int                pp_index
2172 )
2173 {
2174     gen6_pp_initialize(ctx, in_surface_id, out_surface_id, src_rect, dst_rect, pp_index);
2175     gen6_pp_states_setup(ctx);
2176     gen6_pp_pipeline_setup(ctx);
2177 }
2178
2179 static void
2180 i965_post_processing_internal(
2181     VADriverContextP   ctx,
2182     VASurfaceID        in_surface_id,
2183     VASurfaceID        out_surface_id,
2184     const VARectangle *src_rect,
2185     const VARectangle *dst_rect,
2186     int                pp_index
2187 )
2188 {
2189     struct i965_driver_data *i965 = i965_driver_data(ctx);
2190
2191     if (IS_GEN6(i965->intel.device_id) ||
2192         IS_GEN7(i965->intel.device_id))
2193         gen6_post_processing(ctx, in_surface_id, out_surface_id, src_rect, dst_rect, pp_index);
2194     else
2195         ironlake_post_processing(ctx, in_surface_id, out_surface_id, src_rect, dst_rect, pp_index);
2196 }
2197
/* Forward declarations for the surface management entry points defined
 * elsewhere in the driver; used below to create and destroy the
 * temporary NV12 surfaces needed for deinterlacing and scaling. */
VAStatus 
i965_DestroySurfaces(VADriverContextP ctx,
                     VASurfaceID *surface_list,
                     int num_surfaces);
VAStatus 
i965_CreateSurfaces(VADriverContextP ctx,
                    int width,
                    int height,
                    int format,
                    int num_surfaces,
                    VASurfaceID *surfaces);
2209 VASurfaceID
2210 i965_post_processing(
2211     VADriverContextP   ctx,
2212     VASurfaceID        surface,
2213     const VARectangle *src_rect,
2214     const VARectangle *dst_rect,
2215     unsigned int       flags,
2216     int               *has_done_scaling  
2217 )
2218 {
2219     struct i965_driver_data *i965 = i965_driver_data(ctx);
2220     VASurfaceID in_surface_id = surface;
2221     VASurfaceID out_surface_id = VA_INVALID_ID;
2222
2223     if (HAS_PP(i965)) {
2224         /* Currently only support post processing for NV12 surface */
2225         if (i965->render_state.interleaved_uv) {
2226             struct object_surface *obj_surface;
2227             VAStatus status;
2228
2229             if (flags & I965_PP_FLAG_DEINTERLACING) {
2230                 obj_surface = SURFACE(in_surface_id);
2231                 status = i965_CreateSurfaces(ctx,
2232                                              obj_surface->orig_width,
2233                                              obj_surface->orig_height,
2234                                              VA_RT_FORMAT_YUV420,
2235                                              1,
2236                                              &out_surface_id);
2237                 assert(status == VA_STATUS_SUCCESS);
2238                 obj_surface = SURFACE(out_surface_id);
2239                 i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
2240                 i965_post_processing_internal(ctx,
2241                                               in_surface_id, out_surface_id,
2242                                               src_rect, dst_rect,
2243                                               PP_NV12_DNDI);
2244             }
2245
2246             if (flags & I965_PP_FLAG_AVS) {
2247                 struct i965_render_state *render_state = &i965->render_state;
2248                 struct intel_region *dest_region = render_state->draw_region;
2249
2250                 if (out_surface_id != VA_INVALID_ID)
2251                     in_surface_id = out_surface_id;
2252
2253                 status = i965_CreateSurfaces(ctx,
2254                                              dest_region->width,
2255                                              dest_region->height,
2256                                              VA_RT_FORMAT_YUV420,
2257                                              1,
2258                                              &out_surface_id);
2259                 assert(status == VA_STATUS_SUCCESS);
2260                 obj_surface = SURFACE(out_surface_id);
2261                 i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
2262                 i965_post_processing_internal(ctx,
2263                                               in_surface_id, out_surface_id,
2264                                               src_rect, dst_rect,
2265                                               PP_NV12_AVS);
2266
2267                 if (in_surface_id != surface)
2268                     i965_DestroySurfaces(ctx, &in_surface_id, 1);
2269                 
2270                 *has_done_scaling = 1;
2271             }
2272         }
2273     }
2274
2275     return out_surface_id;
2276 }       
2277
/*
 * Drop all references held by the post-processing context (constant
 * buffer, per-slot surface state and surface BOs, sampler tables,
 * binding table, interface descriptors, VFE state, the deinterlacer's
 * STMM history buffer and each module's kernel BO), then free the
 * context.  Safe to call when no pp context was created.  Always
 * returns True.
 */
Bool
i965_post_processing_terminate(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_post_processing_context *pp_context = i965->pp_context;
    int i;

    if (HAS_PP(i965)) {
        if (pp_context) {
            dri_bo_unreference(pp_context->curbe.bo);
            pp_context->curbe.bo = NULL;

            /* Per-slot surface state and surface buffer objects. */
            for (i = 0; i < MAX_PP_SURFACES; i++) {
                dri_bo_unreference(pp_context->surfaces[i].ss_bo);
                pp_context->surfaces[i].ss_bo = NULL;

                dri_bo_unreference(pp_context->surfaces[i].s_bo);
                pp_context->surfaces[i].s_bo = NULL;
            }

            dri_bo_unreference(pp_context->sampler_state_table.bo);
            pp_context->sampler_state_table.bo = NULL;

            dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
            pp_context->sampler_state_table.bo_8x8 = NULL;

            dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
            pp_context->sampler_state_table.bo_8x8_uv = NULL;

            dri_bo_unreference(pp_context->binding_table.bo);
            pp_context->binding_table.bo = NULL;

            dri_bo_unreference(pp_context->idrt.bo);
            pp_context->idrt.bo = NULL;
            pp_context->idrt.num_interface_descriptors = 0;

            dri_bo_unreference(pp_context->vfe_state.bo);
            pp_context->vfe_state.bo = NULL;

            dri_bo_unreference(pp_context->stmm.bo);
            pp_context->stmm.bo = NULL;

            /* Release each pp module's shader kernel. */
            for (i = 0; i < NUM_PP_MODULES; i++) {
                struct pp_module *pp_module = &pp_context->pp_modules[i];

                dri_bo_unreference(pp_module->kernel.bo);
                pp_module->kernel.bo = NULL;
            }

            free(pp_context);
        }

        i965->pp_context = NULL;
    }

    return True;
}
2335
2336 Bool
2337 i965_post_processing_init(VADriverContextP ctx)
2338 {
2339     struct i965_driver_data *i965 = i965_driver_data(ctx);
2340     struct i965_post_processing_context *pp_context = i965->pp_context;
2341     int i;
2342
2343     if (HAS_PP(i965)) {
2344         if (pp_context == NULL) {
2345             pp_context = calloc(1, sizeof(*pp_context));
2346             i965->pp_context = pp_context;
2347
2348             pp_context->urb.size = URB_SIZE((&i965->intel));
2349             pp_context->urb.num_vfe_entries = 32;
2350             pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
2351             pp_context->urb.num_cs_entries = 1;
2352             pp_context->urb.size_cs_entry = 2;      /* in 512 bits unit */
2353             pp_context->urb.vfe_start = 0;
2354             pp_context->urb.cs_start = pp_context->urb.vfe_start + 
2355                 pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
2356             assert(pp_context->urb.cs_start + 
2357                    pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
2358
2359             assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
2360             assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
2361
2362             if (IS_GEN6(i965->intel.device_id) ||
2363                 IS_GEN7(i965->intel.device_id))
2364                 memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
2365             else if (IS_IRONLAKE(i965->intel.device_id))
2366                 memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
2367
2368             for (i = 0; i < NUM_PP_MODULES; i++) {
2369                 struct pp_module *pp_module = &pp_context->pp_modules[i];
2370                 dri_bo_unreference(pp_module->kernel.bo);
2371                 pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
2372                                                     pp_module->kernel.name,
2373                                                     pp_module->kernel.size,
2374                                                     4096);
2375                 assert(pp_module->kernel.bo);
2376                 dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
2377             }
2378         }
2379     }
2380
2381     return True;
2382 }