/* src/i965_post_processing.c — from profile/ivi/vaapi-intel-driver.git
 * (snapshot note: "Moved files around.") */
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include <va/va_backend.h>
35
36 #include "intel_batchbuffer.h"
37 #include "intel_driver.h"
38 #include "i965_defines.h"
39 #include "i965_structs.h"
40 #include "i965_drv_video.h"
41 #include "i965_post_processing.h"
42 #include "i965_render.h"
43
/* True when the device generation has a post-processing pipeline:
 * Ironlake (Gen5), Gen6 or Gen7. */
#define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) ||     \
                     IS_GEN6((ctx)->intel.device_id) ||         \
                     IS_GEN7((ctx)->intel.device_id))
47
/* Pre-compiled Gen5 (Ironlake) media kernels.  Each included .g4b.gen5
 * file expands to the kernel binary as rows of four dwords. */
static const uint32_t pp_null_gen5[][4] = {
#include "shaders/post_processing/null.g4b.gen5"
};

static const uint32_t pp_nv12_load_save_gen5[][4] = {
#include "shaders/post_processing/nv12_load_save_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_scaling_gen5[][4] = {
#include "shaders/post_processing/nv12_scaling_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_avs_gen5[][4] = {
#include "shaders/post_processing/nv12_avs_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_dndi_gen5[][4] = {
#include "shaders/post_processing/nv12_dndi_nv12.g4b.gen5"
};
67
/* Per-module initialize callbacks, referenced by the pp_modules_gen5/gen6
 * tables below.  Each one binds input/output surfaces and installs the
 * block-walker callbacks for its kernel. */
static void pp_null_initialize(VADriverContextP ctx,
                               VASurfaceID in_surface_id, VASurfaceID out_surface_id,
                               const VARectangle *src_rect, const VARectangle *dst_rect);
static void pp_nv12_avs_initialize(VADriverContextP ctx,
                                   VASurfaceID in_surface_id, VASurfaceID out_surface_id,
                                   const VARectangle *src_rect, const VARectangle *dst_rect);
static void pp_nv12_scaling_initialize(VADriverContextP ctx,
                                       VASurfaceID in_surface_id, VASurfaceID out_surface_id,
                                       const VARectangle *src_rect, const VARectangle *dst_rect);
static void pp_nv12_load_save_initialize(VADriverContextP ctx,
                                         VASurfaceID in_surface_id, VASurfaceID out_surface_id,
                                         const VARectangle *src_rect, const VARectangle *dst_rect);
static void pp_nv12_dndi_initialize(VADriverContextP ctx,
                                    VASurfaceID in_surface_id, VASurfaceID out_surface_id,
                                    const VARectangle *src_rect, const VARectangle *dst_rect);
83
/* Gen5 (Ironlake) post-processing module table.  Each entry pairs a
 * kernel descriptor — name, PP_* id, the included binary and its size,
 * plus a NULL bo slot (presumably filled in when the kernels are
 * uploaded at context init — TODO confirm) — with the module's
 * initialize callback. */
static struct pp_module pp_modules_gen5[] = {
    {
        {
            "NULL module (for testing)",
            PP_NULL,
            pp_null_gen5,
            sizeof(pp_null_gen5),
            NULL,
        },

        pp_null_initialize,
    },

    {
        {
            "NV12 Load & Save module",
            PP_NV12_LOAD_SAVE,
            pp_nv12_load_save_gen5,
            sizeof(pp_nv12_load_save_gen5),
            NULL,
        },

        pp_nv12_load_save_initialize,
    },

    {
        {
            "NV12 Scaling module",
            PP_NV12_SCALING,
            pp_nv12_scaling_gen5,
            sizeof(pp_nv12_scaling_gen5),
            NULL,
        },

        pp_nv12_scaling_initialize,
    },

    {
        {
            "NV12 AVS module",
            PP_NV12_AVS,
            pp_nv12_avs_gen5,
            sizeof(pp_nv12_avs_gen5),
            NULL,
        },

        pp_nv12_avs_initialize,
    },

    {
        {
            "NV12 DNDI module",
            PP_NV12_DNDI,
            pp_nv12_dndi_gen5,
            sizeof(pp_nv12_dndi_gen5),
            NULL,
        },

        pp_nv12_dndi_initialize,
    },
};
145
/* Pre-compiled Gen6 media kernels, same layout as the Gen5 tables above:
 * each included .g6b file expands to rows of four dwords. */
static const uint32_t pp_null_gen6[][4] = {
#include "shaders/post_processing/null.g6b"
};

static const uint32_t pp_nv12_load_save_gen6[][4] = {
#include "shaders/post_processing/nv12_load_save_nv12.g6b"
};

static const uint32_t pp_nv12_scaling_gen6[][4] = {
#include "shaders/post_processing/nv12_scaling_nv12.g6b"
};

static const uint32_t pp_nv12_avs_gen6[][4] = {
#include "shaders/post_processing/nv12_avs_nv12.g6b"
};

static const uint32_t pp_nv12_dndi_gen6[][4] = {
#include "shaders/post_processing/nv12_dndi_nv12.g6b"
};
165
/* Gen6 post-processing module table; mirrors pp_modules_gen5 entry for
 * entry, but references the Gen6 kernel binaries. */
static struct pp_module pp_modules_gen6[] = {
    {
        {
            "NULL module (for testing)",
            PP_NULL,
            pp_null_gen6,
            sizeof(pp_null_gen6),
            NULL,
        },

        pp_null_initialize,
    },

    {
        {
            "NV12 Load & Save module",
            PP_NV12_LOAD_SAVE,
            pp_nv12_load_save_gen6,
            sizeof(pp_nv12_load_save_gen6),
            NULL,
        },

        pp_nv12_load_save_initialize,
    },

    {
        {
            "NV12 Scaling module",
            PP_NV12_SCALING,
            pp_nv12_scaling_gen6,
            sizeof(pp_nv12_scaling_gen6),
            NULL,
        },

        pp_nv12_scaling_initialize,
    },

    {
        {
            "NV12 AVS module",
            PP_NV12_AVS,
            pp_nv12_avs_gen6,
            sizeof(pp_nv12_avs_gen6),
            NULL,
        },

        pp_nv12_avs_initialize,
    },

    {
        {
            "NV12 DNDI module",
            PP_NV12_DNDI,
            pp_nv12_dndi_gen6,
            sizeof(pp_nv12_dndi_gen6),
            NULL,
        },

        pp_nv12_dndi_initialize,
    },
};
227
/* Shorthand for the CURBE (static) and MEDIA_OBJECT inline parameter
 * blocks.  NOTE: these macros silently require a local variable named
 * `pp_context` to be in scope at every use site. */
#define pp_static_parameter     pp_context->pp_static_parameter
#define pp_inline_parameter     pp_context->pp_inline_parameter
230
231 static void
232 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
233 {
234     switch (tiling) {
235     case I915_TILING_NONE:
236         ss->ss3.tiled_surface = 0;
237         ss->ss3.tile_walk = 0;
238         break;
239     case I915_TILING_X:
240         ss->ss3.tiled_surface = 1;
241         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
242         break;
243     case I915_TILING_Y:
244         ss->ss3.tiled_surface = 1;
245         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
246         break;
247     }
248 }
249
250 static void
251 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
252 {
253     switch (tiling) {
254     case I915_TILING_NONE:
255         ss->ss2.tiled_surface = 0;
256         ss->ss2.tile_walk = 0;
257         break;
258     case I915_TILING_X:
259         ss->ss2.tiled_surface = 1;
260         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
261         break;
262     case I915_TILING_Y:
263         ss->ss2.tiled_surface = 1;
264         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
265         break;
266     }
267 }
268
/* Intentionally empty: surface states are built per module in the
 * pp_*_initialize() callbacks, not here.  Kept so the setup sequence in
 * ironlake_pp_states_setup() reads as a complete pipeline. */
static void
ironlake_pp_surface_state(struct i965_post_processing_context *pp_context)
{

}
274
/* Fill the single interface descriptor for the currently selected PP
 * module: kernel entry point, CURBE read length, sampler state and
 * binding table pointers.  The three emit_reloc calls make the GPU
 * addresses written above valid at execution time. */
static void
ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
{
    struct i965_interface_descriptor *desc;
    dri_bo *bo;
    int pp_index = pp_context->current_pp;

    bo = pp_context->idrt.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;
    memset(desc, 0, sizeof(*desc));
    desc->desc0.grf_reg_blocks = 10;
    /* Kernel address in 64-byte units; patched by the reloc below. */
    desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
    desc->desc1.const_urb_entry_read_offset = 0;
    desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
    /* Sampler/binding table pointers are in 32-byte units. */
    desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
    desc->desc2.sampler_count = 0;
    desc->desc3.binding_table_entry_count = 0;
    desc->desc3.binding_table_pointer = 
        pp_context->binding_table.bo->offset >> 5; /*reloc */

    /* The delta argument re-applies the low bits OR'd into each dword
     * (grf_reg_blocks, sampler_count, entry_count) on top of the
     * relocated address. */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      desc->desc0.grf_reg_blocks,
                      offsetof(struct i965_interface_descriptor, desc0),
                      pp_context->pp_modules[pp_index].kernel.bo);

    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      desc->desc2.sampler_count << 2,
                      offsetof(struct i965_interface_descriptor, desc2),
                      pp_context->sampler_state_table.bo);

    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      desc->desc3.binding_table_entry_count,
                      offsetof(struct i965_interface_descriptor, desc3),
                      pp_context->binding_table.bo);

    dri_bo_unmap(bo);
    pp_context->idrt.num_interface_descriptors++;
}
318
/* Build the binding table: one dword per surface slot, pointing at the
 * surface state bo allocated by the module's initialize callback.
 * Slots without a surface state stay zero. */
static void
ironlake_pp_binding_table(struct i965_post_processing_context *pp_context)
{
    unsigned int *binding_table;
    dri_bo *bo = pp_context->binding_table.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    binding_table = bo->virtual;
    memset(binding_table, 0, bo->size);

    for (i = 0; i < MAX_PP_SURFACES; i++) {
        if (pp_context->surfaces[i].ss_bo) {
            assert(pp_context->surfaces[i].s_bo);

            /* Presumed GPU offset; fixed up by the reloc below. */
            binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
            dri_bo_emit_reloc(bo,
                              I915_GEM_DOMAIN_INSTRUCTION, 0,
                              0,
                              i * sizeof(*binding_table),
                              pp_context->surfaces[i].ss_bo);
        }
    
    }

    dri_bo_unmap(bo);
}
347
/* Program the VFE (video front end) fixed function: thread count, URB
 * entry layout, generic mode, and the interface descriptor base
 * address (relocated). */
static void
ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
{
    struct i965_vfe_state *vfe_state;
    dri_bo *bo;

    bo = pp_context->vfe_state.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);
    vfe_state = bo->virtual;
    memset(vfe_state, 0, sizeof(*vfe_state));
    /* Hardware fields are "value minus one" encoded. */
    vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
    vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
    vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
    vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
    vfe_state->vfe1.children_present = 0;
    /* IDRT base in 16-byte units; patched by the reloc below. */
    vfe_state->vfe2.interface_descriptor_base = 
        pp_context->idrt.bo->offset >> 4; /* reloc */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_vfe_state, vfe2),
                      pp_context->idrt.bo);
    dri_bo_unmap(bo);
}
373
/* Copy the static parameter block into the CURBE bo so the kernel can
 * read it as constants (grf 1-4, i.e. 4 x 32 bytes = 128 bytes — the
 * assert pins that layout assumption). */
static void
ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
{
    unsigned char *constant_buffer;

    assert(sizeof(pp_static_parameter) == 128);
    dri_bo_map(pp_context->curbe.bo, 1);
    assert(pp_context->curbe.bo->virtual);
    constant_buffer = pp_context->curbe.bo->virtual;
    memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
    dri_bo_unmap(pp_context->curbe.bo);
}
386
/* Build all indirect state for one PP run.  The order is deliberate:
 * the binding table and IDRT reference surface-state/sampler bos that
 * must exist first. */
static void
ironlake_pp_states_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_post_processing_context *pp_context = i965->pp_context;

    ironlake_pp_surface_state(pp_context);
    ironlake_pp_binding_table(pp_context);
    ironlake_pp_interface_descriptor_table(pp_context);
    ironlake_pp_vfe_state(pp_context);
    ironlake_pp_upload_constants(pp_context);
}
399
/* Emit PIPELINE_SELECT to switch the GPU to the media pipeline. */
static void
ironlake_pp_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
    ADVANCE_BATCH(batch);
}
410
/* Emit URB_FENCE to partition the URB: everything up to cs_start goes
 * to the VFE, the remainder (up to urb.size) to the constant buffer. */
static void
ironlake_pp_urb_layout(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int vfe_fence, cs_fence;

    vfe_fence = pp_context->urb.cs_start;
    cs_fence = pp_context->urb.size;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 
              (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
              (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
    ADVANCE_BATCH(batch);
}
429
430 static void
431 ironlake_pp_state_base_address(VADriverContextP ctx)
432 {
433     struct i965_driver_data *i965 = i965_driver_data(ctx);
434     struct intel_batchbuffer *batch = i965->batch;
435
436     BEGIN_BATCH(batch, 8);
437     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
438     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
439     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
440     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
441     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
442     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
443     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
444     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
445     ADVANCE_BATCH(batch);
446 }
447
/* Emit MEDIA_STATE_POINTERS referencing the VFE state bo built in
 * ironlake_pp_vfe_state(). */
static void
ironlake_pp_state_pointers(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
    OUT_BATCH(batch, 0);
    OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}
460
/* Emit CS_URB_STATE describing the constant buffer URB entries
 * (allocation size is "minus one" encoded). */
static void 
ironlake_pp_cs_urb_layout(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
              (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}
474
/* Emit CONSTANT_BUFFER pointing at the CURBE bo uploaded in
 * ironlake_pp_upload_constants().  The reloc delta carries the
 * buffer-length field (size_cs_entry - 1) in the low bits. */
static void
ironlake_pp_constant_buffer(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(batch, pp_context->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              pp_context->urb.size_cs_entry - 1);
    ADVANCE_BATCH(batch);    
}
488
/* Walk the destination in blocks: for each (x, y) step the module's
 * set_block_parameter callback updates the inline parameters, then one
 * MEDIA_OBJECT is emitted carrying them (grf 5-6, 64 bytes).  A
 * non-zero callback return skips the block. */
static void
ironlake_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int x, x_steps, y, y_steps;

    x_steps = pp_context->pp_x_steps(&pp_context->private_context);
    y_steps = pp_context->pp_y_steps(&pp_context->private_context);

    for (y = 0; y < y_steps; y++) {
        for (x = 0; x < x_steps; x++) {
            if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
                BEGIN_BATCH(batch, 20);
                OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
                OUT_BATCH(batch, 0);
                OUT_BATCH(batch, 0); /* no indirect data */
                OUT_BATCH(batch, 0);

                /* inline data grf 5-6 */
                assert(sizeof(pp_inline_parameter) == 64);
                intel_batchbuffer_data(batch, &pp_inline_parameter, sizeof(pp_inline_parameter));

                ADVANCE_BATCH(batch);
            }
        }
    }
}
517
/* Emit the full Ironlake PP command sequence into one atomic batch:
 * flush, pipeline select, base addresses, state pointers, URB layout,
 * constants, then the per-block MEDIA_OBJECT walk. */
static void
ironlake_pp_pipeline_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_post_processing_context *pp_context = i965->pp_context;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    ironlake_pp_pipeline_select(ctx);
    ironlake_pp_state_base_address(ctx);
    ironlake_pp_state_pointers(ctx, pp_context);
    ironlake_pp_urb_layout(ctx, pp_context);
    ironlake_pp_cs_urb_layout(ctx, pp_context);
    ironlake_pp_constant_buffer(ctx, pp_context);
    ironlake_pp_object_walker(ctx, pp_context);
    intel_batchbuffer_end_atomic(batch);
}
536
/* NULL module: a single horizontal step covers everything. */
static int
pp_null_x_steps(void *private_context)
{
    (void)private_context; /* unused; signature fixed by pp_x_steps */
    return 1;
}
542
/* NULL module: a single vertical step covers everything. */
static int
pp_null_y_steps(void *private_context)
{
    (void)private_context; /* unused; signature fixed by pp_y_steps */
    return 1;
}
548
/* NULL module block callback: nothing to set; returning 0 lets the
 * walker emit the MEDIA_OBJECT for every block. */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    (void)pp_context;
    (void)x;
    (void)y;
    return 0;
}
554
/* Initialize the NULL (testing) module: only installs the walker
 * callbacks; surfaces and rectangles are intentionally ignored. */
static void
pp_null_initialize(VADriverContextP ctx, 
                   VASurfaceID in_surface_id, VASurfaceID out_surface_id,
                   const VARectangle *src_rect, const VARectangle *dst_rect)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_post_processing_context *pp_context = i965->pp_context;

    /* private function & data */
    pp_context->pp_x_steps = pp_null_x_steps;
    pp_context->pp_y_steps = pp_null_y_steps;
    pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
}
568
/* Load/save module: one horizontal pass (blocks are 1 x N per row). */
static int
pp_load_save_x_steps(void *private_context)
{
    (void)private_context; /* unused; signature fixed by pp_x_steps */
    return 1;
}
574
/* Load/save module: one vertical step per 8-pixel block row.
 * NOTE(review): integer division assumes dest_h is a multiple of 8
 * (surface heights appear to be padded by the allocator) — confirm. */
static int
pp_load_save_y_steps(void *private_context)
{
    struct pp_load_save_context *pp_load_save_context = private_context;

    return pp_load_save_context->dest_h / 8;
}
582
/* Per-block setup for the load/save copy: full 16x8 block masks and the
 * destination block origin.  `pp_context` is used implicitly through
 * the pp_inline_parameter macro. */
static int
pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    pp_inline_parameter.grf5.block_vertical_mask = 0xff;
    pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
    pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
    pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;

    return 0;
}
593
/* Initialize the NV12 load/save (copy) module.
 *
 * Binds the source NV12 surface (Y plane at binding-table index 1, UV
 * plane at index 2) and the destination NV12 surface (Y at index 7, UV
 * at index 8), then installs the block-walker callbacks.  The UV plane
 * of an NV12 surface starts at offset pitch * height after the Y
 * plane.  src_rect/dst_rect are unused: the whole surface is copied.
 *
 * NOTE(review): ss2.width is programmed as orig_w / 4 - 1, i.e. the
 * kernel appears to address the surface in 4-byte elements — confirm
 * against the nv12_load_save kernel source. */
static void
pp_nv12_load_save_initialize(VADriverContextP ctx,
                             VASurfaceID in_surface_id, VASurfaceID out_surface_id,
                             const VARectangle *src_rect, const VARectangle *dst_rect)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_post_processing_context *pp_context = i965->pp_context;
    struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
    struct object_surface *obj_surface;
    struct i965_surface_state *ss;
    dri_bo *bo;
    int index, w, h;
    int orig_w, orig_h;
    unsigned int tiling, swizzle;

    /* source surface */
    obj_surface = SURFACE(in_surface_id);
    orig_w = obj_surface->orig_width;   /* visible size */
    orig_h = obj_surface->orig_height;
    w = obj_surface->width;             /* padded pitch/height */
    h = obj_surface->height;
    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);

    /* source Y surface index 1 */
    index = 1;
    pp_context->surfaces[index].s_bo = obj_surface->bo;
    dri_bo_reference(pp_context->surfaces[index].s_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr, 
                      "surface state", 
                      sizeof(struct i965_surface_state), 
                      4096);
    assert(bo);
    pp_context->surfaces[index].ss_bo = bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);
    ss = bo->virtual;
    memset(ss, 0, sizeof(*ss));
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
    ss->ss2.width = orig_w / 4 - 1;
    ss->ss2.height = orig_h - 1;
    ss->ss3.pitch = w - 1;
    pp_set_surface_tiling(ss, tiling);
    /* Source surfaces: read-only (no write domain). */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 
                      0,
                      0,
                      offsetof(struct i965_surface_state, ss1),
                      pp_context->surfaces[index].s_bo);
    dri_bo_unmap(bo);

    /* source UV surface index 2 */
    index = 2;
    pp_context->surfaces[index].s_bo = obj_surface->bo;
    dri_bo_reference(pp_context->surfaces[index].s_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr, 
                      "surface state", 
                      sizeof(struct i965_surface_state), 
                      4096);
    assert(bo);
    pp_context->surfaces[index].ss_bo = bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);
    ss = bo->virtual;
    memset(ss, 0, sizeof(*ss));
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
    /* UV plane sits w * h bytes past the Y plane in the same bo. */
    ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
    ss->ss2.width = orig_w / 4 - 1;
    ss->ss2.height = orig_h / 2 - 1;
    ss->ss3.pitch = w - 1;
    pp_set_surface_tiling(ss, tiling);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 
                      0,
                      w * h,
                      offsetof(struct i965_surface_state, ss1),
                      pp_context->surfaces[index].s_bo);
    dri_bo_unmap(bo);

    /* destination surface */
    obj_surface = SURFACE(out_surface_id);
    orig_w = obj_surface->orig_width;
    orig_h = obj_surface->orig_height;
    w = obj_surface->width;
    h = obj_surface->height;
    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);

    /* destination Y surface index 7 */
    index = 7;
    pp_context->surfaces[index].s_bo = obj_surface->bo;
    dri_bo_reference(pp_context->surfaces[index].s_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr, 
                      "surface state", 
                      sizeof(struct i965_surface_state), 
                      4096);
    assert(bo);
    pp_context->surfaces[index].ss_bo = bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);
    ss = bo->virtual;
    memset(ss, 0, sizeof(*ss));
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
    ss->ss2.width = orig_w / 4 - 1;
    ss->ss2.height = orig_h - 1;
    ss->ss3.pitch = w - 1;
    pp_set_surface_tiling(ss, tiling);
    /* Destination surfaces: RENDER is both read and write domain. */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 
                      I915_GEM_DOMAIN_RENDER,
                      0,
                      offsetof(struct i965_surface_state, ss1),
                      pp_context->surfaces[index].s_bo);
    dri_bo_unmap(bo);

    /* destination UV surface index 8 */
    index = 8;
    pp_context->surfaces[index].s_bo = obj_surface->bo;
    dri_bo_reference(pp_context->surfaces[index].s_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr, 
                      "surface state", 
                      sizeof(struct i965_surface_state), 
                      4096);
    assert(bo);
    pp_context->surfaces[index].ss_bo = bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);
    ss = bo->virtual;
    memset(ss, 0, sizeof(*ss));
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
    ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
    ss->ss2.width = orig_w / 4 - 1;
    ss->ss2.height = orig_h / 2 - 1;
    ss->ss3.pitch = w - 1;
    pp_set_surface_tiling(ss, tiling);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 
                      I915_GEM_DOMAIN_RENDER,
                      w * h,
                      offsetof(struct i965_surface_state, ss1),
                      pp_context->surfaces[index].s_bo);
    dri_bo_unmap(bo);

    /* private function & data */
    pp_context->pp_x_steps = pp_load_save_x_steps;
    pp_context->pp_y_steps = pp_load_save_y_steps;
    pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
    /* dest_h/dest_w use the padded (not visible) destination size. */
    pp_load_save_context->dest_h = h;
    pp_load_save_context->dest_w = w;

    /* NOTE(review): block counts derive from the padded width w; the
     * scaling path uses the visible width instead — confirm intended. */
    pp_inline_parameter.grf5.block_count_x = w / 16;   /* 1 x N */
    pp_inline_parameter.grf5.number_blocks = w / 16;
}
751
/* Scaling module: one horizontal pass per block row. */
static int
pp_scaling_x_steps(void *private_context)
{
    (void)private_context; /* unused; signature fixed by pp_x_steps */
    return 1;
}
757
/* Scaling module: one vertical step per 8-pixel block row.
 * NOTE(review): assumes dest_h is a multiple of 8 — confirm. */
static int
pp_scaling_y_steps(void *private_context)
{
    struct pp_scaling_context *pp_scaling_context = private_context;

    return pp_scaling_context->dest_h / 8;
}
765
/* Per-block setup for scaling: advance the normalized source origin by
 * the per-pixel scaling steps (16 wide, 8 high per block) and set the
 * destination block origin in pixels, both offset by the crop origin
 * stored in the scaling context. */
static int
pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
    float src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
    float src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;

    pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
    pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
    pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
    pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
    
    return 0;
}
780
781 static void
782 pp_nv12_scaling_initialize(VADriverContextP ctx,
783                            VASurfaceID in_surface_id, VASurfaceID out_surface_id,
784                            const VARectangle *src_rect, const VARectangle *dst_rect)
785 {
786     struct i965_driver_data *i965 = i965_driver_data(ctx);
787     struct i965_post_processing_context *pp_context = i965->pp_context;
788     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
789     struct object_surface *obj_surface;
790     struct i965_sampler_state *sampler_state;
791     struct i965_surface_state *ss;
792     dri_bo *bo;
793     int index;
794     int in_w, in_h, in_wpitch, in_hpitch;
795     int out_w, out_h, out_wpitch, out_hpitch;
796     unsigned int tiling, swizzle;
797
798     /* source surface */
799     obj_surface = SURFACE(in_surface_id);
800     in_w = obj_surface->orig_width;
801     in_h = obj_surface->orig_height;
802     in_wpitch = obj_surface->width;
803     in_hpitch = obj_surface->height;
804     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
805
806     /* source Y surface index 1 */
807     index = 1;
808     pp_context->surfaces[index].s_bo = obj_surface->bo;
809     dri_bo_reference(pp_context->surfaces[index].s_bo);
810     bo = dri_bo_alloc(i965->intel.bufmgr, 
811                       "surface state", 
812                       sizeof(struct i965_surface_state), 
813                       4096);
814     assert(bo);
815     pp_context->surfaces[index].ss_bo = bo;
816     dri_bo_map(bo, True);
817     assert(bo->virtual);
818     ss = bo->virtual;
819     memset(ss, 0, sizeof(*ss));
820     ss->ss0.surface_type = I965_SURFACE_2D;
821     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
822     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
823     ss->ss2.width = in_w - 1;
824     ss->ss2.height = in_h - 1;
825     ss->ss3.pitch = in_wpitch - 1;
826     pp_set_surface_tiling(ss, tiling);
827     dri_bo_emit_reloc(bo,
828                       I915_GEM_DOMAIN_RENDER, 
829                       0,
830                       0,
831                       offsetof(struct i965_surface_state, ss1),
832                       pp_context->surfaces[index].s_bo);
833     dri_bo_unmap(bo);
834
835     /* source UV surface index 2 */
836     index = 2;
837     pp_context->surfaces[index].s_bo = obj_surface->bo;
838     dri_bo_reference(pp_context->surfaces[index].s_bo);
839     bo = dri_bo_alloc(i965->intel.bufmgr, 
840                       "surface state", 
841                       sizeof(struct i965_surface_state), 
842                       4096);
843     assert(bo);
844     pp_context->surfaces[index].ss_bo = bo;
845     dri_bo_map(bo, True);
846     assert(bo->virtual);
847     ss = bo->virtual;
848     memset(ss, 0, sizeof(*ss));
849     ss->ss0.surface_type = I965_SURFACE_2D;
850     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
851     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + in_wpitch * in_hpitch;
852     ss->ss2.width = in_w / 2 - 1;
853     ss->ss2.height = in_h / 2 - 1;
854     ss->ss3.pitch = in_wpitch - 1;
855     pp_set_surface_tiling(ss, tiling);
856     dri_bo_emit_reloc(bo,
857                       I915_GEM_DOMAIN_RENDER, 
858                       0,
859                       in_wpitch * in_hpitch,
860                       offsetof(struct i965_surface_state, ss1),
861                       pp_context->surfaces[index].s_bo);
862     dri_bo_unmap(bo);
863
864     /* destination surface */
865     obj_surface = SURFACE(out_surface_id);
866     out_w = obj_surface->orig_width;
867     out_h = obj_surface->orig_height;
868     out_wpitch = obj_surface->width;
869     out_hpitch = obj_surface->height;
870     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
871
872     /* destination Y surface index 7 */
873     index = 7;
874     pp_context->surfaces[index].s_bo = obj_surface->bo;
875     dri_bo_reference(pp_context->surfaces[index].s_bo);
876     bo = dri_bo_alloc(i965->intel.bufmgr, 
877                       "surface state", 
878                       sizeof(struct i965_surface_state), 
879                       4096);
880     assert(bo);
881     pp_context->surfaces[index].ss_bo = bo;
882     dri_bo_map(bo, True);
883     assert(bo->virtual);
884     ss = bo->virtual;
885     memset(ss, 0, sizeof(*ss));
886     ss->ss0.surface_type = I965_SURFACE_2D;
887     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
888     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
889     ss->ss2.width = out_w / 4 - 1;
890     ss->ss2.height = out_h - 1;
891     ss->ss3.pitch = out_wpitch - 1;
892     pp_set_surface_tiling(ss, tiling);
893     dri_bo_emit_reloc(bo,
894                       I915_GEM_DOMAIN_RENDER, 
895                       I915_GEM_DOMAIN_RENDER,
896                       0,
897                       offsetof(struct i965_surface_state, ss1),
898                       pp_context->surfaces[index].s_bo);
899     dri_bo_unmap(bo);
900
901     /* destination UV surface index 8 */
902     index = 8;
903     pp_context->surfaces[index].s_bo = obj_surface->bo;
904     dri_bo_reference(pp_context->surfaces[index].s_bo);
905     bo = dri_bo_alloc(i965->intel.bufmgr, 
906                       "surface state", 
907                       sizeof(struct i965_surface_state), 
908                       4096);
909     assert(bo);
910     pp_context->surfaces[index].ss_bo = bo;
911     dri_bo_map(bo, True);
912     assert(bo->virtual);
913     ss = bo->virtual;
914     memset(ss, 0, sizeof(*ss));
915     ss->ss0.surface_type = I965_SURFACE_2D;
916     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
917     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + out_wpitch * out_hpitch;
918     ss->ss2.width = out_w / 4 - 1;
919     ss->ss2.height = out_h / 2 - 1;
920     ss->ss3.pitch = out_wpitch - 1;
921     pp_set_surface_tiling(ss, tiling);
922     dri_bo_emit_reloc(bo,
923                       I915_GEM_DOMAIN_RENDER, 
924                       I915_GEM_DOMAIN_RENDER,
925                       out_wpitch * out_hpitch,
926                       offsetof(struct i965_surface_state, ss1),
927                       pp_context->surfaces[index].s_bo);
928     dri_bo_unmap(bo);
929
930     /* sampler state */
931     dri_bo_map(pp_context->sampler_state_table.bo, True);
932     assert(pp_context->sampler_state_table.bo->virtual);
933     sampler_state = pp_context->sampler_state_table.bo->virtual;
934
935     /* SIMD16 Y index 1 */
936     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
937     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
938     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
939     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
940     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
941
942     /* SIMD16 UV index 2 */
943     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
944     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
945     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
946     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
947     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
948
949     dri_bo_unmap(pp_context->sampler_state_table.bo);
950
951     /* private function & data */
952     pp_context->pp_x_steps = pp_scaling_x_steps;
953     pp_context->pp_y_steps = pp_scaling_y_steps;
954     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
955
956     pp_scaling_context->dest_x = dst_rect->x;
957     pp_scaling_context->dest_y = dst_rect->y;
958     pp_scaling_context->dest_w = ALIGN(dst_rect->width, 16);
959     pp_scaling_context->dest_h = ALIGN(dst_rect->height, 16);
960     pp_scaling_context->src_normalized_x = (float)src_rect->x / in_w / out_w;
961     pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h / out_h;
962
963     pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / out_h;
964
965     pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / out_w;
966     pp_inline_parameter.grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
967     pp_inline_parameter.grf5.number_blocks = pp_scaling_context->dest_w / 16;
968     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
969     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
970 }
971
972 static int
973 pp_avs_x_steps(void *private_context)
974 {
975     struct pp_avs_context *pp_avs_context = private_context;
976
977     return pp_avs_context->dest_w / 16;
978 }
979
/* Vertical step count for the AVS path: always a single step (the
 * kernel walks the full column itself). */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;      /* unused */

    return 1;
}
985
/*
 * Program the per-block inline parameters for AVS block (x, y).
 *
 * Horizontally this implements non-linear scaling: when the destination
 * is wider than the aspect-preserving width (tmp_w), the middle of the
 * picture is scaled linearly while the left/right side bands are scaled
 * with a linearly ramping step (video_step_delta), absorbing the extra
 * width at the edges.  The x > 0 branches accumulate the source origin
 * from the values programmed for the previous block, so blocks MUST be
 * visited in increasing x order within a row.
 *
 * Always returns 0.
 */
static int
pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
    float src_x_steping, src_y_steping, video_step_delta;
    /* Destination width that would preserve the source aspect ratio at
     * the requested destination height, rounded up to a 16-pixel block. */
    int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);

    if (tmp_w >= pp_avs_context->dest_w) {
        /* Destination is not wider than the aspect-preserving width:
         * uniform step, no ramp, source centered horizontally. */
        pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
        pp_inline_parameter.grf6.video_step_delta = 0;
        
        if (x == 0) {
            pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
                pp_avs_context->src_normalized_x;
        } else {
            /* Advance past the 16 pixels consumed by the previous block. */
            src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
            video_step_delta = pp_inline_parameter.grf6.video_step_delta;
            pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                16 * 15 * video_step_delta / 2;
        }
    } else {
        /* Non-linear case: n0/n1 blocks of extra width on each side,
         * n2 aspect-correct blocks folded into each side band. */
        int n0, n1, n2, nls_left, nls_right;
        int factor_a = 5, factor_b = 4;
        float f;

        n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
        n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
        n2 = tmp_w / (16 * factor_a);
        nls_left = n0 + n2;
        nls_right = n1 + n2;
        /* Fraction of the source covered by each side band. */
        f = (float) n2 * 16 / tmp_w;
        
        if (n0 < 5) {
            /* NOTE(review): side bands too narrow for the ramp; fall back
             * to plain linear stretch over the whole width.  The magic
             * threshold 5 happens to equal factor_a — confirm whether
             * that is intentional. */
            pp_inline_parameter.grf6.video_step_delta = 0.0;

            if (x == 0) {
                pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
                pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
            } else {
                src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
                video_step_delta = pp_inline_parameter.grf6.video_step_delta;
                pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                    16 * 15 * video_step_delta / 2;
            }
        } else {
            if (x < nls_left) {
                /* Left band: step starts at 'a' and ramps up by 'b' per
                 * pixel so that the band covers exactly fraction f:
                 * f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
                float a = f / (nls_left * 16 * factor_b);
                float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
                
                pp_inline_parameter.grf6.video_step_delta = b;

                if (x == 0) {
                    pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
                    pp_inline_parameter.grf5.normalized_video_x_scaling_step = a;
                } else {
                    src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
                    video_step_delta = pp_inline_parameter.grf6.video_step_delta;
                    pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                        16 * 15 * video_step_delta / 2;
                    pp_inline_parameter.grf5.normalized_video_x_scaling_step += 16 * b;
                }
            } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
                /* scale the center linearly */
                src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
                video_step_delta = pp_inline_parameter.grf6.video_step_delta;
                pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                    16 * 15 * video_step_delta / 2;
                pp_inline_parameter.grf6.video_step_delta = 0.0;
                pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
            } else {
                /* Right band: mirror of the left band, step ramps down. */
                float a = f / (nls_right * 16 * factor_b);
                float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));

                src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
                video_step_delta = pp_inline_parameter.grf6.video_step_delta;
                pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                    16 * 15 * video_step_delta / 2;
                pp_inline_parameter.grf6.video_step_delta = -b;

                if (x == (pp_avs_context->dest_w / 16 - nls_right))
                    pp_inline_parameter.grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
                else
                    pp_inline_parameter.grf5.normalized_video_x_scaling_step -= b * 16;
            }
        }
    }

    /* Vertical origin is recomputed from scratch each call (rows are 8
     * pixels tall on this path); destination origins are offset by the
     * requested destination rectangle. */
    src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
    pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
    pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
    pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;

    return 0;
}
1081
/*
 * Set up all GPU state for the NV12 AVS (adaptive video scaling)
 * post-processing pass:
 *  - binds the source Y plane (index 1) and a full NV12 view of the
 *    source (index 2) as sample_8x8 media surface states,
 *  - binds the destination Y and UV planes (indices 7 and 8) as
 *    regular render-target surface states,
 *  - programs the sampler_8x8 AVS/IEF state for both planes,
 *  - installs the per-pass step/parameter callbacks and seeds the
 *    static and inline kernel parameters for M x 1 block walking.
 */
static void
pp_nv12_avs_initialize(VADriverContextP ctx,
                       VASurfaceID in_surface_id, VASurfaceID out_surface_id,
                       const VARectangle *src_rect, const VARectangle *dst_rect)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_post_processing_context *pp_context = i965->pp_context;
    struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
    struct object_surface *obj_surface;
    struct i965_surface_state *ss;
    struct i965_sampler_8x8 *sampler_8x8;
    struct i965_sampler_8x8_state *sampler_8x8_state;
    struct i965_surface_state2 *ss_8x8;
    dri_bo *bo;
    int index;
    int in_w, in_h, in_wpitch, in_hpitch;    /* source: visible size and padded pitch/rows */
    int out_w, out_h, out_wpitch, out_hpitch; /* destination: visible size and padded pitch/rows */
    unsigned int tiling, swizzle;

    /* surface */
    obj_surface = SURFACE(in_surface_id);
    in_w = obj_surface->orig_width;
    in_h = obj_surface->orig_height;
    in_wpitch = obj_surface->width;
    in_hpitch = obj_surface->height;
    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);

    /* source Y surface index 1 */
    index = 1;
    pp_context->surfaces[index].s_bo = obj_surface->bo;
    dri_bo_reference(pp_context->surfaces[index].s_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr, 
                      "Y surface state for sample_8x8", 
                      sizeof(struct i965_surface_state2), 
                      4096);
    assert(bo);
    pp_context->surfaces[index].ss_bo = bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);
    ss_8x8 = bo->virtual;
    memset(ss_8x8, 0, sizeof(*ss_8x8));
    ss_8x8->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset;
    ss_8x8->ss1.cbcr_pixel_offset_v_direction = 0;
    ss_8x8->ss1.width = in_w - 1;
    ss_8x8->ss1.height = in_h - 1;
    ss_8x8->ss2.half_pitch_for_chroma = 0;
    ss_8x8->ss2.pitch = in_wpitch - 1;
    ss_8x8->ss2.interleave_chroma = 0;
    ss_8x8->ss2.surface_format = SURFACE_FORMAT_Y8_UNORM;
    ss_8x8->ss3.x_offset_for_cb = 0;
    ss_8x8->ss3.y_offset_for_cb = 0;
    pp_set_surface2_tiling(ss_8x8, tiling);
    /* Patch the base address in ss0; read-only (write domain 0). */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 
                      0,
                      0,
                      offsetof(struct i965_surface_state2, ss0),
                      pp_context->surfaces[index].s_bo);
    dri_bo_unmap(bo);

    /* source UV surface index 2 */
    /* NOTE(review): programmed as a full-frame PLANAR_420_8 view with
     * interleaved chroma starting at the UV plane offset; width/height
     * are the full luma dimensions, unlike the scaling path which
     * halves them — presumably what sample_8x8 expects; confirm against
     * the hardware spec. */
    index = 2;
    pp_context->surfaces[index].s_bo = obj_surface->bo;
    dri_bo_reference(pp_context->surfaces[index].s_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr, 
                      "UV surface state for sample_8x8", 
                      sizeof(struct i965_surface_state2), 
                      4096);
    assert(bo);
    pp_context->surfaces[index].ss_bo = bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);
    ss_8x8 = bo->virtual;
    memset(ss_8x8, 0, sizeof(*ss_8x8));
    /* UV plane starts right after the padded Y plane in an NV12 bo. */
    ss_8x8->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset + in_wpitch * in_hpitch;
    ss_8x8->ss1.cbcr_pixel_offset_v_direction = 0;
    ss_8x8->ss1.width = in_w - 1;
    ss_8x8->ss1.height = in_h - 1;
    ss_8x8->ss2.half_pitch_for_chroma = 0;
    ss_8x8->ss2.pitch = in_wpitch - 1;
    ss_8x8->ss2.interleave_chroma = 1;
    ss_8x8->ss2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
    ss_8x8->ss3.x_offset_for_cb = 0;
    ss_8x8->ss3.y_offset_for_cb = 0;
    pp_set_surface2_tiling(ss_8x8, tiling);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 
                      0,
                      in_wpitch * in_hpitch,
                      offsetof(struct i965_surface_state2, ss0),
                      pp_context->surfaces[index].s_bo);
    dri_bo_unmap(bo);

    /* destination surface */
    obj_surface = SURFACE(out_surface_id);
    out_w = obj_surface->orig_width;
    out_h = obj_surface->orig_height;
    out_wpitch = obj_surface->width;
    out_hpitch = obj_surface->height;
    assert(out_w <= out_wpitch && out_h <= out_hpitch);
    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);

    /* destination Y surface index 7 */
    index = 7;
    pp_context->surfaces[index].s_bo = obj_surface->bo;
    dri_bo_reference(pp_context->surfaces[index].s_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr, 
                      "surface state", 
                      sizeof(struct i965_surface_state), 
                      4096);
    assert(bo);
    pp_context->surfaces[index].ss_bo = bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);
    ss = bo->virtual;
    memset(ss, 0, sizeof(*ss));
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
    /* NOTE(review): width programmed in quarters of the pixel width,
     * matching the other pp output paths — presumably the kernel writes
     * 4 bytes per element; confirm against the pp kernel. */
    ss->ss2.width = out_w / 4 - 1;
    ss->ss2.height = out_h - 1;
    ss->ss3.pitch = out_wpitch - 1;
    pp_set_surface_tiling(ss, tiling);
    /* Destination is written by the render engine: write domain RENDER. */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 
                      I915_GEM_DOMAIN_RENDER,
                      0,
                      offsetof(struct i965_surface_state, ss1),
                      pp_context->surfaces[index].s_bo);
    dri_bo_unmap(bo);

    /* destination UV surface index 8 */
    index = 8;
    pp_context->surfaces[index].s_bo = obj_surface->bo;
    dri_bo_reference(pp_context->surfaces[index].s_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr, 
                      "surface state", 
                      sizeof(struct i965_surface_state), 
                      4096);
    assert(bo);
    pp_context->surfaces[index].ss_bo = bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);
    ss = bo->virtual;
    memset(ss, 0, sizeof(*ss));
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
    ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + out_wpitch * out_hpitch;
    ss->ss2.width = out_w / 4 - 1;
    ss->ss2.height = out_h / 2 - 1;
    ss->ss3.pitch = out_wpitch - 1;
    pp_set_surface_tiling(ss, tiling);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 
                      I915_GEM_DOMAIN_RENDER,
                      out_wpitch * out_hpitch,
                      offsetof(struct i965_surface_state, ss1),
                      pp_context->surfaces[index].s_bo);
    dri_bo_unmap(bo);

    /* sampler 8x8 state (Y): adaptive filtering enabled for all
     * channels, per-axis adaptive filtering bypassed. */
    dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
    assert(pp_context->sampler_state_table.bo_8x8->virtual);
    assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
    sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
    memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
    sampler_8x8_state->dw136.default_sharpness_level = 0;
    sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
    sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
    sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
    dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);

    /* sampler 8x8: the table stays mapped until both entries and their
     * relocations are written below. */
    dri_bo_map(pp_context->sampler_state_table.bo, True);
    assert(pp_context->sampler_state_table.bo->virtual);
    assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
    sampler_8x8 = pp_context->sampler_state_table.bo->virtual;

    /* sample_8x8 Y index 1: adaptive 8-tap scaler plus IEF detail
     * enhancement (hard-coded tuning values). */
    index = 1;
    memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
    sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
    sampler_8x8[index].dw0.ief_bypass = 0;
    sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
    sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
    sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
    sampler_8x8[index].dw2.global_noise_estimation = 22;
    sampler_8x8[index].dw2.strong_edge_threshold = 8;
    sampler_8x8[index].dw2.weak_edge_threshold = 1;
    sampler_8x8[index].dw3.strong_edge_weight = 7;
    sampler_8x8[index].dw3.regular_weight = 2;
    sampler_8x8[index].dw3.non_edge_weight = 0;
    sampler_8x8[index].dw3.gain_factor = 40;
    sampler_8x8[index].dw4.steepness_boost = 0;
    sampler_8x8[index].dw4.steepness_threshold = 0;
    sampler_8x8[index].dw4.mr_boost = 0;
    sampler_8x8[index].dw4.mr_threshold = 5;
    sampler_8x8[index].dw5.pwl1_point_1 = 4;
    sampler_8x8[index].dw5.pwl1_point_2 = 12;
    sampler_8x8[index].dw5.pwl1_point_3 = 16;
    sampler_8x8[index].dw5.pwl1_point_4 = 26;
    sampler_8x8[index].dw6.pwl1_point_5 = 40;
    sampler_8x8[index].dw6.pwl1_point_6 = 160;
    sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
    sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
    sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
    sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
    sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
    sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
    sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
    sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
    sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
    sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
    sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
    sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
    sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
    sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
    sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
    sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
    sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
    sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
    sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
    sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
    sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
    sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
    sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
    sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
    sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
    sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
    sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
    sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
    sampler_8x8[index].dw13.limiter_boost = 0;
    sampler_8x8[index].dw13.minimum_limiter = 10;
    sampler_8x8[index].dw13.maximum_limiter = 11;
    sampler_8x8[index].dw14.clip_limiter = 130;
    /* Patch dw1 (sampler_8x8 state pointer) against bo_8x8. */
    dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
                      I915_GEM_DOMAIN_RENDER, 
                      0,
                      0,
                      sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
                      pp_context->sampler_state_table.bo_8x8);

    /* sampler 8x8 state (UV): same as Y but with per-channel adaptive
     * filtering disabled. */
    dri_bo_map(pp_context->sampler_state_table.bo_8x8_uv, True);
    assert(pp_context->sampler_state_table.bo_8x8_uv->virtual);
    assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
    sampler_8x8_state = pp_context->sampler_state_table.bo_8x8_uv->virtual;
    memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
    sampler_8x8_state->dw136.default_sharpness_level = 0;
    sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 0;
    sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
    sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
    dri_bo_unmap(pp_context->sampler_state_table.bo_8x8_uv);

    /* sample_8x8 UV index 2: identical IEF tuning to Y, but nearest
     * filtering for chroma and pointing at the UV 8x8 state. */
    index = 2;
    memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
    sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_NEAREST;
    sampler_8x8[index].dw0.ief_bypass = 0;
    sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
    sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
    sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8_uv->offset >> 5;
    sampler_8x8[index].dw2.global_noise_estimation = 22;
    sampler_8x8[index].dw2.strong_edge_threshold = 8;
    sampler_8x8[index].dw2.weak_edge_threshold = 1;
    sampler_8x8[index].dw3.strong_edge_weight = 7;
    sampler_8x8[index].dw3.regular_weight = 2;
    sampler_8x8[index].dw3.non_edge_weight = 0;
    sampler_8x8[index].dw3.gain_factor = 40;
    sampler_8x8[index].dw4.steepness_boost = 0;
    sampler_8x8[index].dw4.steepness_threshold = 0;
    sampler_8x8[index].dw4.mr_boost = 0;
    sampler_8x8[index].dw4.mr_threshold = 5;
    sampler_8x8[index].dw5.pwl1_point_1 = 4;
    sampler_8x8[index].dw5.pwl1_point_2 = 12;
    sampler_8x8[index].dw5.pwl1_point_3 = 16;
    sampler_8x8[index].dw5.pwl1_point_4 = 26;
    sampler_8x8[index].dw6.pwl1_point_5 = 40;
    sampler_8x8[index].dw6.pwl1_point_6 = 160;
    sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
    sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
    sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
    sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
    sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
    sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
    sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
    sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
    sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
    sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
    sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
    sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
    sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
    sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
    sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
    sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
    sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
    sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
    sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
    sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
    sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
    sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
    sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
    sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
    sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
    sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
    sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
    sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
    sampler_8x8[index].dw13.limiter_boost = 0;
    sampler_8x8[index].dw13.minimum_limiter = 10;
    sampler_8x8[index].dw13.maximum_limiter = 11;
    sampler_8x8[index].dw14.clip_limiter = 130;
    dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
                      I915_GEM_DOMAIN_RENDER, 
                      0,
                      0,
                      sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
                      pp_context->sampler_state_table.bo_8x8_uv);

    dri_bo_unmap(pp_context->sampler_state_table.bo);

    /* private function & data */
    pp_context->pp_x_steps = pp_avs_x_steps;
    pp_context->pp_y_steps = pp_avs_y_steps;
    pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;

    /* Destination rectangle, padded to whole 16x16 blocks; normalized
     * source origin as used by pp_avs_set_block_parameter. */
    pp_avs_context->dest_x = dst_rect->x;
    pp_avs_context->dest_y = dst_rect->y;
    pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
    pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
    pp_avs_context->src_normalized_x = (float)src_rect->x / in_w / out_w;
    pp_avs_context->src_normalized_y = (float)src_rect->y / in_h / out_h;
    pp_avs_context->src_w = src_rect->width;
    pp_avs_context->src_h = src_rect->height;

    /* Enable non-linear anamorphic scaling in the kernel. */
    pp_static_parameter.grf4.r4_2.avs.nlas = 1;
    pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / out_h;

    pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / out_w;
    pp_inline_parameter.grf5.block_count_x = 1;        /* M x 1 */
    pp_inline_parameter.grf5.number_blocks = pp_avs_context->dest_h / 8;
    pp_inline_parameter.grf5.block_vertical_mask = 0xff;
    pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
    pp_inline_parameter.grf6.video_step_delta = 0.0;
}
1425
/* Horizontal step count for the DN/DI path: a single step (the kernel
 * covers the row width itself). */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context;      /* unused */

    return 1;
}
1431
1432 static int
1433 pp_dndi_y_steps(void *private_context)
1434 {
1435     struct pp_dndi_context *pp_dndi_context = private_context;
1436
1437     return pp_dndi_context->dest_h / 4;
1438 }
1439
1440 static int
1441 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1442 {
1443     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
1444     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 4;
1445
1446     return 0;
1447 }
1448
1449 static 
1450 void pp_nv12_dndi_initialize(VADriverContextP ctx,
1451                              VASurfaceID in_surface_id, VASurfaceID out_surface_id,
1452                              const VARectangle *src_rect, const VARectangle *dst_rect)
1453 {
1454     struct i965_driver_data *i965 = i965_driver_data(ctx);
1455     struct i965_post_processing_context *pp_context = i965->pp_context;
1456     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
1457     struct object_surface *obj_surface;
1458     struct i965_surface_state *ss;
1459     struct i965_surface_state2 *ss_dndi;
1460     struct i965_sampler_dndi *sampler_dndi;
1461     dri_bo *bo;
1462     int index;
1463     int w, h;
1464     int orig_w, orig_h;
1465     unsigned int tiling, swizzle;
1466
1467     /* surface */
1468     obj_surface = SURFACE(in_surface_id);
1469     orig_w = obj_surface->orig_width;
1470     orig_h = obj_surface->orig_height;
1471     w = obj_surface->width;
1472     h = obj_surface->height;
1473     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
1474
1475     if (pp_context->stmm.bo == NULL) {
1476         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
1477                                            "STMM surface",
1478                                            w * h,
1479                                            4096);
1480         assert(pp_context->stmm.bo);
1481     }
1482
1483     /* source UV surface index 2 */
1484     index = 2;
1485     pp_context->surfaces[index].s_bo = obj_surface->bo;
1486     dri_bo_reference(pp_context->surfaces[index].s_bo);
1487     bo = dri_bo_alloc(i965->intel.bufmgr, 
1488                       "surface state", 
1489                       sizeof(struct i965_surface_state), 
1490                       4096);
1491     assert(bo);
1492     pp_context->surfaces[index].ss_bo = bo;
1493     dri_bo_map(bo, True);
1494     assert(bo->virtual);
1495     ss = bo->virtual;
1496     memset(ss, 0, sizeof(*ss));
1497     ss->ss0.surface_type = I965_SURFACE_2D;
1498     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1499     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
1500     ss->ss2.width = orig_w / 4 - 1;
1501     ss->ss2.height = orig_h / 2 - 1;
1502     ss->ss3.pitch = w - 1;
1503     pp_set_surface_tiling(ss, tiling);
1504     dri_bo_emit_reloc(bo,
1505                       I915_GEM_DOMAIN_RENDER, 
1506                       0,
1507                       w * h,
1508                       offsetof(struct i965_surface_state, ss1),
1509                       pp_context->surfaces[index].s_bo);
1510     dri_bo_unmap(bo);
1511
1512     /* source YUV surface index 4 */
1513     index = 4;
1514     pp_context->surfaces[index].s_bo = obj_surface->bo;
1515     dri_bo_reference(pp_context->surfaces[index].s_bo);
1516     bo = dri_bo_alloc(i965->intel.bufmgr, 
1517                       "YUV surface state for deinterlace ", 
1518                       sizeof(struct i965_surface_state2), 
1519                       4096);
1520     assert(bo);
1521     pp_context->surfaces[index].ss_bo = bo;
1522     dri_bo_map(bo, True);
1523     assert(bo->virtual);
1524     ss_dndi = bo->virtual;
1525     memset(ss_dndi, 0, sizeof(*ss_dndi));
1526     ss_dndi->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset;
1527     ss_dndi->ss1.cbcr_pixel_offset_v_direction = 0;
1528     ss_dndi->ss1.width = w - 1;
1529     ss_dndi->ss1.height = h - 1;
1530     ss_dndi->ss1.cbcr_pixel_offset_v_direction = 1;
1531     ss_dndi->ss2.half_pitch_for_chroma = 0;
1532     ss_dndi->ss2.pitch = w - 1;
1533     ss_dndi->ss2.interleave_chroma = 1;
1534     ss_dndi->ss2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
1535     ss_dndi->ss2.half_pitch_for_chroma = 0;
1536     ss_dndi->ss2.tiled_surface = 0;
1537     ss_dndi->ss3.x_offset_for_cb = 0;
1538     ss_dndi->ss3.y_offset_for_cb = h;
1539     pp_set_surface2_tiling(ss_dndi, tiling);
1540     dri_bo_emit_reloc(bo,
1541                       I915_GEM_DOMAIN_RENDER, 
1542                       0,
1543                       0,
1544                       offsetof(struct i965_surface_state2, ss0),
1545                       pp_context->surfaces[index].s_bo);
1546     dri_bo_unmap(bo);
1547
1548     /* source STMM surface index 20 */
1549     index = 20;
1550     pp_context->surfaces[index].s_bo = pp_context->stmm.bo;
1551     dri_bo_reference(pp_context->surfaces[index].s_bo);
1552     bo = dri_bo_alloc(i965->intel.bufmgr, 
1553                       "STMM surface state for deinterlace ", 
1554                       sizeof(struct i965_surface_state2), 
1555                       4096);
1556     assert(bo);
1557     pp_context->surfaces[index].ss_bo = bo;
1558     dri_bo_map(bo, True);
1559     assert(bo->virtual);
1560     ss = bo->virtual;
1561     memset(ss, 0, sizeof(*ss));
1562     ss->ss0.surface_type = I965_SURFACE_2D;
1563     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1564     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1565     ss->ss2.width = w - 1;
1566     ss->ss2.height = h - 1;
1567     ss->ss3.pitch = w - 1;
1568     dri_bo_emit_reloc(bo,
1569                       I915_GEM_DOMAIN_RENDER, 
1570                       I915_GEM_DOMAIN_RENDER,
1571                       0,
1572                       offsetof(struct i965_surface_state, ss1),
1573                       pp_context->surfaces[index].s_bo);
1574     dri_bo_unmap(bo);
1575
1576     /* destination surface */
1577     obj_surface = SURFACE(out_surface_id);
1578     orig_w = obj_surface->orig_width;
1579     orig_h = obj_surface->orig_height;
1580     w = obj_surface->width;
1581     h = obj_surface->height;
1582     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
1583
1584     /* destination Y surface index 7 */
1585     index = 7;
1586     pp_context->surfaces[index].s_bo = obj_surface->bo;
1587     dri_bo_reference(pp_context->surfaces[index].s_bo);
1588     bo = dri_bo_alloc(i965->intel.bufmgr, 
1589                       "surface state", 
1590                       sizeof(struct i965_surface_state), 
1591                       4096);
1592     assert(bo);
1593     pp_context->surfaces[index].ss_bo = bo;
1594     dri_bo_map(bo, True);
1595     assert(bo->virtual);
1596     ss = bo->virtual;
1597     memset(ss, 0, sizeof(*ss));
1598     ss->ss0.surface_type = I965_SURFACE_2D;
1599     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1600     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1601     ss->ss2.width = orig_w / 4 - 1;
1602     ss->ss2.height = orig_h - 1;
1603     ss->ss3.pitch = w - 1;
1604     pp_set_surface_tiling(ss, tiling);
1605     dri_bo_emit_reloc(bo,
1606                       I915_GEM_DOMAIN_RENDER, 
1607                       I915_GEM_DOMAIN_RENDER,
1608                       0,
1609                       offsetof(struct i965_surface_state, ss1),
1610                       pp_context->surfaces[index].s_bo);
1611     dri_bo_unmap(bo);
1612
1613     /* destination UV surface index 8 */
1614     index = 8;
1615     pp_context->surfaces[index].s_bo = obj_surface->bo;
1616     dri_bo_reference(pp_context->surfaces[index].s_bo);
1617     bo = dri_bo_alloc(i965->intel.bufmgr, 
1618                       "surface state", 
1619                       sizeof(struct i965_surface_state), 
1620                       4096);
1621     assert(bo);
1622     pp_context->surfaces[index].ss_bo = bo;
1623     dri_bo_map(bo, True);
1624     assert(bo->virtual);
1625     ss = bo->virtual;
1626     memset(ss, 0, sizeof(*ss));
1627     ss->ss0.surface_type = I965_SURFACE_2D;
1628     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1629     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
1630     ss->ss2.width = orig_w / 4 - 1;
1631     ss->ss2.height = orig_h / 2 - 1;
1632     ss->ss3.pitch = w - 1;
1633     pp_set_surface_tiling(ss, tiling);
1634     dri_bo_emit_reloc(bo,
1635                       I915_GEM_DOMAIN_RENDER, 
1636                       I915_GEM_DOMAIN_RENDER,
1637                       w * h,
1638                       offsetof(struct i965_surface_state, ss1),
1639                       pp_context->surfaces[index].s_bo);
1640     dri_bo_unmap(bo);
1641
1642     /* sampler dndi */
1643     dri_bo_map(pp_context->sampler_state_table.bo, True);
1644     assert(pp_context->sampler_state_table.bo->virtual);
1645     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
1646     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
1647
1648     /* sample dndi index 1 */
1649     index = 0;
1650     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
1651     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
1652     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
1653     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
1654
1655     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
1656     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
1657     sampler_dndi[index].dw1.stmm_c2 = 0;
1658     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
1659     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
1660
1661     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
1662     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
1663     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
1664     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
1665
1666     sampler_dndi[index].dw3.maximum_stmm = 128;
1667     sampler_dndi[index].dw3.multipler_for_vecm = 2;
1668     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
1669     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
1670     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
1671
1672     sampler_dndi[index].dw4.sdi_delta = 8;
1673     sampler_dndi[index].dw4.sdi_threshold = 128;
1674     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
1675     sampler_dndi[index].dw4.stmm_shift_up = 0;
1676     sampler_dndi[index].dw4.stmm_shift_down = 0;
1677     sampler_dndi[index].dw4.minimum_stmm = 0;
1678
1679     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
1680     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
1681     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
1682     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
1683
1684     sampler_dndi[index].dw6.dn_enable = 1;
1685     sampler_dndi[index].dw6.di_enable = 1;
1686     sampler_dndi[index].dw6.di_partial = 0;
1687     sampler_dndi[index].dw6.dndi_top_first = 1;
1688     sampler_dndi[index].dw6.dndi_stream_id = 1;
1689     sampler_dndi[index].dw6.dndi_first_frame = 1;
1690     sampler_dndi[index].dw6.progressive_dn = 0;
1691     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
1692     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
1693     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
1694
1695     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
1696     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
1697     sampler_dndi[index].dw7.vdi_walker_enable = 0;
1698     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
1699
1700     dri_bo_unmap(pp_context->sampler_state_table.bo);
1701
1702     /* private function & data */
1703     pp_context->pp_x_steps = pp_dndi_x_steps;
1704     pp_context->pp_y_steps = pp_dndi_y_steps;
1705     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
1706
1707     pp_static_parameter.grf1.statistics_surface_picth = w / 2;
1708     pp_static_parameter.grf1.r1_6.di.top_field_first = 0;
1709     pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m2 = 64;
1710     pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m1 = 192;
1711
1712     pp_inline_parameter.grf5.block_count_x = w / 16;   /* 1 x N */
1713     pp_inline_parameter.grf5.number_blocks = w / 16;
1714     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1715     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1716
1717     pp_dndi_context->dest_w = w;
1718     pp_dndi_context->dest_h = h;
1719 }
1720
1721 static void
1722 ironlake_pp_initialize(
1723     VADriverContextP   ctx,
1724     VASurfaceID        in_surface_id,
1725     VASurfaceID        out_surface_id,
1726     const VARectangle *src_rect,
1727     const VARectangle *dst_rect,
1728     int                pp_index
1729 )
1730 {
1731     struct i965_driver_data *i965 = i965_driver_data(ctx);
1732     struct i965_post_processing_context *pp_context = i965->pp_context;
1733     struct pp_module *pp_module;
1734     dri_bo *bo;
1735     int i;
1736
1737     dri_bo_unreference(pp_context->curbe.bo);
1738     bo = dri_bo_alloc(i965->intel.bufmgr,
1739                       "constant buffer",
1740                       4096, 
1741                       4096);
1742     assert(bo);
1743     pp_context->curbe.bo = bo;
1744
1745     dri_bo_unreference(pp_context->binding_table.bo);
1746     bo = dri_bo_alloc(i965->intel.bufmgr, 
1747                       "binding table",
1748                       sizeof(unsigned int), 
1749                       4096);
1750     assert(bo);
1751     pp_context->binding_table.bo = bo;
1752
1753     dri_bo_unreference(pp_context->idrt.bo);
1754     bo = dri_bo_alloc(i965->intel.bufmgr, 
1755                       "interface discriptor", 
1756                       sizeof(struct i965_interface_descriptor), 
1757                       4096);
1758     assert(bo);
1759     pp_context->idrt.bo = bo;
1760     pp_context->idrt.num_interface_descriptors = 0;
1761
1762     dri_bo_unreference(pp_context->sampler_state_table.bo);
1763     bo = dri_bo_alloc(i965->intel.bufmgr, 
1764                       "sampler state table", 
1765                       4096,
1766                       4096);
1767     assert(bo);
1768     dri_bo_map(bo, True);
1769     memset(bo->virtual, 0, bo->size);
1770     dri_bo_unmap(bo);
1771     pp_context->sampler_state_table.bo = bo;
1772
1773     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
1774     bo = dri_bo_alloc(i965->intel.bufmgr, 
1775                       "sampler 8x8 state ",
1776                       4096,
1777                       4096);
1778     assert(bo);
1779     pp_context->sampler_state_table.bo_8x8 = bo;
1780
1781     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
1782     bo = dri_bo_alloc(i965->intel.bufmgr, 
1783                       "sampler 8x8 state ",
1784                       4096,
1785                       4096);
1786     assert(bo);
1787     pp_context->sampler_state_table.bo_8x8_uv = bo;
1788
1789     dri_bo_unreference(pp_context->vfe_state.bo);
1790     bo = dri_bo_alloc(i965->intel.bufmgr, 
1791                       "vfe state", 
1792                       sizeof(struct i965_vfe_state), 
1793                       4096);
1794     assert(bo);
1795     pp_context->vfe_state.bo = bo;
1796     
1797     for (i = 0; i < MAX_PP_SURFACES; i++) {
1798         dri_bo_unreference(pp_context->surfaces[i].ss_bo);
1799         pp_context->surfaces[i].ss_bo = NULL;
1800
1801         dri_bo_unreference(pp_context->surfaces[i].s_bo);
1802         pp_context->surfaces[i].s_bo = NULL;
1803     }
1804
1805     memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
1806     memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
1807     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
1808     pp_context->current_pp = pp_index;
1809     pp_module = &pp_context->pp_modules[pp_index];
1810     
1811     if (pp_module->initialize)
1812         pp_module->initialize(ctx, in_surface_id, out_surface_id,
1813                               src_rect, dst_rect);
1814 }
1815
/*
 * Run one post-processing pass on Ironlake: reset/allocate the PP
 * context for the selected module, set up the GPU state objects, then
 * emit the media pipeline into the batchbuffer.
 */
static void
ironlake_post_processing(
    VADriverContextP   ctx,
    VASurfaceID        in_surface_id,
    VASurfaceID        out_surface_id,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    int                pp_index          /* PP_* module index */
)
{
    ironlake_pp_initialize(ctx, in_surface_id, out_surface_id, src_rect, dst_rect, pp_index);
    ironlake_pp_states_setup(ctx);
    ironlake_pp_pipeline_setup(ctx);
}
1830
1831 static void
1832 gen6_pp_initialize(
1833     VADriverContextP   ctx,
1834     VASurfaceID        in_surface_id,
1835     VASurfaceID        out_surface_id,
1836     const VARectangle *src_rect,
1837     const VARectangle *dst_rect,
1838     int                pp_index
1839 )
1840 {
1841     struct i965_driver_data *i965 = i965_driver_data(ctx);
1842     struct i965_post_processing_context *pp_context = i965->pp_context;
1843     struct pp_module *pp_module;
1844     dri_bo *bo;
1845     int i;
1846
1847     dri_bo_unreference(pp_context->curbe.bo);
1848     bo = dri_bo_alloc(i965->intel.bufmgr,
1849                       "constant buffer",
1850                       4096, 
1851                       4096);
1852     assert(bo);
1853     pp_context->curbe.bo = bo;
1854
1855     dri_bo_unreference(pp_context->binding_table.bo);
1856     bo = dri_bo_alloc(i965->intel.bufmgr, 
1857                       "binding table",
1858                       sizeof(unsigned int), 
1859                       4096);
1860     assert(bo);
1861     pp_context->binding_table.bo = bo;
1862
1863     dri_bo_unreference(pp_context->idrt.bo);
1864     bo = dri_bo_alloc(i965->intel.bufmgr, 
1865                       "interface discriptor", 
1866                       sizeof(struct gen6_interface_descriptor_data), 
1867                       4096);
1868     assert(bo);
1869     pp_context->idrt.bo = bo;
1870     pp_context->idrt.num_interface_descriptors = 0;
1871
1872     dri_bo_unreference(pp_context->sampler_state_table.bo);
1873     bo = dri_bo_alloc(i965->intel.bufmgr, 
1874                       "sampler state table", 
1875                       4096,
1876                       4096);
1877     assert(bo);
1878     dri_bo_map(bo, True);
1879     memset(bo->virtual, 0, bo->size);
1880     dri_bo_unmap(bo);
1881     pp_context->sampler_state_table.bo = bo;
1882
1883     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
1884     bo = dri_bo_alloc(i965->intel.bufmgr, 
1885                       "sampler 8x8 state ",
1886                       4096,
1887                       4096);
1888     assert(bo);
1889     pp_context->sampler_state_table.bo_8x8 = bo;
1890
1891     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
1892     bo = dri_bo_alloc(i965->intel.bufmgr, 
1893                       "sampler 8x8 state ",
1894                       4096,
1895                       4096);
1896     assert(bo);
1897     pp_context->sampler_state_table.bo_8x8_uv = bo;
1898
1899     dri_bo_unreference(pp_context->vfe_state.bo);
1900     bo = dri_bo_alloc(i965->intel.bufmgr, 
1901                       "vfe state", 
1902                       sizeof(struct i965_vfe_state), 
1903                       4096);
1904     assert(bo);
1905     pp_context->vfe_state.bo = bo;
1906     
1907     for (i = 0; i < MAX_PP_SURFACES; i++) {
1908         dri_bo_unreference(pp_context->surfaces[i].ss_bo);
1909         pp_context->surfaces[i].ss_bo = NULL;
1910
1911         dri_bo_unreference(pp_context->surfaces[i].s_bo);
1912         pp_context->surfaces[i].s_bo = NULL;
1913     }
1914
1915     memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
1916     memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
1917     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
1918     pp_context->current_pp = pp_index;
1919     pp_module = &pp_context->pp_modules[pp_index];
1920     
1921     if (pp_module->initialize)
1922         pp_module->initialize(ctx, in_surface_id, out_surface_id,
1923                               src_rect, dst_rect);
1924 }
1925
1926 static void
1927 gen6_pp_binding_table(struct i965_post_processing_context *pp_context)
1928 {
1929     unsigned int *binding_table;
1930     dri_bo *bo = pp_context->binding_table.bo;
1931     int i;
1932
1933     dri_bo_map(bo, 1);
1934     assert(bo->virtual);
1935     binding_table = bo->virtual;
1936     memset(binding_table, 0, bo->size);
1937
1938     for (i = 0; i < MAX_PP_SURFACES; i++) {
1939         if (pp_context->surfaces[i].ss_bo) {
1940             assert(pp_context->surfaces[i].s_bo);
1941
1942             binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
1943             dri_bo_emit_reloc(bo,
1944                               I915_GEM_DOMAIN_INSTRUCTION, 0,
1945                               0,
1946                               i * sizeof(*binding_table),
1947                               pp_context->surfaces[i].ss_bo);
1948         }
1949     
1950     }
1951
1952     dri_bo_unmap(bo);
1953 }
1954
/*
 * Fill in the single Gen6 interface descriptor for the current PP
 * module: kernel entry point, sampler state table and binding table
 * pointers (all as GPU offsets, so each gets a relocation entry), and
 * the CURBE read window (grf 1-4).  Increments the descriptor count.
 */
static void
gen6_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
{
    struct gen6_interface_descriptor_data *desc;
    dri_bo *bo;
    int pp_index = pp_context->current_pp;

    bo = pp_context->idrt.bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);
    desc = bo->virtual;
    memset(desc, 0, sizeof(*desc));
    desc->desc0.kernel_start_pointer = 
        pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
    desc->desc1.single_program_flow = 1;
    desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
    desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
    desc->desc2.sampler_state_pointer = 
        pp_context->sampler_state_table.bo->offset >> 5;
    desc->desc3.binding_table_entry_count = 0;
    desc->desc3.binding_table_pointer = 
        pp_context->binding_table.bo->offset >> 5; /*reloc */
    desc->desc4.constant_urb_entry_read_offset = 0;
    desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */

    /* relocation for the kernel start pointer in desc0 */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct gen6_interface_descriptor_data, desc0),
                      pp_context->pp_modules[pp_index].kernel.bo);

    /* relocation for the sampler state pointer in desc2; the delta
     * carries the low bits (sampler_count) packed next to the offset */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      desc->desc2.sampler_count << 2,
                      offsetof(struct gen6_interface_descriptor_data, desc2),
                      pp_context->sampler_state_table.bo);

    /* relocation for the binding table pointer in desc3 */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      desc->desc3.binding_table_entry_count,
                      offsetof(struct gen6_interface_descriptor_data, desc3),
                      pp_context->binding_table.bo);

    dri_bo_unmap(bo);
    pp_context->idrt.num_interface_descriptors++;
}
2001
2002 static void
2003 gen6_pp_upload_constants(struct i965_post_processing_context *pp_context)
2004 {
2005     unsigned char *constant_buffer;
2006
2007     assert(sizeof(pp_static_parameter) == 128);
2008     dri_bo_map(pp_context->curbe.bo, 1);
2009     assert(pp_context->curbe.bo->virtual);
2010     constant_buffer = pp_context->curbe.bo->virtual;
2011     memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
2012     dri_bo_unmap(pp_context->curbe.bo);
2013 }
2014
/*
 * Program all indirect GPU state for a Gen6 PP run: binding table,
 * interface descriptor, and the CURBE constant buffer.
 */
static void
gen6_pp_states_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_post_processing_context *pp_context = i965->pp_context;

    gen6_pp_binding_table(pp_context);
    gen6_pp_interface_descriptor_table(pp_context);
    gen6_pp_upload_constants(pp_context);
}
2025
/* Switch the command streamer to the media pipeline (PIPELINE_SELECT). */
static void
gen6_pp_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
    ADVANCE_BATCH(batch);
}
2036
2037 static void
2038 gen6_pp_state_base_address(VADriverContextP ctx)
2039 {
2040     struct i965_driver_data *i965 = i965_driver_data(ctx);
2041     struct intel_batchbuffer *batch = i965->batch;
2042
2043     BEGIN_BATCH(batch, 10);
2044     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
2045     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2046     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2047     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2048     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2049     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2050     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2051     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2052     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2053     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2054     ADVANCE_BATCH(batch);
2055 }
2056
/*
 * Emit MEDIA_VFE_STATE: thread/URB configuration for the media
 * fixed-function unit, taken from pp_context->urb (entry counts and
 * sizes set up at context creation).
 */
static void
gen6_pp_vfe_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 8);
    OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
    OUT_BATCH(batch, 0);
    /* maximum thread number and URB entry count */
    OUT_BATCH(batch,
              (pp_context->urb.num_vfe_entries - 1) << 16 |
              pp_context->urb.num_vfe_entries << 8);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch,
              (pp_context->urb.size_vfe_entry * 2) << 16 |  /* in 256 bits unit */
              (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 - 1));            /* in 256 bits unit */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
2078
/*
 * Emit MEDIA_CURBE_LOAD pointing the constant URB at the CURBE buffer
 * previously filled by gen6_pp_upload_constants().  Length is the CS
 * URB allocation in bytes (512 bytes per size unit).
 */
static void
gen6_pp_curbe_load(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* the programmed length must fit inside the allocated buffer */
    assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512 <= pp_context->curbe.bo->size);

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch,
              pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512);
    OUT_RELOC(batch, 
              pp_context->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    ADVANCE_BATCH(batch);
}
2098
/*
 * Emit MEDIA_INTERFACE_DESCRIPTOR_LOAD referencing the interface
 * descriptor table built by gen6_pp_interface_descriptor_table().
 */
static void
gen6_interface_descriptor_load(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    /* total size of the descriptor table in bytes */
    OUT_BATCH(batch,
              pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
    OUT_RELOC(batch, 
              pp_context->idrt.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    ADVANCE_BATCH(batch);
}
2116
2117 static void
2118 gen6_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2119 {
2120     struct i965_driver_data *i965 = i965_driver_data(ctx);
2121     struct intel_batchbuffer *batch = i965->batch;
2122     int x, x_steps, y, y_steps;
2123
2124     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
2125     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
2126
2127     for (y = 0; y < y_steps; y++) {
2128         for (x = 0; x < x_steps; x++) {
2129             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
2130                 BEGIN_BATCH(batch, 22);
2131                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 20);
2132                 OUT_BATCH(batch, 0);
2133                 OUT_BATCH(batch, 0); /* no indirect data */
2134                 OUT_BATCH(batch, 0);
2135                 OUT_BATCH(batch, 0); /* scoreboard */
2136                 OUT_BATCH(batch, 0);
2137
2138                 /* inline data grf 5-6 */
2139                 assert(sizeof(pp_inline_parameter) == 64);
2140                 intel_batchbuffer_data(batch, &pp_inline_parameter, sizeof(pp_inline_parameter));
2141
2142                 ADVANCE_BATCH(batch);
2143             }
2144         }
2145     }
2146 }
2147
/*
 * Emit the full Gen6 media pipeline for one PP run, as one atomic
 * batchbuffer section: flush, pipeline select, CURBE load, descriptor
 * load, base addresses, VFE state, then the per-block object walker.
 */
static void
gen6_pp_pipeline_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_post_processing_context *pp_context = i965->pp_context;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_pp_pipeline_select(ctx);
    gen6_pp_curbe_load(ctx, pp_context);
    gen6_interface_descriptor_load(ctx, pp_context);
    gen6_pp_state_base_address(ctx);
    gen6_pp_vfe_state(ctx, pp_context);
    gen6_pp_object_walker(ctx, pp_context);
    intel_batchbuffer_end_atomic(batch);
}
2165
/*
 * Run one post-processing pass on Gen6/Gen7: reset/allocate the PP
 * context for the selected module, set up the GPU state objects, then
 * emit the media pipeline into the batchbuffer.
 */
static void
gen6_post_processing(
    VADriverContextP   ctx,
    VASurfaceID        in_surface_id,
    VASurfaceID        out_surface_id,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    int                pp_index          /* PP_* module index */
)
{
    gen6_pp_initialize(ctx, in_surface_id, out_surface_id, src_rect, dst_rect, pp_index);
    gen6_pp_states_setup(ctx);
    gen6_pp_pipeline_setup(ctx);
}
2180
2181 static void
2182 i965_post_processing_internal(
2183     VADriverContextP   ctx,
2184     VASurfaceID        in_surface_id,
2185     VASurfaceID        out_surface_id,
2186     const VARectangle *src_rect,
2187     const VARectangle *dst_rect,
2188     int                pp_index
2189 )
2190 {
2191     struct i965_driver_data *i965 = i965_driver_data(ctx);
2192
2193     if (IS_GEN6(i965->intel.device_id) ||
2194         IS_GEN7(i965->intel.device_id))
2195         gen6_post_processing(ctx, in_surface_id, out_surface_id, src_rect, dst_rect, pp_index);
2196     else
2197         ironlake_post_processing(ctx, in_surface_id, out_surface_id, src_rect, dst_rect, pp_index);
2198 }
2199
2200 VAStatus 
2201 i965_DestroySurfaces(VADriverContextP ctx,
2202                      VASurfaceID *surface_list,
2203                      int num_surfaces);
2204 VAStatus 
2205 i965_CreateSurfaces(VADriverContextP ctx,
2206                     int width,
2207                     int height,
2208                     int format,
2209                     int num_surfaces,
2210                     VASurfaceID *surfaces);
2211 VASurfaceID
2212 i965_post_processing(
2213     VADriverContextP   ctx,
2214     VASurfaceID        surface,
2215     const VARectangle *src_rect,
2216     const VARectangle *dst_rect,
2217     unsigned int       flags,
2218     int               *has_done_scaling  
2219 )
2220 {
2221     struct i965_driver_data *i965 = i965_driver_data(ctx);
2222     VASurfaceID in_surface_id = surface;
2223     VASurfaceID out_surface_id = VA_INVALID_ID;
2224
2225     if (HAS_PP(i965)) {
2226         /* Currently only support post processing for NV12 surface */
2227         if (i965->render_state.interleaved_uv) {
2228             struct object_surface *obj_surface;
2229             VAStatus status;
2230
2231             if (flags & I965_PP_FLAG_DEINTERLACING) {
2232                 obj_surface = SURFACE(in_surface_id);
2233                 status = i965_CreateSurfaces(ctx,
2234                                              obj_surface->orig_width,
2235                                              obj_surface->orig_height,
2236                                              VA_RT_FORMAT_YUV420,
2237                                              1,
2238                                              &out_surface_id);
2239                 assert(status == VA_STATUS_SUCCESS);
2240                 obj_surface = SURFACE(out_surface_id);
2241                 i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
2242                 i965_post_processing_internal(ctx,
2243                                               in_surface_id, out_surface_id,
2244                                               src_rect, dst_rect,
2245                                               PP_NV12_DNDI);
2246             }
2247
2248             if (flags & I965_PP_FLAG_AVS) {
2249                 struct i965_render_state *render_state = &i965->render_state;
2250                 struct intel_region *dest_region = render_state->draw_region;
2251
2252                 if (out_surface_id != VA_INVALID_ID)
2253                     in_surface_id = out_surface_id;
2254
2255                 status = i965_CreateSurfaces(ctx,
2256                                              dest_region->width,
2257                                              dest_region->height,
2258                                              VA_RT_FORMAT_YUV420,
2259                                              1,
2260                                              &out_surface_id);
2261                 assert(status == VA_STATUS_SUCCESS);
2262                 obj_surface = SURFACE(out_surface_id);
2263                 i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
2264                 i965_post_processing_internal(ctx,
2265                                               in_surface_id, out_surface_id,
2266                                               src_rect, dst_rect,
2267                                               PP_NV12_AVS);
2268
2269                 if (in_surface_id != surface)
2270                     i965_DestroySurfaces(ctx, &in_surface_id, 1);
2271                 
2272                 *has_done_scaling = 1;
2273             }
2274         }
2275     }
2276
2277     return out_surface_id;
2278 }       
2279
2280 Bool
2281 i965_post_processing_terminate(VADriverContextP ctx)
2282 {
2283     struct i965_driver_data *i965 = i965_driver_data(ctx);
2284     struct i965_post_processing_context *pp_context = i965->pp_context;
2285     int i;
2286
2287     if (HAS_PP(i965)) {
2288         if (pp_context) {
2289             dri_bo_unreference(pp_context->curbe.bo);
2290             pp_context->curbe.bo = NULL;
2291
2292             for (i = 0; i < MAX_PP_SURFACES; i++) {
2293                 dri_bo_unreference(pp_context->surfaces[i].ss_bo);
2294                 pp_context->surfaces[i].ss_bo = NULL;
2295
2296                 dri_bo_unreference(pp_context->surfaces[i].s_bo);
2297                 pp_context->surfaces[i].s_bo = NULL;
2298             }
2299
2300             dri_bo_unreference(pp_context->sampler_state_table.bo);
2301             pp_context->sampler_state_table.bo = NULL;
2302
2303             dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
2304             pp_context->sampler_state_table.bo_8x8 = NULL;
2305
2306             dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
2307             pp_context->sampler_state_table.bo_8x8_uv = NULL;
2308
2309             dri_bo_unreference(pp_context->binding_table.bo);
2310             pp_context->binding_table.bo = NULL;
2311
2312             dri_bo_unreference(pp_context->idrt.bo);
2313             pp_context->idrt.bo = NULL;
2314             pp_context->idrt.num_interface_descriptors = 0;
2315
2316             dri_bo_unreference(pp_context->vfe_state.bo);
2317             pp_context->vfe_state.bo = NULL;
2318
2319             dri_bo_unreference(pp_context->stmm.bo);
2320             pp_context->stmm.bo = NULL;
2321
2322             for (i = 0; i < NUM_PP_MODULES; i++) {
2323                 struct pp_module *pp_module = &pp_context->pp_modules[i];
2324
2325                 dri_bo_unreference(pp_module->kernel.bo);
2326                 pp_module->kernel.bo = NULL;
2327             }
2328
2329             free(pp_context);
2330         }
2331
2332         i965->pp_context = NULL;
2333     }
2334
2335     return True;
2336 }
2337
2338 Bool
2339 i965_post_processing_init(VADriverContextP ctx)
2340 {
2341     struct i965_driver_data *i965 = i965_driver_data(ctx);
2342     struct i965_post_processing_context *pp_context = i965->pp_context;
2343     int i;
2344
2345     if (HAS_PP(i965)) {
2346         if (pp_context == NULL) {
2347             pp_context = calloc(1, sizeof(*pp_context));
2348             i965->pp_context = pp_context;
2349
2350             pp_context->urb.size = URB_SIZE((&i965->intel));
2351             pp_context->urb.num_vfe_entries = 32;
2352             pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
2353             pp_context->urb.num_cs_entries = 1;
2354             pp_context->urb.size_cs_entry = 2;      /* in 512 bits unit */
2355             pp_context->urb.vfe_start = 0;
2356             pp_context->urb.cs_start = pp_context->urb.vfe_start + 
2357                 pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
2358             assert(pp_context->urb.cs_start + 
2359                    pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
2360
2361             assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
2362             assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
2363
2364             if (IS_GEN6(i965->intel.device_id) ||
2365                 IS_GEN7(i965->intel.device_id))
2366                 memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
2367             else if (IS_IRONLAKE(i965->intel.device_id))
2368                 memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
2369
2370             for (i = 0; i < NUM_PP_MODULES; i++) {
2371                 struct pp_module *pp_module = &pp_context->pp_modules[i];
2372                 dri_bo_unreference(pp_module->kernel.bo);
2373                 pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
2374                                                     pp_module->kernel.name,
2375                                                     pp_module->kernel.size,
2376                                                     4096);
2377                 assert(pp_module->kernel.bo);
2378                 dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
2379             }
2380         }
2381     }
2382
2383     return True;
2384 }