f32bafdac5c0fd20a2449f2fbad942db82dd6746
[platform/upstream/libva.git] / i965_drv_video / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include <va/va_backend.h>
35
36 #include "intel_batchbuffer.h"
37 #include "intel_driver.h"
38 #include "i965_defines.h"
39 #include "i965_structs.h"
40 #include "i965_drv_video.h"
41 #include "i965_post_processing.h"
42 #include "i965_render.h"
43
44 #define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) ||     \
45                      IS_GEN6((ctx)->intel.device_id))
46
47 static const uint32_t pp_null_gen5[][4] = {
48 #include "shaders/post_processing/null.g4b.gen5"
49 };
50
51 static const uint32_t pp_nv12_load_save_gen5[][4] = {
52 #include "shaders/post_processing/nv12_load_save_nv12.g4b.gen5"
53 };
54
55 static const uint32_t pp_nv12_scaling_gen5[][4] = {
56 #include "shaders/post_processing/nv12_scaling_nv12.g4b.gen5"
57 };
58
59 static const uint32_t pp_nv12_avs_gen5[][4] = {
60 #include "shaders/post_processing/nv12_avs_nv12.g4b.gen5"
61 };
62
63 static const uint32_t pp_nv12_dndi_gen5[][4] = {
64 #include "shaders/post_processing/nv12_dndi_nv12.g4b.gen5"
65 };
66
67 static void pp_null_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
68                                unsigned short srcw, unsigned short srch,
69                                unsigned short destw, unsigned short desth);
70 static void pp_nv12_avs_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
71                                    unsigned short srcw, unsigned short srch,
72                                    unsigned short destw, unsigned short desth);
73 static void pp_nv12_scaling_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
74                                        unsigned short srcw, unsigned short srch,
75                                        unsigned short destw, unsigned short desth);
76 static void pp_nv12_load_save_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
77                                          unsigned short srcw, unsigned short srch,
78                                          unsigned short destw, unsigned short desth);
79 static void pp_nv12_dndi_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
80                                     unsigned short srcw, unsigned short srch,
81                                     unsigned short destw, unsigned short desth);
82
83 static struct pp_module pp_modules_gen5[] = {
84     {
85         {
86             "NULL module (for testing)",
87             PP_NULL,
88             pp_null_gen5,
89             sizeof(pp_null_gen5),
90             NULL,
91         },
92
93         pp_null_initialize,
94     },
95
96     {
97         {
98             "NV12 Load & Save module",
99             PP_NV12_LOAD_SAVE,
100             pp_nv12_load_save_gen5,
101             sizeof(pp_nv12_load_save_gen5),
102             NULL,
103         },
104
105         pp_nv12_load_save_initialize,
106     },
107
108     {
109         {
110             "NV12 Scaling module",
111             PP_NV12_SCALING,
112             pp_nv12_scaling_gen5,
113             sizeof(pp_nv12_scaling_gen5),
114             NULL,
115         },
116
117         pp_nv12_scaling_initialize,
118     },
119
120     {
121         {
122             "NV12 AVS module",
123             PP_NV12_AVS,
124             pp_nv12_avs_gen5,
125             sizeof(pp_nv12_avs_gen5),
126             NULL,
127         },
128
129         pp_nv12_avs_initialize,
130     },
131
132     {
133         {
134             "NV12 DNDI module",
135             PP_NV12_DNDI,
136             pp_nv12_dndi_gen5,
137             sizeof(pp_nv12_dndi_gen5),
138             NULL,
139         },
140
141         pp_nv12_dndi_initialize,
142     },
143 };
144
145 static const uint32_t pp_null_gen6[][4] = {
146 #include "shaders/post_processing/null.g6b"
147 };
148
149 static const uint32_t pp_nv12_load_save_gen6[][4] = {
150 #include "shaders/post_processing/nv12_load_save_nv12.g6b"
151 };
152
153 static const uint32_t pp_nv12_scaling_gen6[][4] = {
154 #include "shaders/post_processing/nv12_scaling_nv12.g6b"
155 };
156
157 static const uint32_t pp_nv12_avs_gen6[][4] = {
158 #include "shaders/post_processing/nv12_avs_nv12.g6b"
159 };
160
161 static const uint32_t pp_nv12_dndi_gen6[][4] = {
162 #include "shaders/post_processing/nv12_dndi_nv12.g6b"
163 };
164
165 static struct pp_module pp_modules_gen6[] = {
166     {
167         {
168             "NULL module (for testing)",
169             PP_NULL,
170             pp_null_gen6,
171             sizeof(pp_null_gen6),
172             NULL,
173         },
174
175         pp_null_initialize,
176     },
177
178     {
179         {
180             "NV12 Load & Save module",
181             PP_NV12_LOAD_SAVE,
182             pp_nv12_load_save_gen6,
183             sizeof(pp_nv12_load_save_gen6),
184             NULL,
185         },
186
187         pp_nv12_load_save_initialize,
188     },
189
190     {
191         {
192             "NV12 Scaling module",
193             PP_NV12_SCALING,
194             pp_nv12_scaling_gen6,
195             sizeof(pp_nv12_scaling_gen6),
196             NULL,
197         },
198
199         pp_nv12_scaling_initialize,
200     },
201
202     {
203         {
204             "NV12 AVS module",
205             PP_NV12_AVS,
206             pp_nv12_avs_gen6,
207             sizeof(pp_nv12_avs_gen6),
208             NULL,
209         },
210
211         pp_nv12_avs_initialize,
212     },
213
214     {
215         {
216             "NV12 DNDI module",
217             PP_NV12_DNDI,
218             pp_nv12_dndi_gen6,
219             sizeof(pp_nv12_dndi_gen6),
220             NULL,
221         },
222
223         pp_nv12_dndi_initialize,
224     },
225 };
226
227 #define pp_static_parameter     pp_context->pp_static_parameter
228 #define pp_inline_parameter     pp_context->pp_inline_parameter
229
230 static void
231 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
232 {
233     switch (tiling) {
234     case I915_TILING_NONE:
235         ss->ss3.tiled_surface = 0;
236         ss->ss3.tile_walk = 0;
237         break;
238     case I915_TILING_X:
239         ss->ss3.tiled_surface = 1;
240         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
241         break;
242     case I915_TILING_Y:
243         ss->ss3.tiled_surface = 1;
244         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
245         break;
246     }
247 }
248
249 static void
250 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
251 {
252     switch (tiling) {
253     case I915_TILING_NONE:
254         ss->ss2.tiled_surface = 0;
255         ss->ss2.tile_walk = 0;
256         break;
257     case I915_TILING_X:
258         ss->ss2.tiled_surface = 1;
259         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
260         break;
261     case I915_TILING_Y:
262         ss->ss2.tiled_surface = 1;
263         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
264         break;
265     }
266 }
267
268 static void
269 ironlake_pp_surface_state(struct i965_post_processing_context *pp_context)
270 {
271
272 }
273
274 static void
275 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
276 {
277     struct i965_interface_descriptor *desc;
278     dri_bo *bo;
279     int pp_index = pp_context->current_pp;
280
281     bo = pp_context->idrt.bo;
282     dri_bo_map(bo, 1);
283     assert(bo->virtual);
284     desc = bo->virtual;
285     memset(desc, 0, sizeof(*desc));
286     desc->desc0.grf_reg_blocks = 10;
287     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
288     desc->desc1.const_urb_entry_read_offset = 0;
289     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
290     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
291     desc->desc2.sampler_count = 0;
292     desc->desc3.binding_table_entry_count = 0;
293     desc->desc3.binding_table_pointer = 
294         pp_context->binding_table.bo->offset >> 5; /*reloc */
295
296     dri_bo_emit_reloc(bo,
297                       I915_GEM_DOMAIN_INSTRUCTION, 0,
298                       desc->desc0.grf_reg_blocks,
299                       offsetof(struct i965_interface_descriptor, desc0),
300                       pp_context->pp_modules[pp_index].kernel.bo);
301
302     dri_bo_emit_reloc(bo,
303                       I915_GEM_DOMAIN_INSTRUCTION, 0,
304                       desc->desc2.sampler_count << 2,
305                       offsetof(struct i965_interface_descriptor, desc2),
306                       pp_context->sampler_state_table.bo);
307
308     dri_bo_emit_reloc(bo,
309                       I915_GEM_DOMAIN_INSTRUCTION, 0,
310                       desc->desc3.binding_table_entry_count,
311                       offsetof(struct i965_interface_descriptor, desc3),
312                       pp_context->binding_table.bo);
313
314     dri_bo_unmap(bo);
315     pp_context->idrt.num_interface_descriptors++;
316 }
317
318 static void
319 ironlake_pp_binding_table(struct i965_post_processing_context *pp_context)
320 {
321     unsigned int *binding_table;
322     dri_bo *bo = pp_context->binding_table.bo;
323     int i;
324
325     dri_bo_map(bo, 1);
326     assert(bo->virtual);
327     binding_table = bo->virtual;
328     memset(binding_table, 0, bo->size);
329
330     for (i = 0; i < MAX_PP_SURFACES; i++) {
331         if (pp_context->surfaces[i].ss_bo) {
332             assert(pp_context->surfaces[i].s_bo);
333
334             binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
335             dri_bo_emit_reloc(bo,
336                               I915_GEM_DOMAIN_INSTRUCTION, 0,
337                               0,
338                               i * sizeof(*binding_table),
339                               pp_context->surfaces[i].ss_bo);
340         }
341     
342     }
343
344     dri_bo_unmap(bo);
345 }
346
347 static void
348 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
349 {
350     struct i965_vfe_state *vfe_state;
351     dri_bo *bo;
352
353     bo = pp_context->vfe_state.bo;
354     dri_bo_map(bo, 1);
355     assert(bo->virtual);
356     vfe_state = bo->virtual;
357     memset(vfe_state, 0, sizeof(*vfe_state));
358     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
359     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
360     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
361     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
362     vfe_state->vfe1.children_present = 0;
363     vfe_state->vfe2.interface_descriptor_base = 
364         pp_context->idrt.bo->offset >> 4; /* reloc */
365     dri_bo_emit_reloc(bo,
366                       I915_GEM_DOMAIN_INSTRUCTION, 0,
367                       0,
368                       offsetof(struct i965_vfe_state, vfe2),
369                       pp_context->idrt.bo);
370     dri_bo_unmap(bo);
371 }
372
373 static void
374 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
375 {
376     unsigned char *constant_buffer;
377
378     assert(sizeof(pp_static_parameter) == 128);
379     dri_bo_map(pp_context->curbe.bo, 1);
380     assert(pp_context->curbe.bo->virtual);
381     constant_buffer = pp_context->curbe.bo->virtual;
382     memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
383     dri_bo_unmap(pp_context->curbe.bo);
384 }
385
386 static void
387 ironlake_pp_states_setup(VADriverContextP ctx)
388 {
389     struct i965_driver_data *i965 = i965_driver_data(ctx);
390     struct i965_post_processing_context *pp_context = i965->pp_context;
391
392     ironlake_pp_surface_state(pp_context);
393     ironlake_pp_binding_table(pp_context);
394     ironlake_pp_interface_descriptor_table(pp_context);
395     ironlake_pp_vfe_state(pp_context);
396     ironlake_pp_upload_constants(pp_context);
397 }
398
399 static void
400 ironlake_pp_pipeline_select(VADriverContextP ctx)
401 {
402     struct i965_driver_data *i965 = i965_driver_data(ctx);
403     struct intel_batchbuffer *batch = i965->batch;
404
405     BEGIN_BATCH(batch, 1);
406     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
407     ADVANCE_BATCH(batch);
408 }
409
410 static void
411 ironlake_pp_urb_layout(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
412 {
413     struct i965_driver_data *i965 = i965_driver_data(ctx);
414     struct intel_batchbuffer *batch = i965->batch;
415     unsigned int vfe_fence, cs_fence;
416
417     vfe_fence = pp_context->urb.cs_start;
418     cs_fence = pp_context->urb.size;
419
420     BEGIN_BATCH(batch, 3);
421     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
422     OUT_BATCH(batch, 0);
423     OUT_BATCH(batch, 
424               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
425               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
426     ADVANCE_BATCH(batch);
427 }
428
429 static void
430 ironlake_pp_state_base_address(VADriverContextP ctx)
431 {
432     struct i965_driver_data *i965 = i965_driver_data(ctx);
433     struct intel_batchbuffer *batch = i965->batch;
434
435     BEGIN_BATCH(batch, 8);
436     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
437     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
438     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
439     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
440     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
441     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
442     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
443     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
444     ADVANCE_BATCH(batch);
445 }
446
447 static void
448 ironlake_pp_state_pointers(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
449 {
450     struct i965_driver_data *i965 = i965_driver_data(ctx);
451     struct intel_batchbuffer *batch = i965->batch;
452
453     BEGIN_BATCH(batch, 3);
454     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
455     OUT_BATCH(batch, 0);
456     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
457     ADVANCE_BATCH(batch);
458 }
459
460 static void 
461 ironlake_pp_cs_urb_layout(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
462 {
463     struct i965_driver_data *i965 = i965_driver_data(ctx);
464     struct intel_batchbuffer *batch = i965->batch;
465
466     BEGIN_BATCH(batch, 2);
467     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
468     OUT_BATCH(batch,
469               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
470               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
471     ADVANCE_BATCH(batch);
472 }
473
474 static void
475 ironlake_pp_constant_buffer(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
476 {
477     struct i965_driver_data *i965 = i965_driver_data(ctx);
478     struct intel_batchbuffer *batch = i965->batch;
479
480     BEGIN_BATCH(batch, 2);
481     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
482     OUT_RELOC(batch, pp_context->curbe.bo,
483               I915_GEM_DOMAIN_INSTRUCTION, 0,
484               pp_context->urb.size_cs_entry - 1);
485     ADVANCE_BATCH(batch);    
486 }
487
488 static void
489 ironlake_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
490 {
491     struct i965_driver_data *i965 = i965_driver_data(ctx);
492     struct intel_batchbuffer *batch = i965->batch;
493     int x, x_steps, y, y_steps;
494
495     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
496     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
497
498     for (y = 0; y < y_steps; y++) {
499         for (x = 0; x < x_steps; x++) {
500             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
501                 BEGIN_BATCH(batch, 20);
502                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
503                 OUT_BATCH(batch, 0);
504                 OUT_BATCH(batch, 0); /* no indirect data */
505                 OUT_BATCH(batch, 0);
506
507                 /* inline data grf 5-6 */
508                 assert(sizeof(pp_inline_parameter) == 64);
509                 intel_batchbuffer_data(batch, &pp_inline_parameter, sizeof(pp_inline_parameter));
510
511                 ADVANCE_BATCH(batch);
512             }
513         }
514     }
515 }
516
517 static void
518 ironlake_pp_pipeline_setup(VADriverContextP ctx)
519 {
520     struct i965_driver_data *i965 = i965_driver_data(ctx);
521     struct intel_batchbuffer *batch = i965->batch;
522     struct i965_post_processing_context *pp_context = i965->pp_context;
523
524     intel_batchbuffer_start_atomic(batch, 0x1000);
525     intel_batchbuffer_emit_mi_flush(batch);
526     ironlake_pp_pipeline_select(ctx);
527     ironlake_pp_state_base_address(ctx);
528     ironlake_pp_state_pointers(ctx, pp_context);
529     ironlake_pp_urb_layout(ctx, pp_context);
530     ironlake_pp_cs_urb_layout(ctx, pp_context);
531     ironlake_pp_constant_buffer(ctx, pp_context);
532     ironlake_pp_object_walker(ctx, pp_context);
533     intel_batchbuffer_end_atomic(batch);
534 }
535
536 static int
537 pp_null_x_steps(void *private_context)
538 {
539     return 1;
540 }
541
542 static int
543 pp_null_y_steps(void *private_context)
544 {
545     return 1;
546 }
547
548 static int
549 pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
550 {
551     return 0;
552 }
553
554 static void
555 pp_null_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
556                    unsigned short srcw, unsigned short srch,
557                    unsigned short destw, unsigned short desth)
558 {
559     struct i965_driver_data *i965 = i965_driver_data(ctx);
560     struct i965_post_processing_context *pp_context = i965->pp_context;
561     struct object_surface *obj_surface;
562
563     /* surface */
564     obj_surface = SURFACE(surface);
565     dri_bo_unreference(obj_surface->pp_out_bo);
566     obj_surface->pp_out_bo = obj_surface->bo;
567     dri_bo_reference(obj_surface->pp_out_bo);
568     assert(obj_surface->pp_out_bo);
569     obj_surface->pp_out_width = obj_surface->width;
570     obj_surface->pp_out_height = obj_surface->height;
571     obj_surface->orig_pp_out_width = obj_surface->orig_width;
572     obj_surface->orig_pp_out_height = obj_surface->orig_height;
573
574     /* private function & data */
575     pp_context->pp_x_steps = pp_null_x_steps;
576     pp_context->pp_y_steps = pp_null_y_steps;
577     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
578 }
579
580 static int
581 pp_load_save_x_steps(void *private_context)
582 {
583     return 1;
584 }
585
586 static int
587 pp_load_save_y_steps(void *private_context)
588 {
589     struct pp_load_save_context *pp_load_save_context = private_context;
590
591     return pp_load_save_context->dest_h / 8;
592 }
593
594 static int
595 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
596 {
597     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
598     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
599     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
600     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
601
602     return 0;
603 }
604
605 static void
606 pp_nv12_load_save_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
607                              unsigned short srcw, unsigned short srch,
608                              unsigned short destw, unsigned short desth)
609 {
610     struct i965_driver_data *i965 = i965_driver_data(ctx);
611     struct i965_post_processing_context *pp_context = i965->pp_context;
612     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
613     struct object_surface *obj_surface;
614     struct i965_surface_state *ss;
615     dri_bo *bo;
616     int index, w, h;
617     int orig_w, orig_h;
618     unsigned int tiling, swizzle;
619
620     /* surface */
621     obj_surface = SURFACE(surface);
622     orig_w = obj_surface->orig_width;
623     orig_h = obj_surface->orig_height;
624     w = obj_surface->width;
625     h = obj_surface->height;
626
627     dri_bo_unreference(obj_surface->pp_out_bo);
628     obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
629                                           "intermediate surface",
630                                           SIZE_YUV420(w, h),
631                                           4096);
632     assert(obj_surface->pp_out_bo);
633     obj_surface->pp_out_width = obj_surface->width;
634     obj_surface->pp_out_height = obj_surface->height;
635     obj_surface->orig_pp_out_width = obj_surface->orig_width;
636     obj_surface->orig_pp_out_height = obj_surface->orig_height;
637
638     /* source Y surface index 1 */
639     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
640
641     index = 1;
642     pp_context->surfaces[index].s_bo = obj_surface->bo;
643     dri_bo_reference(pp_context->surfaces[index].s_bo);
644     bo = dri_bo_alloc(i965->intel.bufmgr, 
645                       "surface state", 
646                       sizeof(struct i965_surface_state), 
647                       4096);
648     assert(bo);
649     pp_context->surfaces[index].ss_bo = bo;
650     dri_bo_map(bo, True);
651     assert(bo->virtual);
652     ss = bo->virtual;
653     memset(ss, 0, sizeof(*ss));
654     ss->ss0.surface_type = I965_SURFACE_2D;
655     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
656     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
657     ss->ss2.width = orig_w / 4 - 1;
658     ss->ss2.height = orig_h - 1;
659     ss->ss3.pitch = w - 1;
660     pp_set_surface_tiling(ss, tiling);
661     dri_bo_emit_reloc(bo,
662                       I915_GEM_DOMAIN_RENDER, 
663                       0,
664                       0,
665                       offsetof(struct i965_surface_state, ss1),
666                       pp_context->surfaces[index].s_bo);
667     dri_bo_unmap(bo);
668
669     /* source UV surface index 2 */
670     index = 2;
671     pp_context->surfaces[index].s_bo = obj_surface->bo;
672     dri_bo_reference(pp_context->surfaces[index].s_bo);
673     bo = dri_bo_alloc(i965->intel.bufmgr, 
674                       "surface state", 
675                       sizeof(struct i965_surface_state), 
676                       4096);
677     assert(bo);
678     pp_context->surfaces[index].ss_bo = bo;
679     dri_bo_map(bo, True);
680     assert(bo->virtual);
681     ss = bo->virtual;
682     memset(ss, 0, sizeof(*ss));
683     ss->ss0.surface_type = I965_SURFACE_2D;
684     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
685     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
686     ss->ss2.width = orig_w / 4 - 1;
687     ss->ss2.height = orig_h / 2 - 1;
688     ss->ss3.pitch = w - 1;
689     pp_set_surface_tiling(ss, tiling);
690     dri_bo_emit_reloc(bo,
691                       I915_GEM_DOMAIN_RENDER, 
692                       0,
693                       w * h,
694                       offsetof(struct i965_surface_state, ss1),
695                       pp_context->surfaces[index].s_bo);
696     dri_bo_unmap(bo);
697
698     /* destination Y surface index 7 */
699     index = 7;
700     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
701     dri_bo_reference(pp_context->surfaces[index].s_bo);
702     bo = dri_bo_alloc(i965->intel.bufmgr, 
703                       "surface state", 
704                       sizeof(struct i965_surface_state), 
705                       4096);
706     assert(bo);
707     pp_context->surfaces[index].ss_bo = bo;
708     dri_bo_map(bo, True);
709     assert(bo->virtual);
710     ss = bo->virtual;
711     memset(ss, 0, sizeof(*ss));
712     ss->ss0.surface_type = I965_SURFACE_2D;
713     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
714     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
715     ss->ss2.width = orig_w / 4 - 1;
716     ss->ss2.height = orig_h - 1;
717     ss->ss3.pitch = w - 1;
718     dri_bo_emit_reloc(bo,
719                       I915_GEM_DOMAIN_RENDER, 
720                       I915_GEM_DOMAIN_RENDER,
721                       0,
722                       offsetof(struct i965_surface_state, ss1),
723                       pp_context->surfaces[index].s_bo);
724     dri_bo_unmap(bo);
725
726     /* destination UV surface index 8 */
727     index = 8;
728     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
729     dri_bo_reference(pp_context->surfaces[index].s_bo);
730     bo = dri_bo_alloc(i965->intel.bufmgr, 
731                       "surface state", 
732                       sizeof(struct i965_surface_state), 
733                       4096);
734     assert(bo);
735     pp_context->surfaces[index].ss_bo = bo;
736     dri_bo_map(bo, True);
737     assert(bo->virtual);
738     ss = bo->virtual;
739     memset(ss, 0, sizeof(*ss));
740     ss->ss0.surface_type = I965_SURFACE_2D;
741     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
742     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
743     ss->ss2.width = orig_w / 4 - 1;
744     ss->ss2.height = orig_h / 2 - 1;
745     ss->ss3.pitch = w - 1;
746     dri_bo_emit_reloc(bo,
747                       I915_GEM_DOMAIN_RENDER, 
748                       I915_GEM_DOMAIN_RENDER,
749                       w * h,
750                       offsetof(struct i965_surface_state, ss1),
751                       pp_context->surfaces[index].s_bo);
752     dri_bo_unmap(bo);
753
754     /* private function & data */
755     pp_context->pp_x_steps = pp_load_save_x_steps;
756     pp_context->pp_y_steps = pp_load_save_y_steps;
757     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
758     pp_load_save_context->dest_h = h;
759     pp_load_save_context->dest_w = w;
760
761     pp_inline_parameter.grf5.block_count_x = w / 16;   /* 1 x N */
762     pp_inline_parameter.grf5.number_blocks = w / 16;
763 }
764
765 static int
766 pp_scaling_x_steps(void *private_context)
767 {
768     return 1;
769 }
770
771 static int
772 pp_scaling_y_steps(void *private_context)
773 {
774     struct pp_scaling_context *pp_scaling_context = private_context;
775
776     return pp_scaling_context->dest_h / 8;
777 }
778
779 static int
780 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
781 {
782     float src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
783     float src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
784
785     pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16;
786     pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8;
787     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
788     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
789     
790     return 0;
791 }
792
793 static void
794 pp_nv12_scaling_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
795                            unsigned short srcw, unsigned short srch,
796                            unsigned short destw, unsigned short desth)
797 {
798     struct i965_driver_data *i965 = i965_driver_data(ctx);
799     struct i965_post_processing_context *pp_context = i965->pp_context;
800     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
801     struct object_surface *obj_surface;
802     struct i965_sampler_state *sampler_state;
803     struct i965_surface_state *ss;
804     dri_bo *bo;
805     int index;
806     int w, h;
807     int orig_w, orig_h;
808     int pp_out_w, pp_out_h;
809     int orig_pp_out_w, orig_pp_out_h;
810     unsigned int tiling, swizzle;
811
812     /* surface */
813     obj_surface = SURFACE(surface);
814     orig_w = obj_surface->orig_width;
815     orig_h = obj_surface->orig_height;
816     w = obj_surface->width;
817     h = obj_surface->height;
818
819     orig_pp_out_w = destw;
820     orig_pp_out_h = desth;
821     pp_out_w = ALIGN(orig_pp_out_w, 16);
822     pp_out_h = ALIGN(orig_pp_out_h, 16);
823     dri_bo_unreference(obj_surface->pp_out_bo);
824     obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
825                                           "intermediate surface",
826                                           SIZE_YUV420(pp_out_w, pp_out_h),
827                                           4096);
828     assert(obj_surface->pp_out_bo);
829     obj_surface->orig_pp_out_width = orig_pp_out_w;
830     obj_surface->orig_pp_out_height = orig_pp_out_h;
831     obj_surface->pp_out_width = pp_out_w;
832     obj_surface->pp_out_height = pp_out_h;
833
834     /* source Y surface index 1 */
835     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
836
837     index = 1;
838     pp_context->surfaces[index].s_bo = obj_surface->bo;
839     dri_bo_reference(pp_context->surfaces[index].s_bo);
840     bo = dri_bo_alloc(i965->intel.bufmgr, 
841                       "surface state", 
842                       sizeof(struct i965_surface_state), 
843                       4096);
844     assert(bo);
845     pp_context->surfaces[index].ss_bo = bo;
846     dri_bo_map(bo, True);
847     assert(bo->virtual);
848     ss = bo->virtual;
849     memset(ss, 0, sizeof(*ss));
850     ss->ss0.surface_type = I965_SURFACE_2D;
851     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
852     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
853     ss->ss2.width = orig_w - 1;
854     ss->ss2.height = orig_h - 1;
855     ss->ss3.pitch = w - 1;
856     pp_set_surface_tiling(ss, tiling);
857     dri_bo_emit_reloc(bo,
858                       I915_GEM_DOMAIN_RENDER, 
859                       0,
860                       0,
861                       offsetof(struct i965_surface_state, ss1),
862                       pp_context->surfaces[index].s_bo);
863     dri_bo_unmap(bo);
864
865     /* source UV surface index 2 */
866     index = 2;
867     pp_context->surfaces[index].s_bo = obj_surface->bo;
868     dri_bo_reference(pp_context->surfaces[index].s_bo);
869     bo = dri_bo_alloc(i965->intel.bufmgr, 
870                       "surface state", 
871                       sizeof(struct i965_surface_state), 
872                       4096);
873     assert(bo);
874     pp_context->surfaces[index].ss_bo = bo;
875     dri_bo_map(bo, True);
876     assert(bo->virtual);
877     ss = bo->virtual;
878     memset(ss, 0, sizeof(*ss));
879     ss->ss0.surface_type = I965_SURFACE_2D;
880     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
881     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
882     ss->ss2.width = orig_w / 2 - 1;
883     ss->ss2.height = orig_h / 2 - 1;
884     ss->ss3.pitch = w - 1;
885     pp_set_surface_tiling(ss, tiling);
886     dri_bo_emit_reloc(bo,
887                       I915_GEM_DOMAIN_RENDER, 
888                       0,
889                       w * h,
890                       offsetof(struct i965_surface_state, ss1),
891                       pp_context->surfaces[index].s_bo);
892     dri_bo_unmap(bo);
893
894     /* destination Y surface index 7 */
895     index = 7;
896     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
897     dri_bo_reference(pp_context->surfaces[index].s_bo);
898     bo = dri_bo_alloc(i965->intel.bufmgr, 
899                       "surface state", 
900                       sizeof(struct i965_surface_state), 
901                       4096);
902     assert(bo);
903     pp_context->surfaces[index].ss_bo = bo;
904     dri_bo_map(bo, True);
905     assert(bo->virtual);
906     ss = bo->virtual;
907     memset(ss, 0, sizeof(*ss));
908     ss->ss0.surface_type = I965_SURFACE_2D;
909     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
910     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
911     ss->ss2.width = pp_out_w / 4 - 1;
912     ss->ss2.height = pp_out_h - 1;
913     ss->ss3.pitch = pp_out_w - 1;
914     dri_bo_emit_reloc(bo,
915                       I915_GEM_DOMAIN_RENDER, 
916                       I915_GEM_DOMAIN_RENDER,
917                       0,
918                       offsetof(struct i965_surface_state, ss1),
919                       pp_context->surfaces[index].s_bo);
920     dri_bo_unmap(bo);
921
922     /* destination UV surface index 8 */
923     index = 8;
924     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
925     dri_bo_reference(pp_context->surfaces[index].s_bo);
926     bo = dri_bo_alloc(i965->intel.bufmgr, 
927                       "surface state", 
928                       sizeof(struct i965_surface_state), 
929                       4096);
930     assert(bo);
931     pp_context->surfaces[index].ss_bo = bo;
932     dri_bo_map(bo, True);
933     assert(bo->virtual);
934     ss = bo->virtual;
935     memset(ss, 0, sizeof(*ss));
936     ss->ss0.surface_type = I965_SURFACE_2D;
937     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
938     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + pp_out_w * pp_out_h;
939     ss->ss2.width = pp_out_w / 4 - 1;
940     ss->ss2.height = pp_out_h / 2 - 1;
941     ss->ss3.pitch = pp_out_w - 1;
942     dri_bo_emit_reloc(bo,
943                       I915_GEM_DOMAIN_RENDER, 
944                       I915_GEM_DOMAIN_RENDER,
945                       pp_out_w * pp_out_h,
946                       offsetof(struct i965_surface_state, ss1),
947                       pp_context->surfaces[index].s_bo);
948     dri_bo_unmap(bo);
949
950     /* sampler state */
951     dri_bo_map(pp_context->sampler_state_table.bo, True);
952     assert(pp_context->sampler_state_table.bo->virtual);
953     sampler_state = pp_context->sampler_state_table.bo->virtual;
954
955     /* SIMD16 Y index 1 */
956     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
957     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
958     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
959     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
960     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
961
962     /* SIMD16 UV index 2 */
963     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
964     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
965     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
966     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
967     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
968
969     dri_bo_unmap(pp_context->sampler_state_table.bo);
970
971     /* private function & data */
972     pp_context->pp_x_steps = pp_scaling_x_steps;
973     pp_context->pp_y_steps = pp_scaling_y_steps;
974     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
975
976     pp_scaling_context->dest_w = pp_out_w;
977     pp_scaling_context->dest_h = pp_out_h;
978
979     pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) 1.0 / pp_out_h;
980     pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) 1.0 / pp_out_w;
981     pp_inline_parameter.grf5.block_count_x = pp_out_w / 16;   /* 1 x N */
982     pp_inline_parameter.grf5.number_blocks = pp_out_w / 16;
983     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
984     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
985 }
986
987 static int
988 pp_avs_x_steps(void *private_context)
989 {
990     struct pp_avs_context *pp_avs_context = private_context;
991
992     return pp_avs_context->dest_w / 16;
993 }
994
995 static int
996 pp_avs_y_steps(void *private_context)
997 {
998     return 1;
999 }
1000
1001 static int
1002 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1003 {
1004     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1005     float src_x_steping, src_y_steping, video_step_delta;
1006     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
1007
1008     if (tmp_w >= pp_avs_context->dest_w) {
1009         pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1010         pp_inline_parameter.grf6.video_step_delta = 0;
1011         
1012         if (x == 0) {
1013             pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2;
1014         } else {
1015             src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1016             video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1017             pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1018                 16 * 15 * video_step_delta / 2;
1019         }
1020     } else {
1021         int n0, n1, n2, nls_left, nls_right;
1022         int factor_a = 5, factor_b = 4;
1023         float f;
1024
1025         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
1026         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
1027         n2 = tmp_w / (16 * factor_a);
1028         nls_left = n0 + n2;
1029         nls_right = n1 + n2;
1030         f = (float) n2 * 16 / tmp_w;
1031         
1032         if (n0 < 5) {
1033             pp_inline_parameter.grf6.video_step_delta = 0.0;
1034
1035             if (x == 0) {
1036                 pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
1037                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = 0.0;
1038             } else {
1039                 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1040                 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1041                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1042                     16 * 15 * video_step_delta / 2;
1043             }
1044         } else {
1045             if (x < nls_left) {
1046                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
1047                 float a = f / (nls_left * 16 * factor_b);
1048                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
1049                 
1050                 pp_inline_parameter.grf6.video_step_delta = b;
1051
1052                 if (x == 0) {
1053                     pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = 0.0;
1054                     pp_inline_parameter.grf5.normalized_video_x_scaling_step = a;
1055                 } else {
1056                     src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1057                     video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1058                     pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1059                         16 * 15 * video_step_delta / 2;
1060                     pp_inline_parameter.grf5.normalized_video_x_scaling_step += 16 * b;
1061                 }
1062             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
1063                 /* scale the center linearly */
1064                 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1065                 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1066                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1067                     16 * 15 * video_step_delta / 2;
1068                 pp_inline_parameter.grf6.video_step_delta = 0.0;
1069                 pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1070             } else {
1071                 float a = f / (nls_right * 16 * factor_b);
1072                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
1073
1074                 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1075                 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1076                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1077                     16 * 15 * video_step_delta / 2;
1078                 pp_inline_parameter.grf6.video_step_delta = -b;
1079
1080                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
1081                     pp_inline_parameter.grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
1082                 else
1083                     pp_inline_parameter.grf5.normalized_video_x_scaling_step -= b * 16;
1084             }
1085         }
1086     }
1087
1088     src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
1089     pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8;
1090     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
1091     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
1092
1093     return 0;
1094 }
1095
1096 static void
1097 pp_nv12_avs_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
1098                        unsigned short srcw, unsigned short srch,
1099                        unsigned short destw, unsigned short desth)
1100 {
1101     struct i965_driver_data *i965 = i965_driver_data(ctx);
1102     struct i965_post_processing_context *pp_context = i965->pp_context;
1103     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1104     struct object_surface *obj_surface;
1105     struct i965_surface_state *ss;
1106     struct i965_sampler_8x8 *sampler_8x8;
1107     struct i965_sampler_8x8_state *sampler_8x8_state;
1108     struct i965_surface_state2 *ss_8x8;
1109     dri_bo *bo, *src_bo;
1110     int index;
1111     int w, h;
1112     int orig_w, orig_h;
1113     int pp_out_w, pp_out_h;
1114     int orig_pp_out_w, orig_pp_out_h;
1115     unsigned int tiling, swizzle;
1116
1117     /* surface */
1118     obj_surface = SURFACE(surface);
1119     
1120     if (input == 1) {
1121         orig_w = obj_surface->orig_pp_out_width;
1122         orig_h = obj_surface->orig_pp_out_height;
1123         w = obj_surface->pp_out_width;
1124         h = obj_surface->pp_out_height;
1125         src_bo = obj_surface->pp_out_bo;
1126     } else {
1127         orig_w = obj_surface->orig_width;
1128         orig_h = obj_surface->orig_height;
1129         w = obj_surface->width;
1130         h = obj_surface->height;
1131         src_bo = obj_surface->bo;
1132     }
1133
1134     assert(src_bo);
1135     dri_bo_get_tiling(src_bo, &tiling, &swizzle);
1136
1137     /* source Y surface index 1 */
1138     index = 1;
1139     pp_context->surfaces[index].s_bo = src_bo;
1140     dri_bo_reference(pp_context->surfaces[index].s_bo);
1141     bo = dri_bo_alloc(i965->intel.bufmgr, 
1142                       "Y surface state for sample_8x8", 
1143                       sizeof(struct i965_surface_state2), 
1144                       4096);
1145     assert(bo);
1146     pp_context->surfaces[index].ss_bo = bo;
1147     dri_bo_map(bo, True);
1148     assert(bo->virtual);
1149     ss_8x8 = bo->virtual;
1150     memset(ss_8x8, 0, sizeof(*ss_8x8));
1151     ss_8x8->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset;
1152     ss_8x8->ss1.cbcr_pixel_offset_v_direction = 0;
1153     ss_8x8->ss1.width = orig_w - 1;
1154     ss_8x8->ss1.height = orig_h - 1;
1155     ss_8x8->ss2.half_pitch_for_chroma = 0;
1156     ss_8x8->ss2.pitch = w - 1;
1157     ss_8x8->ss2.interleave_chroma = 0;
1158     ss_8x8->ss2.surface_format = SURFACE_FORMAT_Y8_UNORM;
1159     ss_8x8->ss3.x_offset_for_cb = 0;
1160     ss_8x8->ss3.y_offset_for_cb = 0;
1161     pp_set_surface2_tiling(ss_8x8, tiling);
1162     dri_bo_emit_reloc(bo,
1163                       I915_GEM_DOMAIN_RENDER, 
1164                       0,
1165                       0,
1166                       offsetof(struct i965_surface_state2, ss0),
1167                       pp_context->surfaces[index].s_bo);
1168     dri_bo_unmap(bo);
1169
1170     /* source UV surface index 2 */
1171     index = 2;
1172     pp_context->surfaces[index].s_bo = src_bo;
1173     dri_bo_reference(pp_context->surfaces[index].s_bo);
1174     bo = dri_bo_alloc(i965->intel.bufmgr, 
1175                       "UV surface state for sample_8x8", 
1176                       sizeof(struct i965_surface_state2), 
1177                       4096);
1178     assert(bo);
1179     pp_context->surfaces[index].ss_bo = bo;
1180     dri_bo_map(bo, True);
1181     assert(bo->virtual);
1182     ss_8x8 = bo->virtual;
1183     memset(ss_8x8, 0, sizeof(*ss_8x8));
1184     ss_8x8->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset + w * h;
1185     ss_8x8->ss1.cbcr_pixel_offset_v_direction = 0;
1186     ss_8x8->ss1.width = orig_w - 1;
1187     ss_8x8->ss1.height = orig_h - 1;
1188     ss_8x8->ss2.half_pitch_for_chroma = 0;
1189     ss_8x8->ss2.pitch = w - 1;
1190     ss_8x8->ss2.interleave_chroma = 1;
1191     ss_8x8->ss2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
1192     ss_8x8->ss3.x_offset_for_cb = 0;
1193     ss_8x8->ss3.y_offset_for_cb = 0;
1194     pp_set_surface2_tiling(ss_8x8, tiling);
1195     dri_bo_emit_reloc(bo,
1196                       I915_GEM_DOMAIN_RENDER, 
1197                       0,
1198                       w * h,
1199                       offsetof(struct i965_surface_state2, ss0),
1200                       pp_context->surfaces[index].s_bo);
1201     dri_bo_unmap(bo);
1202
1203     orig_pp_out_w = destw;
1204     orig_pp_out_h = desth;
1205     pp_out_w = ALIGN(orig_pp_out_w, 16);
1206     pp_out_h = ALIGN(orig_pp_out_h, 16);
1207     dri_bo_unreference(obj_surface->pp_out_bo);
1208     obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
1209                                           "intermediate surface",
1210                                           SIZE_YUV420(pp_out_w, pp_out_h),
1211                                           4096);
1212     assert(obj_surface->pp_out_bo);
1213     obj_surface->orig_pp_out_width = orig_pp_out_w;
1214     obj_surface->orig_pp_out_height = orig_pp_out_h;
1215     obj_surface->pp_out_width = pp_out_w;
1216     obj_surface->pp_out_height = pp_out_h;
1217
1218     /* destination Y surface index 7 */
1219     index = 7;
1220     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1221     dri_bo_reference(pp_context->surfaces[index].s_bo);
1222     bo = dri_bo_alloc(i965->intel.bufmgr, 
1223                       "surface state", 
1224                       sizeof(struct i965_surface_state), 
1225                       4096);
1226     assert(bo);
1227     pp_context->surfaces[index].ss_bo = bo;
1228     dri_bo_map(bo, True);
1229     assert(bo->virtual);
1230     ss = bo->virtual;
1231     memset(ss, 0, sizeof(*ss));
1232     ss->ss0.surface_type = I965_SURFACE_2D;
1233     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1234     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1235     ss->ss2.width = pp_out_w / 4 - 1;
1236     ss->ss2.height = pp_out_h - 1;
1237     ss->ss3.pitch = pp_out_w - 1;
1238     dri_bo_emit_reloc(bo,
1239                       I915_GEM_DOMAIN_RENDER, 
1240                       I915_GEM_DOMAIN_RENDER,
1241                       0,
1242                       offsetof(struct i965_surface_state, ss1),
1243                       pp_context->surfaces[index].s_bo);
1244     dri_bo_unmap(bo);
1245
1246     /* destination UV surface index 8 */
1247     index = 8;
1248     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1249     dri_bo_reference(pp_context->surfaces[index].s_bo);
1250     bo = dri_bo_alloc(i965->intel.bufmgr, 
1251                       "surface state", 
1252                       sizeof(struct i965_surface_state), 
1253                       4096);
1254     assert(bo);
1255     pp_context->surfaces[index].ss_bo = bo;
1256     dri_bo_map(bo, True);
1257     assert(bo->virtual);
1258     ss = bo->virtual;
1259     memset(ss, 0, sizeof(*ss));
1260     ss->ss0.surface_type = I965_SURFACE_2D;
1261     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1262     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + pp_out_w * pp_out_h;
1263     ss->ss2.width = pp_out_w / 4 - 1;
1264     ss->ss2.height = pp_out_h / 2 - 1;
1265     ss->ss3.pitch = pp_out_w - 1;
1266     dri_bo_emit_reloc(bo,
1267                       I915_GEM_DOMAIN_RENDER, 
1268                       I915_GEM_DOMAIN_RENDER,
1269                       pp_out_w * pp_out_h,
1270                       offsetof(struct i965_surface_state, ss1),
1271                       pp_context->surfaces[index].s_bo);
1272     dri_bo_unmap(bo);
1273     
1274     /* sampler 8x8 state */
1275     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
1276     assert(pp_context->sampler_state_table.bo_8x8->virtual);
1277     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1278     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
1279     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1280     sampler_8x8_state->dw136.default_sharpness_level = 0;
1281     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
1282     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1283     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1284     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
1285
1286     /* sampler 8x8 */
1287     dri_bo_map(pp_context->sampler_state_table.bo, True);
1288     assert(pp_context->sampler_state_table.bo->virtual);
1289     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
1290     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
1291
1292     /* sample_8x8 Y index 1 */
1293     index = 1;
1294     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1295     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
1296     sampler_8x8[index].dw0.ief_bypass = 0;
1297     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1298     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1299     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
1300     sampler_8x8[index].dw2.global_noise_estimation = 22;
1301     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1302     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1303     sampler_8x8[index].dw3.strong_edge_weight = 7;
1304     sampler_8x8[index].dw3.regular_weight = 2;
1305     sampler_8x8[index].dw3.non_edge_weight = 0;
1306     sampler_8x8[index].dw3.gain_factor = 40;
1307     sampler_8x8[index].dw4.steepness_boost = 0;
1308     sampler_8x8[index].dw4.steepness_threshold = 0;
1309     sampler_8x8[index].dw4.mr_boost = 0;
1310     sampler_8x8[index].dw4.mr_threshold = 5;
1311     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1312     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1313     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1314     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1315     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1316     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1317     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1318     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1319     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1320     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1321     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1322     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1323     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1324     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1325     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1326     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1327     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1328     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1329     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1330     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1331     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1332     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1333     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1334     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1335     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1336     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1337     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1338     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1339     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1340     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1341     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1342     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1343     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1344     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1345     sampler_8x8[index].dw13.limiter_boost = 0;
1346     sampler_8x8[index].dw13.minimum_limiter = 10;
1347     sampler_8x8[index].dw13.maximum_limiter = 11;
1348     sampler_8x8[index].dw14.clip_limiter = 130;
1349     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1350                       I915_GEM_DOMAIN_RENDER, 
1351                       0,
1352                       0,
1353                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1354                       pp_context->sampler_state_table.bo_8x8);
1355
1356     dri_bo_map(pp_context->sampler_state_table.bo_8x8_uv, True);
1357     assert(pp_context->sampler_state_table.bo_8x8_uv->virtual);
1358     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1359     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8_uv->virtual;
1360     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1361     sampler_8x8_state->dw136.default_sharpness_level = 0;
1362     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 0;
1363     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1364     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1365     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8_uv);
1366
1367     /* sample_8x8 UV index 2 */
1368     index = 2;
1369     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1370     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_NEAREST;
1371     sampler_8x8[index].dw0.ief_bypass = 0;
1372     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1373     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1374     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8_uv->offset >> 5;
1375     sampler_8x8[index].dw2.global_noise_estimation = 22;
1376     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1377     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1378     sampler_8x8[index].dw3.strong_edge_weight = 7;
1379     sampler_8x8[index].dw3.regular_weight = 2;
1380     sampler_8x8[index].dw3.non_edge_weight = 0;
1381     sampler_8x8[index].dw3.gain_factor = 40;
1382     sampler_8x8[index].dw4.steepness_boost = 0;
1383     sampler_8x8[index].dw4.steepness_threshold = 0;
1384     sampler_8x8[index].dw4.mr_boost = 0;
1385     sampler_8x8[index].dw4.mr_threshold = 5;
1386     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1387     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1388     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1389     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1390     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1391     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1392     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1393     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1394     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1395     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1396     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1397     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1398     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1399     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1400     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1401     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1402     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1403     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1404     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1405     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1406     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1407     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1408     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1409     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1410     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1411     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1412     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1413     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1414     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1415     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1416     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1417     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1418     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1419     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1420     sampler_8x8[index].dw13.limiter_boost = 0;
1421     sampler_8x8[index].dw13.minimum_limiter = 10;
1422     sampler_8x8[index].dw13.maximum_limiter = 11;
1423     sampler_8x8[index].dw14.clip_limiter = 130;
1424     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1425                       I915_GEM_DOMAIN_RENDER, 
1426                       0,
1427                       0,
1428                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1429                       pp_context->sampler_state_table.bo_8x8_uv);
1430
1431     dri_bo_unmap(pp_context->sampler_state_table.bo);
1432
1433     /* private function & data */
1434     pp_context->pp_x_steps = pp_avs_x_steps;
1435     pp_context->pp_y_steps = pp_avs_y_steps;
1436     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
1437
1438     pp_avs_context->dest_w = pp_out_w;
1439     pp_avs_context->dest_h = pp_out_h;
1440     pp_avs_context->src_w = w;
1441     pp_avs_context->src_h = h;
1442
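         /* Per-frame AVS parameters: non-linear anamorphic scaling (NLAS) is
          * enabled, the normalized scaling steps are the reciprocal of the
          * output size, and the walker emits pp_out_h / 8 blocks per column
          * (M x 1 layout).
          */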
1443     pp_static_parameter.grf4.r4_2.avs.nlas = 1;
1444     pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) 1.0 / pp_out_h;
1445     pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) 1.0 / pp_out_w;
1446     pp_inline_parameter.grf5.block_count_x = 1;        /* M x 1 */
1447     pp_inline_parameter.grf5.number_blocks = pp_out_h / 8;
1448     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1449     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1450     pp_inline_parameter.grf6.video_step_delta = 0.0;
1451 }
1452
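     /* The DNDI kernel walks the frame in 16x4 pixel blocks: a single x step
      * per row (the kernel covers w / 16 blocks horizontally through
      * block_count_x) and dest_h / 4 rows of blocks vertically.
      */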
1453 static int
1454 pp_dndi_x_steps(void *private_context)
1455 {
1456     return 1;
1457 }
1458
1459 static int
1460 pp_dndi_y_steps(void *private_context)
1461 {
1462     struct pp_dndi_context *pp_dndi_context = private_context;
1463
1464     return pp_dndi_context->dest_h / 4;
1465 }
1466
1467 static int
1468 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1469 {
1470     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
1471     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 4;
1472
1473     return 0;
1474 }
1475
1476 static void
1477 pp_nv12_dndi_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
1478                         unsigned short srcw, unsigned short srch,
1479                         unsigned short destw, unsigned short desth)
1480 {
1481     struct i965_driver_data *i965 = i965_driver_data(ctx);
1482     struct i965_post_processing_context *pp_context = i965->pp_context;
1483     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
1484     struct object_surface *obj_surface;
1485     struct i965_surface_state *ss;
1486     struct i965_surface_state2 *ss_dndi;
1487     struct i965_sampler_dndi *sampler_dndi;
1488     dri_bo *bo;
1489     int index;
1490     int w, h;
1491     int orig_w, orig_h;
1492     unsigned int tiling, swizzle;
1493
1494     /* surface */
1495     obj_surface = SURFACE(surface);
1496     orig_w = obj_surface->orig_width;
1497     orig_h = obj_surface->orig_height;
1498     w = obj_surface->width;
1499     h = obj_surface->height;
1500
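         /* The STMM buffer holds the per-pixel motion history that the
          * denoise/deinterlace kernel reads and updates across frames; it is
          * allocated once and kept in the PP context.
          */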
1501     if (pp_context->stmm.bo == NULL) {
1502         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
1503                                            "STMM surface",
1504                                            w * h,
1505                                            4096);
1506         assert(pp_context->stmm.bo);
1507     }
1508
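         /* DNDI writes its result into a per-surface intermediate NV12 buffer
          * (pp_out_bo); a following pass such as AVS can then take the
          * deinterlaced frame as its input instead of the original surface.
          */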
1509     dri_bo_unreference(obj_surface->pp_out_bo);
1510     obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
1511                                           "intermediate surface",
1512                                           SIZE_YUV420(w, h),
1513                                           4096);
1514     assert(obj_surface->pp_out_bo);
1515     obj_surface->orig_pp_out_width = orig_w;
1516     obj_surface->orig_pp_out_height = orig_h;
1517     obj_surface->pp_out_width = w;
1518     obj_surface->pp_out_height = h;
1519
1520     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
1521     /* source UV surface index 2 */
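         /* The UV plane is exposed as an R8G8 2D surface whose base address is
          * the Y plane base plus w * h, applied through the relocation delta
          * below.
          */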
1522     index = 2;
1523     pp_context->surfaces[index].s_bo = obj_surface->bo;
1524     dri_bo_reference(pp_context->surfaces[index].s_bo);
1525     bo = dri_bo_alloc(i965->intel.bufmgr, 
1526                       "surface state", 
1527                       sizeof(struct i965_surface_state), 
1528                       4096);
1529     assert(bo);
1530     pp_context->surfaces[index].ss_bo = bo;
1531     dri_bo_map(bo, True);
1532     assert(bo->virtual);
1533     ss = bo->virtual;
1534     memset(ss, 0, sizeof(*ss));
1535     ss->ss0.surface_type = I965_SURFACE_2D;
1536     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1537     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
1538     ss->ss2.width = orig_w / 4 - 1;
1539     ss->ss2.height = orig_h / 2 - 1;
1540     ss->ss3.pitch = w - 1;
1541     pp_set_surface_tiling(ss, tiling);
1542     dri_bo_emit_reloc(bo,
1543                       I915_GEM_DOMAIN_RENDER, 
1544                       0,
1545                       w * h,
1546                       offsetof(struct i965_surface_state, ss1),
1547                       pp_context->surfaces[index].s_bo);
1548     dri_bo_unmap(bo);
1549
1550     /* source YUV surface index 4 */
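         /* The whole NV12 frame is also bound through a SURFACE_STATE2 as a
          * planar 4:2:0 surface with interleaved chroma, so the deinterlace
          * kernel addresses Y and UV through one state; y_offset_for_cb points
          * at the UV plane below the Y plane.
          */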
1551     index = 4;
1552     pp_context->surfaces[index].s_bo = obj_surface->bo;
1553     dri_bo_reference(pp_context->surfaces[index].s_bo);
1554     bo = dri_bo_alloc(i965->intel.bufmgr, 
1555                       "YUV surface state for deinterlace ", 
1556                       sizeof(struct i965_surface_state2), 
1557                       4096);
1558     assert(bo);
1559     pp_context->surfaces[index].ss_bo = bo;
1560     dri_bo_map(bo, True);
1561     assert(bo->virtual);
1562     ss_dndi = bo->virtual;
1563     memset(ss_dndi, 0, sizeof(*ss_dndi));
1564     ss_dndi->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset;
1565     ss_dndi->ss1.cbcr_pixel_offset_v_direction = 0;
1566     ss_dndi->ss1.width = w - 1;
1567     ss_dndi->ss1.height = h - 1;
1568     ss_dndi->ss1.cbcr_pixel_offset_v_direction = 1;
1569     ss_dndi->ss2.half_pitch_for_chroma = 0;
1570     ss_dndi->ss2.pitch = w - 1;
1571     ss_dndi->ss2.interleave_chroma = 1;
1572     ss_dndi->ss2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
1573     ss_dndi->ss2.half_pitch_for_chroma = 0;
1574     ss_dndi->ss2.tiled_surface = 0;
1575     ss_dndi->ss3.x_offset_for_cb = 0;
1576     ss_dndi->ss3.y_offset_for_cb = h;
1577     pp_set_surface2_tiling(ss_dndi, tiling);
1578     dri_bo_emit_reloc(bo,
1579                       I915_GEM_DOMAIN_RENDER, 
1580                       0,
1581                       0,
1582                       offsetof(struct i965_surface_state2, ss0),
1583                       pp_context->surfaces[index].s_bo);
1584     dri_bo_unmap(bo);
1585
1586     /* source STMM surface index 20 */
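         /* The STMM surface is bound with a render write domain (see the reloc
          * below) because the kernel updates the motion history in place.
          */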
1587     index = 20;
1588     pp_context->surfaces[index].s_bo = pp_context->stmm.bo;
1589     dri_bo_reference(pp_context->surfaces[index].s_bo);
1590     bo = dri_bo_alloc(i965->intel.bufmgr, 
1591                       "STMM surface state for deinterlace ", 
1592                       sizeof(struct i965_surface_state2), 
1593                       4096);
1594     assert(bo);
1595     pp_context->surfaces[index].ss_bo = bo;
1596     dri_bo_map(bo, True);
1597     assert(bo->virtual);
1598     ss = bo->virtual;
1599     memset(ss, 0, sizeof(*ss));
1600     ss->ss0.surface_type = I965_SURFACE_2D;
1601     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1602     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1603     ss->ss2.width = w - 1;
1604     ss->ss2.height = h - 1;
1605     ss->ss3.pitch = w - 1;
1606     dri_bo_emit_reloc(bo,
1607                       I915_GEM_DOMAIN_RENDER, 
1608                       I915_GEM_DOMAIN_RENDER,
1609                       0,
1610                       offsetof(struct i965_surface_state, ss1),
1611                       pp_context->surfaces[index].s_bo);
1612     dri_bo_unmap(bo);
1613
1614     /* destination Y surface index 7 */
1615     index = 7;
1616     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1617     dri_bo_reference(pp_context->surfaces[index].s_bo);
1618     bo = dri_bo_alloc(i965->intel.bufmgr, 
1619                       "surface state", 
1620                       sizeof(struct i965_surface_state), 
1621                       4096);
1622     assert(bo);
1623     pp_context->surfaces[index].ss_bo = bo;
1624     dri_bo_map(bo, True);
1625     assert(bo->virtual);
1626     ss = bo->virtual;
1627     memset(ss, 0, sizeof(*ss));
1628     ss->ss0.surface_type = I965_SURFACE_2D;
1629     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1630     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1631     ss->ss2.width = w / 4 - 1;
1632     ss->ss2.height = h - 1;
1633     ss->ss3.pitch = w - 1;
1634     dri_bo_emit_reloc(bo,
1635                       I915_GEM_DOMAIN_RENDER, 
1636                       I915_GEM_DOMAIN_RENDER,
1637                       0,
1638                       offsetof(struct i965_surface_state, ss1),
1639                       pp_context->surfaces[index].s_bo);
1640     dri_bo_unmap(bo);
1641
1642     /* destination UV surface index 8 */
1643     index = 8;
1644     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1645     dri_bo_reference(pp_context->surfaces[index].s_bo);
1646     bo = dri_bo_alloc(i965->intel.bufmgr, 
1647                       "surface state", 
1648                       sizeof(struct i965_surface_state), 
1649                       4096);
1650     assert(bo);
1651     pp_context->surfaces[index].ss_bo = bo;
1652     dri_bo_map(bo, True);
1653     assert(bo->virtual);
1654     ss = bo->virtual;
1655     memset(ss, 0, sizeof(*ss));
1656     ss->ss0.surface_type = I965_SURFACE_2D;
1657     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1658     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
1659     ss->ss2.width = w / 4 - 1;
1660     ss->ss2.height = h / 2 - 1;
1661     ss->ss3.pitch = w - 1;
1662     dri_bo_emit_reloc(bo,
1663                       I915_GEM_DOMAIN_RENDER, 
1664                       I915_GEM_DOMAIN_RENDER,
1665                       w * h,
1666                       offsetof(struct i965_surface_state, ss1),
1667                       pp_context->surfaces[index].s_bo);
1668     dri_bo_unmap(bo);
1669
1670     /* sampler dndi */
1671     dri_bo_map(pp_context->sampler_state_table.bo, True);
1672     assert(pp_context->sampler_state_table.bo->virtual);
1673     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
1674     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
1675
1676     /* sampler dndi index 0 */
1677     index = 0;
1678     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
1679     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
1680     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
1681     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
1682
1683     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
1684     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
1685     sampler_dndi[index].dw1.stmm_c2 = 0;
1686     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
1687     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
1688
1689     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
1690     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
1691     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
1692     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
1693
1694     sampler_dndi[index].dw3.maximum_stmm = 128;
1695     sampler_dndi[index].dw3.multipler_for_vecm = 2;
1696     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
1697     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
1698     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
1699
1700     sampler_dndi[index].dw4.sdi_delta = 8;
1701     sampler_dndi[index].dw4.sdi_threshold = 128;
1702     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
1703     sampler_dndi[index].dw4.stmm_shift_up = 0;
1704     sampler_dndi[index].dw4.stmm_shift_down = 0;
1705     sampler_dndi[index].dw4.minimum_stmm = 0;
1706
1707     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
1708     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
1709     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
1710     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
1711
1712     sampler_dndi[index].dw6.dn_enable = 1;
1713     sampler_dndi[index].dw6.di_enable = 1;
1714     sampler_dndi[index].dw6.di_partial = 0;
1715     sampler_dndi[index].dw6.dndi_top_first = 1;
1716     sampler_dndi[index].dw6.dndi_stream_id = 1;
1717     sampler_dndi[index].dw6.dndi_first_frame = 1;
1718     sampler_dndi[index].dw6.progressive_dn = 0;
1719     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
1720     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
1721     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
1722
1723     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
1724     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
1725     sampler_dndi[index].dw7.vdi_walker_enable = 0;
1726     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
1727
1728     dri_bo_unmap(pp_context->sampler_state_table.bo);
1729
1730     /* private function & data */
1731     pp_context->pp_x_steps = pp_dndi_x_steps;
1732     pp_context->pp_y_steps = pp_dndi_y_steps;
1733     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
1734
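         /* Static (CURBE) parameters for the DNDI kernel: the pitch of the
          * STMM statistics surface and the two motion history blending
          * coefficients.
          */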
1735     pp_static_parameter.grf1.statistics_surface_picth = w / 2;
1736     pp_static_parameter.grf1.r1_6.di.top_field_first = 0;
1737     pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m2 = 64;
1738     pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m1 = 192;
1739
1740     pp_inline_parameter.grf5.block_count_x = w / 16;   /* 1 x N */
1741     pp_inline_parameter.grf5.number_blocks = w / 16;
1742     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1743     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1744
1745     pp_dndi_context->dest_w = w;
1746     pp_dndi_context->dest_h = h;
1747 }
1748
1749 static void
1750 ironlake_pp_initialize(VADriverContextP ctx,
1751                        VASurfaceID surface,
1752                        int input,
1753                        short srcx,
1754                        short srcy,
1755                        unsigned short srcw,
1756                        unsigned short srch,
1757                        short destx,
1758                        short desty,
1759                        unsigned short destw,
1760                        unsigned short desth,
1761                        int pp_index)
1762 {
1763     struct i965_driver_data *i965 = i965_driver_data(ctx);
1764     struct i965_post_processing_context *pp_context = i965->pp_context;
1765     struct pp_module *pp_module;
1766     dri_bo *bo;
1767     int i;
1768
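         /* (Re)allocate the per-run GPU state buffers: constant buffer (CURBE),
          * binding table, interface descriptors, sampler state tables and VFE
          * state; any buffers left from a previous run are released first.
          */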
1769     dri_bo_unreference(pp_context->curbe.bo);
1770     bo = dri_bo_alloc(i965->intel.bufmgr,
1771                       "constant buffer",
1772                       4096, 
1773                       4096);
1774     assert(bo);
1775     pp_context->curbe.bo = bo;
1776
1777     dri_bo_unreference(pp_context->binding_table.bo);
1778     bo = dri_bo_alloc(i965->intel.bufmgr, 
1779                       "binding table",
1780                       sizeof(unsigned int), 
1781                       4096);
1782     assert(bo);
1783     pp_context->binding_table.bo = bo;
1784
1785     dri_bo_unreference(pp_context->idrt.bo);
1786     bo = dri_bo_alloc(i965->intel.bufmgr, 
1787                       "interface descriptor",
1788                       sizeof(struct i965_interface_descriptor), 
1789                       4096);
1790     assert(bo);
1791     pp_context->idrt.bo = bo;
1792     pp_context->idrt.num_interface_descriptors = 0;
1793
1794     dri_bo_unreference(pp_context->sampler_state_table.bo);
1795     bo = dri_bo_alloc(i965->intel.bufmgr, 
1796                       "sampler state table", 
1797                       4096,
1798                       4096);
1799     assert(bo);
1800     dri_bo_map(bo, True);
1801     memset(bo->virtual, 0, bo->size);
1802     dri_bo_unmap(bo);
1803     pp_context->sampler_state_table.bo = bo;
1804
1805     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
1806     bo = dri_bo_alloc(i965->intel.bufmgr, 
1807                       "sampler 8x8 state ",
1808                       4096,
1809                       4096);
1810     assert(bo);
1811     pp_context->sampler_state_table.bo_8x8 = bo;
1812
1813     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
1814     bo = dri_bo_alloc(i965->intel.bufmgr, 
1815                       "sampler 8x8 state ",
1816                       4096,
1817                       4096);
1818     assert(bo);
1819     pp_context->sampler_state_table.bo_8x8_uv = bo;
1820
1821     dri_bo_unreference(pp_context->vfe_state.bo);
1822     bo = dri_bo_alloc(i965->intel.bufmgr, 
1823                       "vfe state", 
1824                       sizeof(struct i965_vfe_state), 
1825                       4096);
1826     assert(bo);
1827     pp_context->vfe_state.bo = bo;
1828     
1829     for (i = 0; i < MAX_PP_SURFACES; i++) {
1830         dri_bo_unreference(pp_context->surfaces[i].ss_bo);
1831         pp_context->surfaces[i].ss_bo = NULL;
1832
1833         dri_bo_unreference(pp_context->surfaces[i].s_bo);
1834         pp_context->surfaces[i].s_bo = NULL;
1835     }
1836
1837     memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
1838     memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
1839     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
1840     pp_context->current_pp = pp_index;
1841     pp_module = &pp_context->pp_modules[pp_index];
1842     
1843     if (pp_module->initialize)
1844         pp_module->initialize(ctx, surface, input, srcw, srch, destw, desth);
1845 }
1846
1847 static void
1848 ironlake_post_processing(VADriverContextP ctx,
1849                          VASurfaceID surface,
1850                          int input,
1851                          short srcx,
1852                          short srcy,
1853                          unsigned short srcw,
1854                          unsigned short srch,
1855                          short destx,
1856                          short desty,
1857                          unsigned short destw,
1858                          unsigned short desth,
1859                          int pp_index)
1860 {
1861     ironlake_pp_initialize(ctx, surface, input,
1862                            srcx, srcy, srcw, srch,
1863                            destx, desty, destw, desth,
1864                            pp_index);
1865     ironlake_pp_states_setup(ctx);
1866     ironlake_pp_pipeline_setup(ctx);
1867 }
1868
1869 static void
1870 gen6_pp_initialize(VADriverContextP ctx,
1871                    VASurfaceID surface,
1872                    int input,
1873                    short srcx,
1874                    short srcy,
1875                    unsigned short srcw,
1876                    unsigned short srch,
1877                    short destx,
1878                    short desty,
1879                    unsigned short destw,
1880                    unsigned short desth,
1881                    int pp_index)
1882 {
1883     struct i965_driver_data *i965 = i965_driver_data(ctx);
1884     struct i965_post_processing_context *pp_context = i965->pp_context;
1885     struct pp_module *pp_module;
1886     dri_bo *bo;
1887     int i;
1888
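         /* Same per-run state buffer setup as on Ironlake, but using Gen6
          * interface descriptors.
          */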
1889     dri_bo_unreference(pp_context->curbe.bo);
1890     bo = dri_bo_alloc(i965->intel.bufmgr,
1891                       "constant buffer",
1892                       4096, 
1893                       4096);
1894     assert(bo);
1895     pp_context->curbe.bo = bo;
1896
1897     dri_bo_unreference(pp_context->binding_table.bo);
1898     bo = dri_bo_alloc(i965->intel.bufmgr, 
1899                       "binding table",
1900                       sizeof(unsigned int), 
1901                       4096);
1902     assert(bo);
1903     pp_context->binding_table.bo = bo;
1904
1905     dri_bo_unreference(pp_context->idrt.bo);
1906     bo = dri_bo_alloc(i965->intel.bufmgr, 
1907                       "interface descriptor",
1908                       sizeof(struct gen6_interface_descriptor_data), 
1909                       4096);
1910     assert(bo);
1911     pp_context->idrt.bo = bo;
1912     pp_context->idrt.num_interface_descriptors = 0;
1913
1914     dri_bo_unreference(pp_context->sampler_state_table.bo);
1915     bo = dri_bo_alloc(i965->intel.bufmgr, 
1916                       "sampler state table", 
1917                       4096,
1918                       4096);
1919     assert(bo);
1920     dri_bo_map(bo, True);
1921     memset(bo->virtual, 0, bo->size);
1922     dri_bo_unmap(bo);
1923     pp_context->sampler_state_table.bo = bo;
1924
1925     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
1926     bo = dri_bo_alloc(i965->intel.bufmgr, 
1927                       "sampler 8x8 state ",
1928                       4096,
1929                       4096);
1930     assert(bo);
1931     pp_context->sampler_state_table.bo_8x8 = bo;
1932
1933     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
1934     bo = dri_bo_alloc(i965->intel.bufmgr, 
1935                       "sampler 8x8 state ",
1936                       4096,
1937                       4096);
1938     assert(bo);
1939     pp_context->sampler_state_table.bo_8x8_uv = bo;
1940
1941     dri_bo_unreference(pp_context->vfe_state.bo);
1942     bo = dri_bo_alloc(i965->intel.bufmgr, 
1943                       "vfe state", 
1944                       sizeof(struct i965_vfe_state), 
1945                       4096);
1946     assert(bo);
1947     pp_context->vfe_state.bo = bo;
1948     
1949     for (i = 0; i < MAX_PP_SURFACES; i++) {
1950         dri_bo_unreference(pp_context->surfaces[i].ss_bo);
1951         pp_context->surfaces[i].ss_bo = NULL;
1952
1953         dri_bo_unreference(pp_context->surfaces[i].s_bo);
1954         pp_context->surfaces[i].s_bo = NULL;
1955     }
1956
1957     memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
1958     memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
1959     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
1960     pp_context->current_pp = pp_index;
1961     pp_module = &pp_context->pp_modules[pp_index];
1962     
1963     if (pp_module->initialize)
1964         pp_module->initialize(ctx, surface, input, srcw, srch, destw, desth);
1965 }
1966
1967 static void
1968 gen6_pp_binding_table(struct i965_post_processing_context *pp_context)
1969 {
1970     unsigned int *binding_table;
1971     dri_bo *bo = pp_context->binding_table.bo;
1972     int i;
1973
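         /* The binding table is an array of dwords, one per PP surface slot;
          * each used entry holds the offset of that surface's SURFACE_STATE,
          * fixed up through a relocation.
          */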
1974     dri_bo_map(bo, 1);
1975     assert(bo->virtual);
1976     binding_table = bo->virtual;
1977     memset(binding_table, 0, bo->size);
1978
1979     for (i = 0; i < MAX_PP_SURFACES; i++) {
1980         if (pp_context->surfaces[i].ss_bo) {
1981             assert(pp_context->surfaces[i].s_bo);
1982
1983             binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
1984             dri_bo_emit_reloc(bo,
1985                               I915_GEM_DOMAIN_INSTRUCTION, 0,
1986                               0,
1987                               i * sizeof(*binding_table),
1988                               pp_context->surfaces[i].ss_bo);
1989         }
1990     
1991     }
1992
1993     dri_bo_unmap(bo);
1994 }
1995
1996 static void
1997 gen6_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
1998 {
1999     struct gen6_interface_descriptor_data *desc;
2000     dri_bo *bo;
2001     int pp_index = pp_context->current_pp;
2002
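         /* One interface descriptor per PP run: it points at the kernel
          * (64-byte aligned, hence >> 6), the sampler state table and the
          * binding table (32-byte aligned, hence >> 5); the pointers are
          * patched by the relocations emitted below.
          */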
2003     bo = pp_context->idrt.bo;
2004     dri_bo_map(bo, True);
2005     assert(bo->virtual);
2006     desc = bo->virtual;
2007     memset(desc, 0, sizeof(*desc));
2008     desc->desc0.kernel_start_pointer = 
2009         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
2010     desc->desc1.single_program_flow = 1;
2011     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
2012     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
2013     desc->desc2.sampler_state_pointer = 
2014         pp_context->sampler_state_table.bo->offset >> 5;
2015     desc->desc3.binding_table_entry_count = 0;
2016     desc->desc3.binding_table_pointer = 
2017         pp_context->binding_table.bo->offset >> 5; /*reloc */
2018     desc->desc4.constant_urb_entry_read_offset = 0;
2019     desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
2020
2021     dri_bo_emit_reloc(bo,
2022                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2023                       0,
2024                       offsetof(struct gen6_interface_descriptor_data, desc0),
2025                       pp_context->pp_modules[pp_index].kernel.bo);
2026
2027     dri_bo_emit_reloc(bo,
2028                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2029                       desc->desc2.sampler_count << 2,
2030                       offsetof(struct gen6_interface_descriptor_data, desc2),
2031                       pp_context->sampler_state_table.bo);
2032
2033     dri_bo_emit_reloc(bo,
2034                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2035                       desc->desc3.binding_table_entry_count,
2036                       offsetof(struct gen6_interface_descriptor_data, desc3),
2037                       pp_context->binding_table.bo);
2038
2039     dri_bo_unmap(bo);
2040     pp_context->idrt.num_interface_descriptors++;
2041 }
2042
2043 static void
2044 gen6_pp_upload_constants(struct i965_post_processing_context *pp_context)
2045 {
2046     unsigned char *constant_buffer;
2047
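         /* Copy the 128-byte static parameter block into the constant buffer;
          * it is loaded into GRF 1-4 of the kernel via MEDIA_CURBE_LOAD.
          */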
2048     assert(sizeof(pp_static_parameter) == 128);
2049     dri_bo_map(pp_context->curbe.bo, 1);
2050     assert(pp_context->curbe.bo->virtual);
2051     constant_buffer = pp_context->curbe.bo->virtual;
2052     memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
2053     dri_bo_unmap(pp_context->curbe.bo);
2054 }
2055
2056 static void
2057 gen6_pp_states_setup(VADriverContextP ctx)
2058 {
2059     struct i965_driver_data *i965 = i965_driver_data(ctx);
2060     struct i965_post_processing_context *pp_context = i965->pp_context;
2061
2062     gen6_pp_binding_table(pp_context);
2063     gen6_pp_interface_descriptor_table(pp_context);
2064     gen6_pp_upload_constants(pp_context);
2065 }
2066
2067 static void
2068 gen6_pp_pipeline_select(VADriverContextP ctx)
2069 {
2070     struct i965_driver_data *i965 = i965_driver_data(ctx);
2071     struct intel_batchbuffer *batch = i965->batch;
2072
2073     BEGIN_BATCH(batch, 1);
2074     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
2075     ADVANCE_BATCH(batch);
2076 }
2077
2078 static void
2079 gen6_pp_state_base_address(VADriverContextP ctx)
2080 {
2081     struct i965_driver_data *i965 = i965_driver_data(ctx);
2082     struct intel_batchbuffer *batch = i965->batch;
2083
2084     BEGIN_BATCH(batch, 10);
2085     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
2086     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2087     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2088     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2089     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2090     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2091     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2092     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2093     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2094     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2095     ADVANCE_BATCH(batch);
2096 }
2097
2098 static void
2099 gen6_pp_vfe_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2100 {
2101     struct i965_driver_data *i965 = i965_driver_data(ctx);
2102     struct intel_batchbuffer *batch = i965->batch;
2103
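         /* MEDIA_VFE_STATE: URB entry count and the URB/CURBE allocation sizes.
          * The sizes are kept in 512-bit units in the context while the command
          * expects 256-bit units, hence the multiplication by 2.
          */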
2104     BEGIN_BATCH(batch, 8);
2105     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
2106     OUT_BATCH(batch, 0);
2107     OUT_BATCH(batch,
2108               (pp_context->urb.num_vfe_entries - 1) << 16 |
2109               pp_context->urb.num_vfe_entries << 8);
2110     OUT_BATCH(batch, 0);
2111     OUT_BATCH(batch,
2112               (pp_context->urb.size_vfe_entry * 2) << 16 |  /* in 256 bits unit */
2113               (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 - 1));            /* in 256 bits unit */
2114     OUT_BATCH(batch, 0);
2115     OUT_BATCH(batch, 0);
2116     OUT_BATCH(batch, 0);
2117     ADVANCE_BATCH(batch);
2118 }
2119
2120 static void
2121 gen6_pp_curbe_load(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2122 {
2123     struct i965_driver_data *i965 = i965_driver_data(ctx);
2124     struct intel_batchbuffer *batch = i965->batch;
2125
2126     assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512 <= pp_context->curbe.bo->size);
2127
2128     BEGIN_BATCH(batch, 4);
2129     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
2130     OUT_BATCH(batch, 0);
2131     OUT_BATCH(batch,
2132               pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512);
2133     OUT_RELOC(batch, 
2134               pp_context->curbe.bo,
2135               I915_GEM_DOMAIN_INSTRUCTION, 0,
2136               0);
2137     ADVANCE_BATCH(batch);
2138 }
2139
2140 static void
2141 gen6_interface_descriptor_load(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2142 {
2143     struct i965_driver_data *i965 = i965_driver_data(ctx);
2144     struct intel_batchbuffer *batch = i965->batch;
2145
2146     BEGIN_BATCH(batch, 4);
2147     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
2148     OUT_BATCH(batch, 0);
2149     OUT_BATCH(batch,
2150               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
2151     OUT_RELOC(batch, 
2152               pp_context->idrt.bo,
2153               I915_GEM_DOMAIN_INSTRUCTION, 0,
2154               0);
2155     ADVANCE_BATCH(batch);
2156 }
2157
2158 static void
2159 gen6_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2160 {
2161     struct i965_driver_data *i965 = i965_driver_data(ctx);
2162     struct intel_batchbuffer *batch = i965->batch;
2163     int x, x_steps, y, y_steps;
2164
2165     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
2166     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
2167
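         /* Emit one MEDIA_OBJECT per block; the inline data (GRF 5-6) carries
          * the per-block pp_inline_parameter filled in by
          * pp_set_block_parameter(); a non-zero return skips the block.
          */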
2168     for (y = 0; y < y_steps; y++) {
2169         for (x = 0; x < x_steps; x++) {
2170             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
2171                 BEGIN_BATCH(batch, 22);
2172                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 20);
2173                 OUT_BATCH(batch, 0);
2174                 OUT_BATCH(batch, 0); /* no indirect data */
2175                 OUT_BATCH(batch, 0);
2176                 OUT_BATCH(batch, 0); /* scoreboard */
2177                 OUT_BATCH(batch, 0);
2178
2179                 /* inline data grf 5-6 */
2180                 assert(sizeof(pp_inline_parameter) == 64);
2181                 intel_batchbuffer_data(batch, &pp_inline_parameter, sizeof(pp_inline_parameter));
2182
2183                 ADVANCE_BATCH(batch);
2184             }
2185         }
2186     }
2187 }
2188
2189 static void
2190 gen6_pp_pipeline_setup(VADriverContextP ctx)
2191 {
2192     struct i965_driver_data *i965 = i965_driver_data(ctx);
2193     struct intel_batchbuffer *batch = i965->batch;
2194     struct i965_post_processing_context *pp_context = i965->pp_context;
2195
2196     intel_batchbuffer_start_atomic(batch, 0x1000);
2197     intel_batchbuffer_emit_mi_flush(batch);
2198     gen6_pp_pipeline_select(ctx);
2199     gen6_pp_curbe_load(ctx, pp_context);
2200     gen6_interface_descriptor_load(ctx, pp_context);
2201     gen6_pp_state_base_address(ctx);
2202     gen6_pp_vfe_state(ctx, pp_context);
2203     gen6_pp_object_walker(ctx, pp_context);
2204     intel_batchbuffer_end_atomic(batch);
2205 }
2206
2207 static void
2208 gen6_post_processing(VADriverContextP ctx,
2209                      VASurfaceID surface,
2210                      int input,
2211                      short srcx,
2212                      short srcy,
2213                      unsigned short srcw,
2214                      unsigned short srch,
2215                      short destx,
2216                      short desty,
2217                      unsigned short destw,
2218                      unsigned short desth,
2219                      int pp_index)
2220 {
2221     gen6_pp_initialize(ctx, surface, input,
2222                        srcx, srcy, srcw, srch,
2223                        destx, desty, destw, desth,
2224                        pp_index);
2225     gen6_pp_states_setup(ctx);
2226     gen6_pp_pipeline_setup(ctx);
2227 }
2228
2229 static void
2230 i965_post_processing_internal(VADriverContextP ctx,
2231                               VASurfaceID surface,
2232                               int input,
2233                               short srcx,
2234                               short srcy,
2235                               unsigned short srcw,
2236                               unsigned short srch,
2237                               short destx,
2238                               short desty,
2239                               unsigned short destw,
2240                               unsigned short desth,
2241                               int pp_index)
2242 {
2243     struct i965_driver_data *i965 = i965_driver_data(ctx);
2244
2245     if (IS_GEN6(i965->intel.device_id))
2246         gen6_post_processing(ctx, surface, input,
2247                              srcx, srcy, srcw, srch,
2248                              destx, desty, destw, desth,
2249                              pp_index);
2250     else
2251         ironlake_post_processing(ctx, surface, input,
2252                                  srcx, srcy, srcw, srch,
2253                                  destx, desty, destw, desth,
2254                                  pp_index);
2255 }
2256
2257 void
2258 i965_post_processing(VADriverContextP ctx,
2259                      VASurfaceID surface,
2260                      short srcx,
2261                      short srcy,
2262                      unsigned short srcw,
2263                      unsigned short srch,
2264                      short destx,
2265                      short desty,
2266                      unsigned short destw,
2267                      unsigned short desth,
2268                      unsigned int flag)
2269 {
2270     struct i965_driver_data *i965 = i965_driver_data(ctx);
2271
2272     if (HAS_PP(i965)) {
2273         /* Currently post processing is only supported for NV12 surfaces */
2274         if (i965->render_state.interleaved_uv) {
2275             int internal_input = 0;
2276
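                 /* Deinterlacing runs first on the original surface; once it
                  * has run, internal_input is set so the scaling pass reads the
                  * deinterlaced intermediate instead.
                  */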
2277             if (flag & I965_PP_FLAG_DEINTERLACING) {
2278                 i965_post_processing_internal(ctx, surface, internal_input,
2279                                               srcx, srcy, srcw, srch,
2280                                               destx, desty, destw, desth,
2281                                               PP_NV12_DNDI);
2282                 internal_input = 1;
2283             }
2284
2285             if (flag & I965_PP_FLAG_AVS) {
2286                 i965_post_processing_internal(ctx, surface, internal_input,
2287                                               srcx, srcy, srcw, srch,
2288                                               destx, desty, destw, desth,
2289                                               PP_NV12_AVS);
2290             }
2291         }
2292     }
2293 }       
2294
2295 Bool
2296 i965_post_processing_terminate(VADriverContextP ctx)
2297 {
2298     struct i965_driver_data *i965 = i965_driver_data(ctx);
2299     struct i965_post_processing_context *pp_context = i965->pp_context;
2300     int i;
2301
2302     if (HAS_PP(i965)) {
2303         if (pp_context) {
2304             dri_bo_unreference(pp_context->curbe.bo);
2305             pp_context->curbe.bo = NULL;
2306
2307             for (i = 0; i < MAX_PP_SURFACES; i++) {
2308                 dri_bo_unreference(pp_context->surfaces[i].ss_bo);
2309                 pp_context->surfaces[i].ss_bo = NULL;
2310
2311                 dri_bo_unreference(pp_context->surfaces[i].s_bo);
2312                 pp_context->surfaces[i].s_bo = NULL;
2313             }
2314
2315             dri_bo_unreference(pp_context->sampler_state_table.bo);
2316             pp_context->sampler_state_table.bo = NULL;
2317
2318             dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
2319             pp_context->sampler_state_table.bo_8x8 = NULL;
2320
2321             dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
2322             pp_context->sampler_state_table.bo_8x8_uv = NULL;
2323
2324             dri_bo_unreference(pp_context->binding_table.bo);
2325             pp_context->binding_table.bo = NULL;
2326
2327             dri_bo_unreference(pp_context->idrt.bo);
2328             pp_context->idrt.bo = NULL;
2329             pp_context->idrt.num_interface_descriptors = 0;
2330
2331             dri_bo_unreference(pp_context->vfe_state.bo);
2332             pp_context->vfe_state.bo = NULL;
2333
2334             dri_bo_unreference(pp_context->stmm.bo);
2335             pp_context->stmm.bo = NULL;
2336
2337             for (i = 0; i < NUM_PP_MODULES; i++) {
2338                 struct pp_module *pp_module = &pp_context->pp_modules[i];
2339
2340                 dri_bo_unreference(pp_module->kernel.bo);
2341                 pp_module->kernel.bo = NULL;
2342             }
2343
2344             free(pp_context);
2345         }
2346
2347         i965->pp_context = NULL;
2348     }
2349
2350     return True;
2351 }
2352
2353 Bool
2354 i965_post_processing_init(VADriverContextP ctx)
2355 {
2356     struct i965_driver_data *i965 = i965_driver_data(ctx);
2357     struct i965_post_processing_context *pp_context = i965->pp_context;
2358     int i;
2359
2360     if (HAS_PP(i965)) {
2361         if (pp_context == NULL) {
2362             pp_context = calloc(1, sizeof(*pp_context));
2363             i965->pp_context = pp_context;
2364
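                 /* Partition the URB: VFE entries first, then a single constant
                  * (CS) entry; all sizes are counted in 512-bit units.
                  */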
2365             pp_context->urb.size = URB_SIZE((&i965->intel));
2366             pp_context->urb.num_vfe_entries = 32;
2367             pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
2368             pp_context->urb.num_cs_entries = 1;
2369             pp_context->urb.size_cs_entry = 2;      /* in 512 bits unit */
2370             pp_context->urb.vfe_start = 0;
2371             pp_context->urb.cs_start = pp_context->urb.vfe_start + 
2372                 pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
2373             assert(pp_context->urb.cs_start + 
2374                    pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
2375
2376             assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
2377             assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
2378
2379             if (IS_GEN6(i965->intel.device_id))
2380                 memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
2381             else if (IS_IRONLAKE(i965->intel.device_id))
2382                 memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
2383
2384             for (i = 0; i < NUM_PP_MODULES; i++) {
2385                 struct pp_module *pp_module = &pp_context->pp_modules[i];
2386                 dri_bo_unreference(pp_module->kernel.bo);
2387                 pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
2388                                                     pp_module->kernel.name,
2389                                                     pp_module->kernel.size,
2390                                                     4096);
2391                 assert(pp_module->kernel.bo);
2392                 dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
2393             }
2394         }
2395     }
2396
2397     return True;
2398 }