i965_drv_video: improved MV quality for VME
[platform/upstream/libva.git] / i965_drv_video / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include <va/va_backend.h>
35
36 #include "intel_batchbuffer.h"
37 #include "intel_driver.h"
38 #include "i965_defines.h"
39 #include "i965_structs.h"
40 #include "i965_drv_video.h"
41 #include "i965_post_processing.h"
42 #include "i965_render.h"
43
44 #define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) ||     \
45                      IS_GEN6((ctx)->intel.device_id) ||         \
46                      IS_GEN7((ctx)->intel.device_id))
47
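/*
 * Precompiled post-processing kernel binaries for Ironlake (Gen5),
 * generated from the sources under shaders/post_processing.
 */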
48 static const uint32_t pp_null_gen5[][4] = {
49 #include "shaders/post_processing/null.g4b.gen5"
50 };
51
52 static const uint32_t pp_nv12_load_save_gen5[][4] = {
53 #include "shaders/post_processing/nv12_load_save_nv12.g4b.gen5"
54 };
55
56 static const uint32_t pp_nv12_scaling_gen5[][4] = {
57 #include "shaders/post_processing/nv12_scaling_nv12.g4b.gen5"
58 };
59
60 static const uint32_t pp_nv12_avs_gen5[][4] = {
61 #include "shaders/post_processing/nv12_avs_nv12.g4b.gen5"
62 };
63
64 static const uint32_t pp_nv12_dndi_gen5[][4] = {
65 #include "shaders/post_processing/nv12_dndi_nv12.g4b.gen5"
66 };
67
68 static void pp_null_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
69                                unsigned short srcw, unsigned short srch,
70                                unsigned short destw, unsigned short desth);
71 static void pp_nv12_avs_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
72                                    unsigned short srcw, unsigned short srch,
73                                    unsigned short destw, unsigned short desth);
74 static void pp_nv12_scaling_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
75                                        unsigned short srcw, unsigned short srch,
76                                        unsigned short destw, unsigned short desth);
77 static void pp_nv12_load_save_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
78                                          unsigned short srcw, unsigned short srch,
79                                          unsigned short destw, unsigned short desth);
80 static void pp_nv12_dndi_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
81                                     unsigned short srcw, unsigned short srch,
82                                     unsigned short destw, unsigned short desth);
83
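/*
 * Kernel table for Ironlake: each entry pairs a kernel binary with the
 * initialize function that sets up its surfaces, samplers and per-block
 * walker parameters.
 */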
84 static struct pp_module pp_modules_gen5[] = {
85     {
86         {
87             "NULL module (for testing)",
88             PP_NULL,
89             pp_null_gen5,
90             sizeof(pp_null_gen5),
91             NULL,
92         },
93
94         pp_null_initialize,
95     },
96
97     {
98         {
99             "NV12 Load & Save module",
100             PP_NV12_LOAD_SAVE,
101             pp_nv12_load_save_gen5,
102             sizeof(pp_nv12_load_save_gen5),
103             NULL,
104         },
105
106         pp_nv12_load_save_initialize,
107     },
108
109     {
110         {
111             "NV12 Scaling module",
112             PP_NV12_SCALING,
113             pp_nv12_scaling_gen5,
114             sizeof(pp_nv12_scaling_gen5),
115             NULL,
116         },
117
118         pp_nv12_scaling_initialize,
119     },
120
121     {
122         {
123             "NV12 AVS module",
124             PP_NV12_AVS,
125             pp_nv12_avs_gen5,
126             sizeof(pp_nv12_avs_gen5),
127             NULL,
128         },
129
130         pp_nv12_avs_initialize,
131     },
132
133     {
134         {
135             "NV12 DNDI module",
136             PP_NV12_DNDI,
137             pp_nv12_dndi_gen5,
138             sizeof(pp_nv12_dndi_gen5),
139             NULL,
140         },
141
142         pp_nv12_dndi_initialize,
143     },
144 };
145
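/* Gen6 (Sandy Bridge) builds of the same post-processing kernels. */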
146 static const uint32_t pp_null_gen6[][4] = {
147 #include "shaders/post_processing/null.g6b"
148 };
149
150 static const uint32_t pp_nv12_load_save_gen6[][4] = {
151 #include "shaders/post_processing/nv12_load_save_nv12.g6b"
152 };
153
154 static const uint32_t pp_nv12_scaling_gen6[][4] = {
155 #include "shaders/post_processing/nv12_scaling_nv12.g6b"
156 };
157
158 static const uint32_t pp_nv12_avs_gen6[][4] = {
159 #include "shaders/post_processing/nv12_avs_nv12.g6b"
160 };
161
162 static const uint32_t pp_nv12_dndi_gen6[][4] = {
163 #include "shaders/post_processing/nv12_dndi_nv12.g6b"
164 };
165
166 static struct pp_module pp_modules_gen6[] = {
167     {
168         {
169             "NULL module (for testing)",
170             PP_NULL,
171             pp_null_gen6,
172             sizeof(pp_null_gen6),
173             NULL,
174         },
175
176         pp_null_initialize,
177     },
178
179     {
180         {
181             "NV12 Load & Save module",
182             PP_NV12_LOAD_SAVE,
183             pp_nv12_load_save_gen6,
184             sizeof(pp_nv12_load_save_gen6),
185             NULL,
186         },
187
188         pp_nv12_load_save_initialize,
189     },
190
191     {
192         {
193             "NV12 Scaling module",
194             PP_NV12_SCALING,
195             pp_nv12_scaling_gen6,
196             sizeof(pp_nv12_scaling_gen6),
197             NULL,
198         },
199
200         pp_nv12_scaling_initialize,
201     },
202
203     {
204         {
205             "NV12 AVS module",
206             PP_NV12_AVS,
207             pp_nv12_avs_gen6,
208             sizeof(pp_nv12_avs_gen6),
209             NULL,
210         },
211
212         pp_nv12_avs_initialize,
213     },
214
215     {
216         {
217             "NV12 DNDI module",
218             PP_NV12_DNDI,
219             pp_nv12_dndi_gen6,
220             sizeof(pp_nv12_dndi_gen6),
221             NULL,
222         },
223
224         pp_nv12_dndi_initialize,
225     },
226 };
227
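/*
 * Shorthands for the two parameter blocks kept in the post-processing
 * context: pp_static_parameter is uploaded through the CURBE constant
 * buffer, pp_inline_parameter is emitted as MEDIA_OBJECT inline data.
 */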
228 #define pp_static_parameter     pp_context->pp_static_parameter
229 #define pp_inline_parameter     pp_context->pp_inline_parameter
230
231 static void
232 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
233 {
234     switch (tiling) {
235     case I915_TILING_NONE:
236         ss->ss3.tiled_surface = 0;
237         ss->ss3.tile_walk = 0;
238         break;
239     case I915_TILING_X:
240         ss->ss3.tiled_surface = 1;
241         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
242         break;
243     case I915_TILING_Y:
244         ss->ss3.tiled_surface = 1;
245         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
246         break;
247     }
248 }
249
250 static void
251 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
252 {
253     switch (tiling) {
254     case I915_TILING_NONE:
255         ss->ss2.tiled_surface = 0;
256         ss->ss2.tile_walk = 0;
257         break;
258     case I915_TILING_X:
259         ss->ss2.tiled_surface = 1;
260         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
261         break;
262     case I915_TILING_Y:
263         ss->ss2.tiled_surface = 1;
264         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
265         break;
266     }
267 }
268
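/*
 * No generic surface state setup is needed here: the per-module
 * initialize functions create all surface states themselves.
 */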
269 static void
270 ironlake_pp_surface_state(struct i965_post_processing_context *pp_context)
271 {
272
273 }
274
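/*
 * Fill the single interface descriptor: kernel entry point, a CURBE read
 * length of four GRFs, and pointers to the sampler state table and the
 * binding table, each patched with a relocation.
 */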
275 static void
276 ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
277 {
278     struct i965_interface_descriptor *desc;
279     dri_bo *bo;
280     int pp_index = pp_context->current_pp;
281
282     bo = pp_context->idrt.bo;
283     dri_bo_map(bo, 1);
284     assert(bo->virtual);
285     desc = bo->virtual;
286     memset(desc, 0, sizeof(*desc));
287     desc->desc0.grf_reg_blocks = 10;
288     desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
289     desc->desc1.const_urb_entry_read_offset = 0;
290     desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
291     desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
292     desc->desc2.sampler_count = 0;
293     desc->desc3.binding_table_entry_count = 0;
294     desc->desc3.binding_table_pointer =
295         pp_context->binding_table.bo->offset >> 5; /* reloc */
296
297     dri_bo_emit_reloc(bo,
298                       I915_GEM_DOMAIN_INSTRUCTION, 0,
299                       desc->desc0.grf_reg_blocks,
300                       offsetof(struct i965_interface_descriptor, desc0),
301                       pp_context->pp_modules[pp_index].kernel.bo);
302
303     dri_bo_emit_reloc(bo,
304                       I915_GEM_DOMAIN_INSTRUCTION, 0,
305                       desc->desc2.sampler_count << 2,
306                       offsetof(struct i965_interface_descriptor, desc2),
307                       pp_context->sampler_state_table.bo);
308
309     dri_bo_emit_reloc(bo,
310                       I915_GEM_DOMAIN_INSTRUCTION, 0,
311                       desc->desc3.binding_table_entry_count,
312                       offsetof(struct i965_interface_descriptor, desc3),
313                       pp_context->binding_table.bo);
314
315     dri_bo_unmap(bo);
316     pp_context->idrt.num_interface_descriptors++;
317 }
318
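/*
 * Build the binding table: one entry per surface state buffer in use,
 * each entry relocated to the surface state's GPU address.
 */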
319 static void
320 ironlake_pp_binding_table(struct i965_post_processing_context *pp_context)
321 {
322     unsigned int *binding_table;
323     dri_bo *bo = pp_context->binding_table.bo;
324     int i;
325
326     dri_bo_map(bo, 1);
327     assert(bo->virtual);
328     binding_table = bo->virtual;
329     memset(binding_table, 0, bo->size);
330
331     for (i = 0; i < MAX_PP_SURFACES; i++) {
332         if (pp_context->surfaces[i].ss_bo) {
333             assert(pp_context->surfaces[i].s_bo);
334
335             binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
336             dri_bo_emit_reloc(bo,
337                               I915_GEM_DOMAIN_INSTRUCTION, 0,
338                               0,
339                               i * sizeof(*binding_table),
340                               pp_context->surfaces[i].ss_bo);
341         }
342     
343     }
344
345     dri_bo_unmap(bo);
346 }
347
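/*
 * Program the VFE state: maximum thread count, URB entry allocation,
 * generic mode, and a relocated pointer to the interface descriptor
 * table.
 */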
348 static void
349 ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
350 {
351     struct i965_vfe_state *vfe_state;
352     dri_bo *bo;
353
354     bo = pp_context->vfe_state.bo;
355     dri_bo_map(bo, 1);
356     assert(bo->virtual);
357     vfe_state = bo->virtual;
358     memset(vfe_state, 0, sizeof(*vfe_state));
359     vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
360     vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
361     vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
362     vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
363     vfe_state->vfe1.children_present = 0;
364     vfe_state->vfe2.interface_descriptor_base = 
365         pp_context->idrt.bo->offset >> 4; /* reloc */
366     dri_bo_emit_reloc(bo,
367                       I915_GEM_DOMAIN_INSTRUCTION, 0,
368                       0,
369                       offsetof(struct i965_vfe_state, vfe2),
370                       pp_context->idrt.bo);
371     dri_bo_unmap(bo);
372 }
373
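/*
 * Copy the 128-byte static parameter block into the CURBE buffer; the
 * interface descriptor makes it visible to the kernel as GRF 1-4.
 */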
374 static void
375 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
376 {
377     unsigned char *constant_buffer;
378
379     assert(sizeof(pp_static_parameter) == 128);
380     dri_bo_map(pp_context->curbe.bo, 1);
381     assert(pp_context->curbe.bo->virtual);
382     constant_buffer = pp_context->curbe.bo->virtual;
383     memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
384     dri_bo_unmap(pp_context->curbe.bo);
385 }
386
387 static void
388 ironlake_pp_states_setup(VADriverContextP ctx)
389 {
390     struct i965_driver_data *i965 = i965_driver_data(ctx);
391     struct i965_post_processing_context *pp_context = i965->pp_context;
392
393     ironlake_pp_surface_state(pp_context);
394     ironlake_pp_binding_table(pp_context);
395     ironlake_pp_interface_descriptor_table(pp_context);
396     ironlake_pp_vfe_state(pp_context);
397     ironlake_pp_upload_constants(pp_context);
398 }
399
400 static void
401 ironlake_pp_pipeline_select(VADriverContextP ctx)
402 {
403     struct i965_driver_data *i965 = i965_driver_data(ctx);
404     struct intel_batchbuffer *batch = i965->batch;
405
406     BEGIN_BATCH(batch, 1);
407     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
408     ADVANCE_BATCH(batch);
409 }
410
411 static void
412 ironlake_pp_urb_layout(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
413 {
414     struct i965_driver_data *i965 = i965_driver_data(ctx);
415     struct intel_batchbuffer *batch = i965->batch;
416     unsigned int vfe_fence, cs_fence;
417
418     vfe_fence = pp_context->urb.cs_start;
419     cs_fence = pp_context->urb.size;
420
421     BEGIN_BATCH(batch, 3);
422     OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
423     OUT_BATCH(batch, 0);
424     OUT_BATCH(batch, 
425               (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
426               (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
427     ADVANCE_BATCH(batch);
428 }
429
430 static void
431 ironlake_pp_state_base_address(VADriverContextP ctx)
432 {
433     struct i965_driver_data *i965 = i965_driver_data(ctx);
434     struct intel_batchbuffer *batch = i965->batch;
435
436     BEGIN_BATCH(batch, 8);
437     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
438     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
439     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
440     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
441     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
442     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
443     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
444     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
445     ADVANCE_BATCH(batch);
446 }
447
448 static void
449 ironlake_pp_state_pointers(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
450 {
451     struct i965_driver_data *i965 = i965_driver_data(ctx);
452     struct intel_batchbuffer *batch = i965->batch;
453
454     BEGIN_BATCH(batch, 3);
455     OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
456     OUT_BATCH(batch, 0);
457     OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
458     ADVANCE_BATCH(batch);
459 }
460
461 static void 
462 ironlake_pp_cs_urb_layout(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
463 {
464     struct i965_driver_data *i965 = i965_driver_data(ctx);
465     struct intel_batchbuffer *batch = i965->batch;
466
467     BEGIN_BATCH(batch, 2);
468     OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
469     OUT_BATCH(batch,
470               ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
471               (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
472     ADVANCE_BATCH(batch);
473 }
474
475 static void
476 ironlake_pp_constant_buffer(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
477 {
478     struct i965_driver_data *i965 = i965_driver_data(ctx);
479     struct intel_batchbuffer *batch = i965->batch;
480
481     BEGIN_BATCH(batch, 2);
482     OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
483     OUT_RELOC(batch, pp_context->curbe.bo,
484               I915_GEM_DOMAIN_INSTRUCTION, 0,
485               pp_context->urb.size_cs_entry - 1);
486     ADVANCE_BATCH(batch);    
487 }
488
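/*
 * Walk the destination block grid and emit one MEDIA_OBJECT command per
 * block, passing the per-block parameters as inline data (GRF 5-6).
 * A non-zero return from pp_set_block_parameter() skips the block.
 */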
489 static void
490 ironlake_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
491 {
492     struct i965_driver_data *i965 = i965_driver_data(ctx);
493     struct intel_batchbuffer *batch = i965->batch;
494     int x, x_steps, y, y_steps;
495
496     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
497     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
498
499     for (y = 0; y < y_steps; y++) {
500         for (x = 0; x < x_steps; x++) {
501             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
502                 BEGIN_BATCH(batch, 20);
503                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
504                 OUT_BATCH(batch, 0);
505                 OUT_BATCH(batch, 0); /* no indirect data */
506                 OUT_BATCH(batch, 0);
507
508                 /* inline data grf 5-6 */
509                 assert(sizeof(pp_inline_parameter) == 64);
510                 intel_batchbuffer_data(batch, &pp_inline_parameter, sizeof(pp_inline_parameter));
511
512                 ADVANCE_BATCH(batch);
513             }
514         }
515     }
516 }
517
518 static void
519 ironlake_pp_pipeline_setup(VADriverContextP ctx)
520 {
521     struct i965_driver_data *i965 = i965_driver_data(ctx);
522     struct intel_batchbuffer *batch = i965->batch;
523     struct i965_post_processing_context *pp_context = i965->pp_context;
524
525     intel_batchbuffer_start_atomic(batch, 0x1000);
526     intel_batchbuffer_emit_mi_flush(batch);
527     ironlake_pp_pipeline_select(ctx);
528     ironlake_pp_state_base_address(ctx);
529     ironlake_pp_state_pointers(ctx, pp_context);
530     ironlake_pp_urb_layout(ctx, pp_context);
531     ironlake_pp_cs_urb_layout(ctx, pp_context);
532     ironlake_pp_constant_buffer(ctx, pp_context);
533     ironlake_pp_object_walker(ctx, pp_context);
534     intel_batchbuffer_end_atomic(batch);
535 }
536
537 static int
538 pp_null_x_steps(void *private_context)
539 {
540     return 1;
541 }
542
543 static int
544 pp_null_y_steps(void *private_context)
545 {
546     return 1;
547 }
548
549 static int
550 pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
551 {
552     return 0;
553 }
554
555 static void
556 pp_null_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
557                    unsigned short srcw, unsigned short srch,
558                    unsigned short destw, unsigned short desth)
559 {
560     struct i965_driver_data *i965 = i965_driver_data(ctx);
561     struct i965_post_processing_context *pp_context = i965->pp_context;
562     struct object_surface *obj_surface;
563
564     /* surface */
565     obj_surface = SURFACE(surface);
566     dri_bo_unreference(obj_surface->pp_out_bo);
567     obj_surface->pp_out_bo = obj_surface->bo;
568     dri_bo_reference(obj_surface->pp_out_bo);
569     assert(obj_surface->pp_out_bo);
570     obj_surface->pp_out_width = obj_surface->width;
571     obj_surface->pp_out_height = obj_surface->height;
572     obj_surface->orig_pp_out_width = obj_surface->orig_width;
573     obj_surface->orig_pp_out_height = obj_surface->orig_height;
574
575     /* private function & data */
576     pp_context->pp_x_steps = pp_null_x_steps;
577     pp_context->pp_y_steps = pp_null_y_steps;
578     pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
579 }
580
581 static int
582 pp_load_save_x_steps(void *private_context)
583 {
584     return 1;
585 }
586
587 static int
588 pp_load_save_y_steps(void *private_context)
589 {
590     struct pp_load_save_context *pp_load_save_context = private_context;
591
592     return pp_load_save_context->dest_h / 8;
593 }
594
595 static int
596 pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
597 {
598     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
599     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
600     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
601     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
602
603     return 0;
604 }
605
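/*
 * NV12 load/save (copy) path: the source Y and UV planes are bound as
 * surfaces 1 and 2, a freshly allocated intermediate NV12 buffer as
 * surfaces 7 and 8, and the kernel works on 16x8 blocks.
 */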
606 static void
607 pp_nv12_load_save_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
608                              unsigned short srcw, unsigned short srch,
609                              unsigned short destw, unsigned short desth)
610 {
611     struct i965_driver_data *i965 = i965_driver_data(ctx);
612     struct i965_post_processing_context *pp_context = i965->pp_context;
613     struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
614     struct object_surface *obj_surface;
615     struct i965_surface_state *ss;
616     dri_bo *bo;
617     int index, w, h;
618     int orig_w, orig_h;
619     unsigned int tiling, swizzle;
620
621     /* surface */
622     obj_surface = SURFACE(surface);
623     orig_w = obj_surface->orig_width;
624     orig_h = obj_surface->orig_height;
625     w = obj_surface->width;
626     h = obj_surface->height;
627
628     dri_bo_unreference(obj_surface->pp_out_bo);
629     obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
630                                           "intermediate surface",
631                                           SIZE_YUV420(w, h),
632                                           4096);
633     assert(obj_surface->pp_out_bo);
634     obj_surface->pp_out_width = obj_surface->width;
635     obj_surface->pp_out_height = obj_surface->height;
636     obj_surface->orig_pp_out_width = obj_surface->orig_width;
637     obj_surface->orig_pp_out_height = obj_surface->orig_height;
638
639     /* source Y surface index 1 */
640     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
641
642     index = 1;
643     pp_context->surfaces[index].s_bo = obj_surface->bo;
644     dri_bo_reference(pp_context->surfaces[index].s_bo);
645     bo = dri_bo_alloc(i965->intel.bufmgr, 
646                       "surface state", 
647                       sizeof(struct i965_surface_state), 
648                       4096);
649     assert(bo);
650     pp_context->surfaces[index].ss_bo = bo;
651     dri_bo_map(bo, True);
652     assert(bo->virtual);
653     ss = bo->virtual;
654     memset(ss, 0, sizeof(*ss));
655     ss->ss0.surface_type = I965_SURFACE_2D;
656     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
657     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
658     ss->ss2.width = orig_w / 4 - 1;
659     ss->ss2.height = orig_h - 1;
660     ss->ss3.pitch = w - 1;
661     pp_set_surface_tiling(ss, tiling);
662     dri_bo_emit_reloc(bo,
663                       I915_GEM_DOMAIN_RENDER, 
664                       0,
665                       0,
666                       offsetof(struct i965_surface_state, ss1),
667                       pp_context->surfaces[index].s_bo);
668     dri_bo_unmap(bo);
669
670     /* source UV surface index 2 */
671     index = 2;
672     pp_context->surfaces[index].s_bo = obj_surface->bo;
673     dri_bo_reference(pp_context->surfaces[index].s_bo);
674     bo = dri_bo_alloc(i965->intel.bufmgr, 
675                       "surface state", 
676                       sizeof(struct i965_surface_state), 
677                       4096);
678     assert(bo);
679     pp_context->surfaces[index].ss_bo = bo;
680     dri_bo_map(bo, True);
681     assert(bo->virtual);
682     ss = bo->virtual;
683     memset(ss, 0, sizeof(*ss));
684     ss->ss0.surface_type = I965_SURFACE_2D;
685     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
686     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
687     ss->ss2.width = orig_w / 4 - 1;
688     ss->ss2.height = orig_h / 2 - 1;
689     ss->ss3.pitch = w - 1;
690     pp_set_surface_tiling(ss, tiling);
691     dri_bo_emit_reloc(bo,
692                       I915_GEM_DOMAIN_RENDER, 
693                       0,
694                       w * h,
695                       offsetof(struct i965_surface_state, ss1),
696                       pp_context->surfaces[index].s_bo);
697     dri_bo_unmap(bo);
698
699     /* destination Y surface index 7 */
700     index = 7;
701     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
702     dri_bo_reference(pp_context->surfaces[index].s_bo);
703     bo = dri_bo_alloc(i965->intel.bufmgr, 
704                       "surface state", 
705                       sizeof(struct i965_surface_state), 
706                       4096);
707     assert(bo);
708     pp_context->surfaces[index].ss_bo = bo;
709     dri_bo_map(bo, True);
710     assert(bo->virtual);
711     ss = bo->virtual;
712     memset(ss, 0, sizeof(*ss));
713     ss->ss0.surface_type = I965_SURFACE_2D;
714     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
715     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
716     ss->ss2.width = orig_w / 4 - 1;
717     ss->ss2.height = orig_h - 1;
718     ss->ss3.pitch = w - 1;
719     dri_bo_emit_reloc(bo,
720                       I915_GEM_DOMAIN_RENDER, 
721                       I915_GEM_DOMAIN_RENDER,
722                       0,
723                       offsetof(struct i965_surface_state, ss1),
724                       pp_context->surfaces[index].s_bo);
725     dri_bo_unmap(bo);
726
727     /* destination UV surface index 8 */
728     index = 8;
729     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
730     dri_bo_reference(pp_context->surfaces[index].s_bo);
731     bo = dri_bo_alloc(i965->intel.bufmgr, 
732                       "surface state", 
733                       sizeof(struct i965_surface_state), 
734                       4096);
735     assert(bo);
736     pp_context->surfaces[index].ss_bo = bo;
737     dri_bo_map(bo, True);
738     assert(bo->virtual);
739     ss = bo->virtual;
740     memset(ss, 0, sizeof(*ss));
741     ss->ss0.surface_type = I965_SURFACE_2D;
742     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
743     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
744     ss->ss2.width = orig_w / 4 - 1;
745     ss->ss2.height = orig_h / 2 - 1;
746     ss->ss3.pitch = w - 1;
747     dri_bo_emit_reloc(bo,
748                       I915_GEM_DOMAIN_RENDER, 
749                       I915_GEM_DOMAIN_RENDER,
750                       w * h,
751                       offsetof(struct i965_surface_state, ss1),
752                       pp_context->surfaces[index].s_bo);
753     dri_bo_unmap(bo);
754
755     /* private function & data */
756     pp_context->pp_x_steps = pp_load_save_x_steps;
757     pp_context->pp_y_steps = pp_load_save_y_steps;
758     pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
759     pp_load_save_context->dest_h = h;
760     pp_load_save_context->dest_w = w;
761
762     pp_inline_parameter.grf5.block_count_x = w / 16;   /* 1 x N */
763     pp_inline_parameter.grf5.number_blocks = w / 16;
764 }
765
766 static int
767 pp_scaling_x_steps(void *private_context)
768 {
769     return 1;
770 }
771
772 static int
773 pp_scaling_y_steps(void *private_context)
774 {
775     struct pp_scaling_context *pp_scaling_context = private_context;
776
777     return pp_scaling_context->dest_h / 8;
778 }
779
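/*
 * Map the 16x8 destination block at (x, y) to its normalized source
 * origin using the per-axis scaling steps.
 */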
780 static int
781 pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
782 {
783     float src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
784     float src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
785
786     pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16;
787     pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8;
788     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
789     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
790     
791     return 0;
792 }
793
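/*
 * NV12 bilinear scaling path: SIMD16 kernels sample the source planes
 * with linear min/mag filtering and write an intermediate NV12 surface
 * whose dimensions are rounded up to a multiple of 16.
 */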
794 static void
795 pp_nv12_scaling_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
796                            unsigned short srcw, unsigned short srch,
797                            unsigned short destw, unsigned short desth)
798 {
799     struct i965_driver_data *i965 = i965_driver_data(ctx);
800     struct i965_post_processing_context *pp_context = i965->pp_context;
801     struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
802     struct object_surface *obj_surface;
803     struct i965_sampler_state *sampler_state;
804     struct i965_surface_state *ss;
805     dri_bo *bo;
806     int index;
807     int w, h;
808     int orig_w, orig_h;
809     int pp_out_w, pp_out_h;
810     int orig_pp_out_w, orig_pp_out_h;
811     unsigned int tiling, swizzle;
812
813     /* surface */
814     obj_surface = SURFACE(surface);
815     orig_w = obj_surface->orig_width;
816     orig_h = obj_surface->orig_height;
817     w = obj_surface->width;
818     h = obj_surface->height;
819
820     orig_pp_out_w = destw;
821     orig_pp_out_h = desth;
822     pp_out_w = ALIGN(orig_pp_out_w, 16);
823     pp_out_h = ALIGN(orig_pp_out_h, 16);
824     dri_bo_unreference(obj_surface->pp_out_bo);
825     obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
826                                           "intermediate surface",
827                                           SIZE_YUV420(pp_out_w, pp_out_h),
828                                           4096);
829     assert(obj_surface->pp_out_bo);
830     obj_surface->orig_pp_out_width = orig_pp_out_w;
831     obj_surface->orig_pp_out_height = orig_pp_out_h;
832     obj_surface->pp_out_width = pp_out_w;
833     obj_surface->pp_out_height = pp_out_h;
834
835     /* source Y surface index 1 */
836     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
837
838     index = 1;
839     pp_context->surfaces[index].s_bo = obj_surface->bo;
840     dri_bo_reference(pp_context->surfaces[index].s_bo);
841     bo = dri_bo_alloc(i965->intel.bufmgr, 
842                       "surface state", 
843                       sizeof(struct i965_surface_state), 
844                       4096);
845     assert(bo);
846     pp_context->surfaces[index].ss_bo = bo;
847     dri_bo_map(bo, True);
848     assert(bo->virtual);
849     ss = bo->virtual;
850     memset(ss, 0, sizeof(*ss));
851     ss->ss0.surface_type = I965_SURFACE_2D;
852     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
853     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
854     ss->ss2.width = orig_w - 1;
855     ss->ss2.height = orig_h - 1;
856     ss->ss3.pitch = w - 1;
857     pp_set_surface_tiling(ss, tiling);
858     dri_bo_emit_reloc(bo,
859                       I915_GEM_DOMAIN_RENDER, 
860                       0,
861                       0,
862                       offsetof(struct i965_surface_state, ss1),
863                       pp_context->surfaces[index].s_bo);
864     dri_bo_unmap(bo);
865
866     /* source UV surface index 2 */
867     index = 2;
868     pp_context->surfaces[index].s_bo = obj_surface->bo;
869     dri_bo_reference(pp_context->surfaces[index].s_bo);
870     bo = dri_bo_alloc(i965->intel.bufmgr, 
871                       "surface state", 
872                       sizeof(struct i965_surface_state), 
873                       4096);
874     assert(bo);
875     pp_context->surfaces[index].ss_bo = bo;
876     dri_bo_map(bo, True);
877     assert(bo->virtual);
878     ss = bo->virtual;
879     memset(ss, 0, sizeof(*ss));
880     ss->ss0.surface_type = I965_SURFACE_2D;
881     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
882     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
883     ss->ss2.width = orig_w / 2 - 1;
884     ss->ss2.height = orig_h / 2 - 1;
885     ss->ss3.pitch = w - 1;
886     pp_set_surface_tiling(ss, tiling);
887     dri_bo_emit_reloc(bo,
888                       I915_GEM_DOMAIN_RENDER, 
889                       0,
890                       w * h,
891                       offsetof(struct i965_surface_state, ss1),
892                       pp_context->surfaces[index].s_bo);
893     dri_bo_unmap(bo);
894
895     /* destination Y surface index 7 */
896     index = 7;
897     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
898     dri_bo_reference(pp_context->surfaces[index].s_bo);
899     bo = dri_bo_alloc(i965->intel.bufmgr, 
900                       "surface state", 
901                       sizeof(struct i965_surface_state), 
902                       4096);
903     assert(bo);
904     pp_context->surfaces[index].ss_bo = bo;
905     dri_bo_map(bo, True);
906     assert(bo->virtual);
907     ss = bo->virtual;
908     memset(ss, 0, sizeof(*ss));
909     ss->ss0.surface_type = I965_SURFACE_2D;
910     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
911     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
912     ss->ss2.width = pp_out_w / 4 - 1;
913     ss->ss2.height = pp_out_h - 1;
914     ss->ss3.pitch = pp_out_w - 1;
915     dri_bo_emit_reloc(bo,
916                       I915_GEM_DOMAIN_RENDER, 
917                       I915_GEM_DOMAIN_RENDER,
918                       0,
919                       offsetof(struct i965_surface_state, ss1),
920                       pp_context->surfaces[index].s_bo);
921     dri_bo_unmap(bo);
922
923     /* destination UV surface index 8 */
924     index = 8;
925     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
926     dri_bo_reference(pp_context->surfaces[index].s_bo);
927     bo = dri_bo_alloc(i965->intel.bufmgr, 
928                       "surface state", 
929                       sizeof(struct i965_surface_state), 
930                       4096);
931     assert(bo);
932     pp_context->surfaces[index].ss_bo = bo;
933     dri_bo_map(bo, True);
934     assert(bo->virtual);
935     ss = bo->virtual;
936     memset(ss, 0, sizeof(*ss));
937     ss->ss0.surface_type = I965_SURFACE_2D;
938     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
939     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + pp_out_w * pp_out_h;
940     ss->ss2.width = pp_out_w / 4 - 1;
941     ss->ss2.height = pp_out_h / 2 - 1;
942     ss->ss3.pitch = pp_out_w - 1;
943     dri_bo_emit_reloc(bo,
944                       I915_GEM_DOMAIN_RENDER, 
945                       I915_GEM_DOMAIN_RENDER,
946                       pp_out_w * pp_out_h,
947                       offsetof(struct i965_surface_state, ss1),
948                       pp_context->surfaces[index].s_bo);
949     dri_bo_unmap(bo);
950
951     /* sampler state */
952     dri_bo_map(pp_context->sampler_state_table.bo, True);
953     assert(pp_context->sampler_state_table.bo->virtual);
954     sampler_state = pp_context->sampler_state_table.bo->virtual;
955
956     /* SIMD16 Y index 1 */
957     sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
958     sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
959     sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
960     sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
961     sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
962
963     /* SIMD16 UV index 2 */
964     sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
965     sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
966     sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
967     sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
968     sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
969
970     dri_bo_unmap(pp_context->sampler_state_table.bo);
971
972     /* private function & data */
973     pp_context->pp_x_steps = pp_scaling_x_steps;
974     pp_context->pp_y_steps = pp_scaling_y_steps;
975     pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;
976
977     pp_scaling_context->dest_w = pp_out_w;
978     pp_scaling_context->dest_h = pp_out_h;
979
980     pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) 1.0 / pp_out_h;
981     pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) 1.0 / pp_out_w;
982     pp_inline_parameter.grf5.block_count_x = pp_out_w / 16;   /* 1 x N */
983     pp_inline_parameter.grf5.number_blocks = pp_out_w / 16;
984     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
985     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
986 }
987
988 static int
989 pp_avs_x_steps(void *private_context)
990 {
991     struct pp_avs_context *pp_avs_context = private_context;
992
993     return pp_avs_context->dest_w / 16;
994 }
995
996 static int
997 pp_avs_y_steps(void *private_context)
998 {
999     return 1;
1000 }
1001
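/*
 * Per-block parameters for the AVS path.  If the aspect-preserving width
 * (tmp_w) is at least the destination width, a constant horizontal step
 * of 1/tmp_w is used and the source is cropped symmetrically; otherwise
 * the row is split into left, center and right regions and the step is
 * ramped through video_step_delta in the side regions (non-linear
 * anamorphic scaling), keeping the center linear, with a plain linear
 * fallback when the side regions are small.
 */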
1002 static int
1003 pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1004 {
1005     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1006     float src_x_steping, src_y_steping, video_step_delta;
1007     int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);
1008
1009     if (tmp_w >= pp_avs_context->dest_w) {
1010         pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1011         pp_inline_parameter.grf6.video_step_delta = 0;
1012         
1013         if (x == 0) {
1014             pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2;
1015         } else {
1016             src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1017             video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1018             pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1019                 16 * 15 * video_step_delta / 2;
1020         }
1021     } else {
1022         int n0, n1, n2, nls_left, nls_right;
1023         int factor_a = 5, factor_b = 4;
1024         float f;
1025
1026         n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
1027         n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
1028         n2 = tmp_w / (16 * factor_a);
1029         nls_left = n0 + n2;
1030         nls_right = n1 + n2;
1031         f = (float) n2 * 16 / tmp_w;
1032         
1033         if (n0 < 5) {
1034             pp_inline_parameter.grf6.video_step_delta = 0.0;
1035
1036             if (x == 0) {
1037                 pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
1038                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = 0.0;
1039             } else {
1040                 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1041                 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1042                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1043                     16 * 15 * video_step_delta / 2;
1044             }
1045         } else {
1046             if (x < nls_left) {
1047                 /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
1048                 float a = f / (nls_left * 16 * factor_b);
1049                 float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
1050                 
1051                 pp_inline_parameter.grf6.video_step_delta = b;
1052
1053                 if (x == 0) {
1054                     pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = 0.0;
1055                     pp_inline_parameter.grf5.normalized_video_x_scaling_step = a;
1056                 } else {
1057                     src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1058                     video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1059                     pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1060                         16 * 15 * video_step_delta / 2;
1061                     pp_inline_parameter.grf5.normalized_video_x_scaling_step += 16 * b;
1062                 }
1063             } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
1064                 /* scale the center linearly */
1065                 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1066                 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1067                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1068                     16 * 15 * video_step_delta / 2;
1069                 pp_inline_parameter.grf6.video_step_delta = 0.0;
1070                 pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
1071             } else {
1072                 float a = f / (nls_right * 16 * factor_b);
1073                 float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));
1074
1075                 src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
1076                 video_step_delta = pp_inline_parameter.grf6.video_step_delta;
1077                 pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
1078                     16 * 15 * video_step_delta / 2;
1079                 pp_inline_parameter.grf6.video_step_delta = -b;
1080
1081                 if (x == (pp_avs_context->dest_w / 16 - nls_right))
1082                     pp_inline_parameter.grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
1083                 else
1084                     pp_inline_parameter.grf5.normalized_video_x_scaling_step -= b * 16;
1085             }
1086         }
1087     }
1088
1089     src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
1090     pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8;
1091     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
1092     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;
1093
1094     return 0;
1095 }
1096
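/*
 * NV12 AVS path: the source planes are described with SURFACE_STATE2 for
 * the sample_8x8 message (adaptive 8-tap filtering on Y, nearest on UV,
 * plus IEF detail enhancement), the destination is a new intermediate
 * NV12 buffer bound as plain R8/R8G8 2D surfaces, and the walker issues
 * one MEDIA_OBJECT per 16-pixel-wide column (M x 1 block layout).
 */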
1097 static void
1098 pp_nv12_avs_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
1099                        unsigned short srcw, unsigned short srch,
1100                        unsigned short destw, unsigned short desth)
1101 {
1102     struct i965_driver_data *i965 = i965_driver_data(ctx);
1103     struct i965_post_processing_context *pp_context = i965->pp_context;
1104     struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
1105     struct object_surface *obj_surface;
1106     struct i965_surface_state *ss;
1107     struct i965_sampler_8x8 *sampler_8x8;
1108     struct i965_sampler_8x8_state *sampler_8x8_state;
1109     struct i965_surface_state2 *ss_8x8;
1110     dri_bo *bo, *src_bo;
1111     int index;
1112     int w, h;
1113     int orig_w, orig_h;
1114     int pp_out_w, pp_out_h;
1115     int orig_pp_out_w, orig_pp_out_h;
1116     unsigned int tiling, swizzle;
1117
1118     /* surface */
1119     obj_surface = SURFACE(surface);
1120     
1121     if (input == 1) {
1122         orig_w = obj_surface->orig_pp_out_width;
1123         orig_h = obj_surface->orig_pp_out_height;
1124         w = obj_surface->pp_out_width;
1125         h = obj_surface->pp_out_height;
1126         src_bo = obj_surface->pp_out_bo;
1127     } else {
1128         orig_w = obj_surface->orig_width;
1129         orig_h = obj_surface->orig_height;
1130         w = obj_surface->width;
1131         h = obj_surface->height;
1132         src_bo = obj_surface->bo;
1133     }
1134
1135     assert(src_bo);
1136     dri_bo_get_tiling(src_bo, &tiling, &swizzle);
1137
1138     /* source Y surface index 1 */
1139     index = 1;
1140     pp_context->surfaces[index].s_bo = src_bo;
1141     dri_bo_reference(pp_context->surfaces[index].s_bo);
1142     bo = dri_bo_alloc(i965->intel.bufmgr, 
1143                       "Y surface state for sample_8x8", 
1144                       sizeof(struct i965_surface_state2), 
1145                       4096);
1146     assert(bo);
1147     pp_context->surfaces[index].ss_bo = bo;
1148     dri_bo_map(bo, True);
1149     assert(bo->virtual);
1150     ss_8x8 = bo->virtual;
1151     memset(ss_8x8, 0, sizeof(*ss_8x8));
1152     ss_8x8->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset;
1153     ss_8x8->ss1.cbcr_pixel_offset_v_direction = 0;
1154     ss_8x8->ss1.width = orig_w - 1;
1155     ss_8x8->ss1.height = orig_h - 1;
1156     ss_8x8->ss2.half_pitch_for_chroma = 0;
1157     ss_8x8->ss2.pitch = w - 1;
1158     ss_8x8->ss2.interleave_chroma = 0;
1159     ss_8x8->ss2.surface_format = SURFACE_FORMAT_Y8_UNORM;
1160     ss_8x8->ss3.x_offset_for_cb = 0;
1161     ss_8x8->ss3.y_offset_for_cb = 0;
1162     pp_set_surface2_tiling(ss_8x8, tiling);
1163     dri_bo_emit_reloc(bo,
1164                       I915_GEM_DOMAIN_RENDER, 
1165                       0,
1166                       0,
1167                       offsetof(struct i965_surface_state2, ss0),
1168                       pp_context->surfaces[index].s_bo);
1169     dri_bo_unmap(bo);
1170
1171     /* source UV surface index 2 */
1172     index = 2;
1173     pp_context->surfaces[index].s_bo = src_bo;
1174     dri_bo_reference(pp_context->surfaces[index].s_bo);
1175     bo = dri_bo_alloc(i965->intel.bufmgr, 
1176                       "UV surface state for sample_8x8", 
1177                       sizeof(struct i965_surface_state2), 
1178                       4096);
1179     assert(bo);
1180     pp_context->surfaces[index].ss_bo = bo;
1181     dri_bo_map(bo, True);
1182     assert(bo->virtual);
1183     ss_8x8 = bo->virtual;
1184     memset(ss_8x8, 0, sizeof(*ss_8x8));
1185     ss_8x8->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset + w * h;
1186     ss_8x8->ss1.cbcr_pixel_offset_v_direction = 0;
1187     ss_8x8->ss1.width = orig_w - 1;
1188     ss_8x8->ss1.height = orig_h - 1;
1189     ss_8x8->ss2.half_pitch_for_chroma = 0;
1190     ss_8x8->ss2.pitch = w - 1;
1191     ss_8x8->ss2.interleave_chroma = 1;
1192     ss_8x8->ss2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
1193     ss_8x8->ss3.x_offset_for_cb = 0;
1194     ss_8x8->ss3.y_offset_for_cb = 0;
1195     pp_set_surface2_tiling(ss_8x8, tiling);
1196     dri_bo_emit_reloc(bo,
1197                       I915_GEM_DOMAIN_RENDER, 
1198                       0,
1199                       w * h,
1200                       offsetof(struct i965_surface_state2, ss0),
1201                       pp_context->surfaces[index].s_bo);
1202     dri_bo_unmap(bo);
1203
1204     orig_pp_out_w = destw;
1205     orig_pp_out_h = desth;
1206     pp_out_w = ALIGN(orig_pp_out_w, 16);
1207     pp_out_h = ALIGN(orig_pp_out_h, 16);
1208     dri_bo_unreference(obj_surface->pp_out_bo);
1209     obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
1210                                           "intermediate surface",
1211                                           SIZE_YUV420(pp_out_w, pp_out_h),
1212                                           4096);
1213     assert(obj_surface->pp_out_bo);
1214     obj_surface->orig_pp_out_width = orig_pp_out_w;
1215     obj_surface->orig_pp_out_height = orig_pp_out_h;
1216     obj_surface->pp_out_width = pp_out_w;
1217     obj_surface->pp_out_height = pp_out_h;
1218
1219     /* destination Y surface index 7 */
1220     index = 7;
1221     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1222     dri_bo_reference(pp_context->surfaces[index].s_bo);
1223     bo = dri_bo_alloc(i965->intel.bufmgr, 
1224                       "surface state", 
1225                       sizeof(struct i965_surface_state), 
1226                       4096);
1227     assert(bo);
1228     pp_context->surfaces[index].ss_bo = bo;
1229     dri_bo_map(bo, True);
1230     assert(bo->virtual);
1231     ss = bo->virtual;
1232     memset(ss, 0, sizeof(*ss));
1233     ss->ss0.surface_type = I965_SURFACE_2D;
1234     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1235     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1236     ss->ss2.width = pp_out_w / 4 - 1;
1237     ss->ss2.height = pp_out_h - 1;
1238     ss->ss3.pitch = pp_out_w - 1;
1239     dri_bo_emit_reloc(bo,
1240                       I915_GEM_DOMAIN_RENDER, 
1241                       I915_GEM_DOMAIN_RENDER,
1242                       0,
1243                       offsetof(struct i965_surface_state, ss1),
1244                       pp_context->surfaces[index].s_bo);
1245     dri_bo_unmap(bo);
1246
1247     /* destination UV surface index 8 */
1248     index = 8;
1249     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1250     dri_bo_reference(pp_context->surfaces[index].s_bo);
1251     bo = dri_bo_alloc(i965->intel.bufmgr, 
1252                       "surface state", 
1253                       sizeof(struct i965_surface_state), 
1254                       4096);
1255     assert(bo);
1256     pp_context->surfaces[index].ss_bo = bo;
1257     dri_bo_map(bo, True);
1258     assert(bo->virtual);
1259     ss = bo->virtual;
1260     memset(ss, 0, sizeof(*ss));
1261     ss->ss0.surface_type = I965_SURFACE_2D;
1262     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1263     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + pp_out_w * pp_out_h;
1264     ss->ss2.width = pp_out_w / 4 - 1;
1265     ss->ss2.height = pp_out_h / 2 - 1;
1266     ss->ss3.pitch = pp_out_w - 1;
1267     dri_bo_emit_reloc(bo,
1268                       I915_GEM_DOMAIN_RENDER, 
1269                       I915_GEM_DOMAIN_RENDER,
1270                       pp_out_w * pp_out_h,
1271                       offsetof(struct i965_surface_state, ss1),
1272                       pp_context->surfaces[index].s_bo);
1273     dri_bo_unmap(bo);
1274     
1275     /* sampler 8x8 state */
1276     dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
1277     assert(pp_context->sampler_state_table.bo_8x8->virtual);
1278     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1279     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
1280     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1281     sampler_8x8_state->dw136.default_sharpness_level = 0;
1282     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
1283     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1284     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1285     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);
1286
1287     /* sampler 8x8 */
1288     dri_bo_map(pp_context->sampler_state_table.bo, True);
1289     assert(pp_context->sampler_state_table.bo->virtual);
1290     assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
1291     sampler_8x8 = pp_context->sampler_state_table.bo->virtual;
1292
1293     /* sample_8x8 Y index 1 */
1294     index = 1;
1295     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1296     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
1297     sampler_8x8[index].dw0.ief_bypass = 0;
1298     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1299     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1300     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
1301     sampler_8x8[index].dw2.global_noise_estimation = 22;
1302     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1303     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1304     sampler_8x8[index].dw3.strong_edge_weight = 7;
1305     sampler_8x8[index].dw3.regular_weight = 2;
1306     sampler_8x8[index].dw3.non_edge_weight = 0;
1307     sampler_8x8[index].dw3.gain_factor = 40;
1308     sampler_8x8[index].dw4.steepness_boost = 0;
1309     sampler_8x8[index].dw4.steepness_threshold = 0;
1310     sampler_8x8[index].dw4.mr_boost = 0;
1311     sampler_8x8[index].dw4.mr_threshold = 5;
1312     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1313     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1314     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1315     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1316     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1317     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1318     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1319     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1320     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1321     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1322     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1323     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1324     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1325     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1326     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1327     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1328     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1329     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1330     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1331     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1332     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1333     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1334     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1335     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1336     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1337     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1338     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1339     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1340     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1341     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1342     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1343     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1344     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1345     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1346     sampler_8x8[index].dw13.limiter_boost = 0;
1347     sampler_8x8[index].dw13.minimum_limiter = 10;
1348     sampler_8x8[index].dw13.maximum_limiter = 11;
1349     sampler_8x8[index].dw14.clip_limiter = 130;
1350     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1351                       I915_GEM_DOMAIN_RENDER, 
1352                       0,
1353                       0,
1354                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1355                       pp_context->sampler_state_table.bo_8x8);
1356
1357     dri_bo_map(pp_context->sampler_state_table.bo_8x8_uv, True);
1358     assert(pp_context->sampler_state_table.bo_8x8_uv->virtual);
1359     assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
1360     sampler_8x8_state = pp_context->sampler_state_table.bo_8x8_uv->virtual;
1361     memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
1362     sampler_8x8_state->dw136.default_sharpness_level = 0;
1363     sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 0;
1364     sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
1365     sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
1366     dri_bo_unmap(pp_context->sampler_state_table.bo_8x8_uv);
1367
1368     /* sample_8x8 UV index 2 */
1369     index = 2;
1370     memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
1371     sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_NEAREST;
1372     sampler_8x8[index].dw0.ief_bypass = 0;
1373     sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
1374     sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
1375     sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8_uv->offset >> 5;
1376     sampler_8x8[index].dw2.global_noise_estimation = 22;
1377     sampler_8x8[index].dw2.strong_edge_threshold = 8;
1378     sampler_8x8[index].dw2.weak_edge_threshold = 1;
1379     sampler_8x8[index].dw3.strong_edge_weight = 7;
1380     sampler_8x8[index].dw3.regular_weight = 2;
1381     sampler_8x8[index].dw3.non_edge_weight = 0;
1382     sampler_8x8[index].dw3.gain_factor = 40;
1383     sampler_8x8[index].dw4.steepness_boost = 0;
1384     sampler_8x8[index].dw4.steepness_threshold = 0;
1385     sampler_8x8[index].dw4.mr_boost = 0;
1386     sampler_8x8[index].dw4.mr_threshold = 5;
1387     sampler_8x8[index].dw5.pwl1_point_1 = 4;
1388     sampler_8x8[index].dw5.pwl1_point_2 = 12;
1389     sampler_8x8[index].dw5.pwl1_point_3 = 16;
1390     sampler_8x8[index].dw5.pwl1_point_4 = 26;
1391     sampler_8x8[index].dw6.pwl1_point_5 = 40;
1392     sampler_8x8[index].dw6.pwl1_point_6 = 160;
1393     sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
1394     sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
1395     sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
1396     sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
1397     sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
1398     sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
1399     sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
1400     sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
1401     sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
1402     sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
1403     sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
1404     sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
1405     sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
1406     sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
1407     sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
1408     sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
1409     sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
1410     sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
1411     sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
1412     sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
1413     sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
1414     sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
1415     sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
1416     sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
1417     sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
1418     sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
1419     sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
1420     sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
1421     sampler_8x8[index].dw13.limiter_boost = 0;
1422     sampler_8x8[index].dw13.minimum_limiter = 10;
1423     sampler_8x8[index].dw13.maximum_limiter = 11;
1424     sampler_8x8[index].dw14.clip_limiter = 130;
1425     dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
1426                       I915_GEM_DOMAIN_RENDER, 
1427                       0,
1428                       0,
1429                       sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
1430                       pp_context->sampler_state_table.bo_8x8_uv);
1431
1432     dri_bo_unmap(pp_context->sampler_state_table.bo);
1433
1434     /* private function & data */
1435     pp_context->pp_x_steps = pp_avs_x_steps;
1436     pp_context->pp_y_steps = pp_avs_y_steps;
1437     pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;
1438
1439     pp_avs_context->dest_w = pp_out_w;
1440     pp_avs_context->dest_h = pp_out_h;
1441     pp_avs_context->src_w = w;
1442     pp_avs_context->src_h = h;
1443
1444     pp_static_parameter.grf4.r4_2.avs.nlas = 1;
1445     pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) 1.0 / pp_out_h;
1446     pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) 1.0 / pp_out_w;
1447     pp_inline_parameter.grf5.block_count_x = 1;        /* M x 1 */
1448     pp_inline_parameter.grf5.number_blocks = pp_out_h / 8;
1449     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1450     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1451     pp_inline_parameter.grf6.video_step_delta = 0.0;
1452 }
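/*
 * Note on the AVS parameters above: the scaler walks the destination in
 * normalized coordinates, so each step is simply 1 / output dimension.  For
 * a 1280x720 destination, for instance, the y scaling step is 1/720 and
 * number_blocks = 720 / 8 = 90.  NLAS (presumably non-linear anamorphic
 * scaling) is enabled via grf4.r4_2.avs.nlas, and the block walking itself
 * is driven by the pp_avs_x_steps / pp_avs_y_steps /
 * pp_avs_set_block_parameter hooks installed just above.
 */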
1453
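/*
 * Walker helpers for the DNDI (denoise/deinterlace) kernel.  x_steps is
 * always 1 because the kernel iterates horizontally on its own over
 * block_count_x = w / 16 blocks of 16 pixels (the "1 x N" layout set up in
 * pp_nv12_dndi_initialize()), while the driver emits one media object per
 * 4-line strip: dest_h / 4 y steps with a vertical origin of y * 4.
 */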
1454 static int
1455 pp_dndi_x_steps(void *private_context)
1456 {
1457     return 1;
1458 }
1459
1460 static int
1461 pp_dndi_y_steps(void *private_context)
1462 {
1463     struct pp_dndi_context *pp_dndi_context = private_context;
1464
1465     return pp_dndi_context->dest_h / 4;
1466 }
1467
1468 static int
1469 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1470 {
1471     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
1472     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 4;
1473
1474     return 0;
1475 }
1476
1477 static void
1478 pp_nv12_dndi_initialize(VADriverContextP ctx, VASurfaceID surface, int input,
1479                         unsigned short srcw, unsigned short srch,
1480                         unsigned short destw, unsigned short desth)
1481 {
1482     struct i965_driver_data *i965 = i965_driver_data(ctx);
1483     struct i965_post_processing_context *pp_context = i965->pp_context;
1484     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
1485     struct object_surface *obj_surface;
1486     struct i965_surface_state *ss;
1487     struct i965_surface_state2 *ss_dndi;
1488     struct i965_sampler_dndi *sampler_dndi;
1489     dri_bo *bo;
1490     int index;
1491     int w, h;
1492     int orig_w, orig_h;
1493     unsigned int tiling, swizzle;
1494
1495     /* surface */
1496     obj_surface = SURFACE(surface);
1497     orig_w = obj_surface->orig_width;
1498     orig_h = obj_surface->orig_height;
1499     w = obj_surface->width;
1500     h = obj_surface->height;
1501
1502     if (pp_context->stmm.bo == NULL) {
1503         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
1504                                            "STMM surface",
1505                                            w * h,
1506                                            4096);
1507         assert(pp_context->stmm.bo);
1508     }
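    /*
     * The STMM (spatial-temporal motion measure) buffer is allocated once
     * and never cleared here; it appears to carry the deinterlacer's
     * per-pixel motion history across frames and is only released in
     * i965_post_processing_terminate().
     */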
1509
1510     dri_bo_unreference(obj_surface->pp_out_bo);
1511     obj_surface->pp_out_bo = dri_bo_alloc(i965->intel.bufmgr,
1512                                           "intermediate surface",
1513                                           SIZE_YUV420(w, h),
1514                                           4096);
1515     assert(obj_surface->pp_out_bo);
1516     obj_surface->orig_pp_out_width = orig_w;
1517     obj_surface->orig_pp_out_height = orig_h;
1518     obj_surface->pp_out_width = w;
1519     obj_surface->pp_out_height = h;
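    /*
     * pp_out_bo is the NV12 intermediate that this DNDI pass writes into
     * (destination surfaces 7 and 8 below).  When deinterlacing is chained
     * with scaling in i965_post_processing(), the AVS pass is invoked with
     * input != 0 and presumably reads this intermediate instead of the
     * original surface.
     */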
1520
1521     dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
1522     /* source UV surface index 2 */
1523     index = 2;
1524     pp_context->surfaces[index].s_bo = obj_surface->bo;
1525     dri_bo_reference(pp_context->surfaces[index].s_bo);
1526     bo = dri_bo_alloc(i965->intel.bufmgr, 
1527                       "surface state", 
1528                       sizeof(struct i965_surface_state), 
1529                       4096);
1530     assert(bo);
1531     pp_context->surfaces[index].ss_bo = bo;
1532     dri_bo_map(bo, True);
1533     assert(bo->virtual);
1534     ss = bo->virtual;
1535     memset(ss, 0, sizeof(*ss));
1536     ss->ss0.surface_type = I965_SURFACE_2D;
1537     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1538     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
1539     ss->ss2.width = orig_w / 4 - 1;
1540     ss->ss2.height = orig_h / 2 - 1;
1541     ss->ss3.pitch = w - 1;
1542     pp_set_surface_tiling(ss, tiling);
1543     dri_bo_emit_reloc(bo,
1544                       I915_GEM_DOMAIN_RENDER, 
1545                       0,
1546                       w * h,
1547                       offsetof(struct i965_surface_state, ss1),
1548                       pp_context->surfaces[index].s_bo);
1549     dri_bo_unmap(bo);
1550
1551     /* source YUV surface index 4 */
1552     index = 4;
1553     pp_context->surfaces[index].s_bo = obj_surface->bo;
1554     dri_bo_reference(pp_context->surfaces[index].s_bo);
1555     bo = dri_bo_alloc(i965->intel.bufmgr, 
1556                       "YUV surface state for deinterlace ", 
1557                       sizeof(struct i965_surface_state2), 
1558                       4096);
1559     assert(bo);
1560     pp_context->surfaces[index].ss_bo = bo;
1561     dri_bo_map(bo, True);
1562     assert(bo->virtual);
1563     ss_dndi = bo->virtual;
1564     memset(ss_dndi, 0, sizeof(*ss_dndi));
1565     ss_dndi->ss0.surface_base_address = pp_context->surfaces[index].s_bo->offset;
1567     ss_dndi->ss1.width = w - 1;
1568     ss_dndi->ss1.height = h - 1;
1569     ss_dndi->ss1.cbcr_pixel_offset_v_direction = 1;
1570     ss_dndi->ss2.half_pitch_for_chroma = 0;
1571     ss_dndi->ss2.pitch = w - 1;
1572     ss_dndi->ss2.interleave_chroma = 1;
1573     ss_dndi->ss2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
1575     ss_dndi->ss2.tiled_surface = 0;
1576     ss_dndi->ss3.x_offset_for_cb = 0;
1577     ss_dndi->ss3.y_offset_for_cb = h;
1578     pp_set_surface2_tiling(ss_dndi, tiling);
1579     dri_bo_emit_reloc(bo,
1580                       I915_GEM_DOMAIN_RENDER, 
1581                       0,
1582                       0,
1583                       offsetof(struct i965_surface_state2, ss0),
1584                       pp_context->surfaces[index].s_bo);
1585     dri_bo_unmap(bo);
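    /*
     * Unlike the plain SURFACE_STATE entries, which expose one plane each,
     * this SURFACE_STATE2 describes the whole NV12 frame consumed by the
     * deinterlace kernel: planar 4:2:0 with interleaved chroma and the UV
     * plane located at y offset h.
     */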
1586
1587     /* source STMM surface index 20 */
1588     index = 20;
1589     pp_context->surfaces[index].s_bo = pp_context->stmm.bo;
1590     dri_bo_reference(pp_context->surfaces[index].s_bo);
1591     bo = dri_bo_alloc(i965->intel.bufmgr, 
1592                       "STMM surface state for deinterlace ", 
1593                       sizeof(struct i965_surface_state2), 
1594                       4096);
1595     assert(bo);
1596     pp_context->surfaces[index].ss_bo = bo;
1597     dri_bo_map(bo, True);
1598     assert(bo->virtual);
1599     ss = bo->virtual;
1600     memset(ss, 0, sizeof(*ss));
1601     ss->ss0.surface_type = I965_SURFACE_2D;
1602     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1603     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1604     ss->ss2.width = w - 1;
1605     ss->ss2.height = h - 1;
1606     ss->ss3.pitch = w - 1;
1607     dri_bo_emit_reloc(bo,
1608                       I915_GEM_DOMAIN_RENDER, 
1609                       I915_GEM_DOMAIN_RENDER,
1610                       0,
1611                       offsetof(struct i965_surface_state, ss1),
1612                       pp_context->surfaces[index].s_bo);
1613     dri_bo_unmap(bo);
1614
1615     /* destination Y surface index 7 */
1616     index = 7;
1617     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1618     dri_bo_reference(pp_context->surfaces[index].s_bo);
1619     bo = dri_bo_alloc(i965->intel.bufmgr, 
1620                       "surface state", 
1621                       sizeof(struct i965_surface_state), 
1622                       4096);
1623     assert(bo);
1624     pp_context->surfaces[index].ss_bo = bo;
1625     dri_bo_map(bo, True);
1626     assert(bo->virtual);
1627     ss = bo->virtual;
1628     memset(ss, 0, sizeof(*ss));
1629     ss->ss0.surface_type = I965_SURFACE_2D;
1630     ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
1631     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset;
1632     ss->ss2.width = w / 4 - 1;
1633     ss->ss2.height = h - 1;
1634     ss->ss3.pitch = w - 1;
1635     dri_bo_emit_reloc(bo,
1636                       I915_GEM_DOMAIN_RENDER, 
1637                       I915_GEM_DOMAIN_RENDER,
1638                       0,
1639                       offsetof(struct i965_surface_state, ss1),
1640                       pp_context->surfaces[index].s_bo);
1641     dri_bo_unmap(bo);
1642
1643     /* destination UV surface index 8 */
1644     index = 8;
1645     pp_context->surfaces[index].s_bo = obj_surface->pp_out_bo;
1646     dri_bo_reference(pp_context->surfaces[index].s_bo);
1647     bo = dri_bo_alloc(i965->intel.bufmgr, 
1648                       "surface state", 
1649                       sizeof(struct i965_surface_state), 
1650                       4096);
1651     assert(bo);
1652     pp_context->surfaces[index].ss_bo = bo;
1653     dri_bo_map(bo, True);
1654     assert(bo->virtual);
1655     ss = bo->virtual;
1656     memset(ss, 0, sizeof(*ss));
1657     ss->ss0.surface_type = I965_SURFACE_2D;
1658     ss->ss0.surface_format = I965_SURFACEFORMAT_R8G8_UNORM;
1659     ss->ss1.base_addr = pp_context->surfaces[index].s_bo->offset + w * h;
1660     ss->ss2.width = w / 4 - 1;
1661     ss->ss2.height = h / 2 - 1;
1662     ss->ss3.pitch = w - 1;
1663     dri_bo_emit_reloc(bo,
1664                       I915_GEM_DOMAIN_RENDER, 
1665                       I915_GEM_DOMAIN_RENDER,
1666                       w * h,
1667                       offsetof(struct i965_surface_state, ss1),
1668                       pp_context->surfaces[index].s_bo);
1669     dri_bo_unmap(bo);
1670
1671     /* sampler dndi */
1672     dri_bo_map(pp_context->sampler_state_table.bo, True);
1673     assert(pp_context->sampler_state_table.bo->virtual);
1674     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
1675     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
1676
1677     /* sampler dndi index 0 */
1678     index = 0;
1679     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
1680     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
1681     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
1682     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
1683
1684     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
1685     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
1686     sampler_dndi[index].dw1.stmm_c2 = 0;
1687     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
1688     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
1689
1690     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
1691     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
1692     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
1693     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
1694
1695     sampler_dndi[index].dw3.maximum_stmm = 128;
1696     sampler_dndi[index].dw3.multipler_for_vecm = 2;
1697     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
1698     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
1699     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
1700
1701     sampler_dndi[index].dw4.sdi_delta = 8;
1702     sampler_dndi[index].dw4.sdi_threshold = 128;
1703     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
1704     sampler_dndi[index].dw4.stmm_shift_up = 0;
1705     sampler_dndi[index].dw4.stmm_shift_down = 0;
1706     sampler_dndi[index].dw4.minimum_stmm = 0;
1707
1708     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
1709     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
1710     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
1711     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
1712
1713     sampler_dndi[index].dw6.dn_enable = 1;
1714     sampler_dndi[index].dw6.di_enable = 1;
1715     sampler_dndi[index].dw6.di_partial = 0;
1716     sampler_dndi[index].dw6.dndi_top_first = 1;
1717     sampler_dndi[index].dw6.dndi_stream_id = 1;
1718     sampler_dndi[index].dw6.dndi_first_frame = 1;
1719     sampler_dndi[index].dw6.progressive_dn = 0;
1720     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
1721     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
1722     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
1723
1724     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
1725     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
1726     sampler_dndi[index].dw7.vdi_walker_enable = 0;
1727     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
1728
1729     dri_bo_unmap(pp_context->sampler_state_table.bo);
1730
1731     /* private function & data */
1732     pp_context->pp_x_steps = pp_dndi_x_steps;
1733     pp_context->pp_y_steps = pp_dndi_y_steps;
1734     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
1735
1736     pp_static_parameter.grf1.statistics_surface_picth = w / 2;
1737     pp_static_parameter.grf1.r1_6.di.top_field_first = 0;
1738     pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m2 = 64;
1739     pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m1 = 192;
1740
1741     pp_inline_parameter.grf5.block_count_x = w / 16;   /* 1 x N */
1742     pp_inline_parameter.grf5.number_blocks = w / 16;
1743     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1744     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1745
1746     pp_dndi_context->dest_w = w;
1747     pp_dndi_context->dest_h = h;
1748 }
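/*
 * Rough sizing example for the DNDI pass, assuming a 1280x720 NV12 surface
 * whose aligned width/height match the original: pp_dndi_y_steps() yields
 * 720 / 4 = 180 media objects per frame, each covering a 4-line strip of
 * 1280 / 16 = 80 sixteen-pixel blocks.
 */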
1749
1750 static void
1751 ironlake_pp_initialize(VADriverContextP ctx,
1752                        VASurfaceID surface,
1753                        int input,
1754                        short srcx,
1755                        short srcy,
1756                        unsigned short srcw,
1757                        unsigned short srch,
1758                        short destx,
1759                        short desty,
1760                        unsigned short destw,
1761                        unsigned short desth,
1762                        int pp_index)
1763 {
1764     struct i965_driver_data *i965 = i965_driver_data(ctx);
1765     struct i965_post_processing_context *pp_context = i965->pp_context;
1766     struct pp_module *pp_module;
1767     dri_bo *bo;
1768     int i;
1769
1770     dri_bo_unreference(pp_context->curbe.bo);
1771     bo = dri_bo_alloc(i965->intel.bufmgr,
1772                       "constant buffer",
1773                       4096, 
1774                       4096);
1775     assert(bo);
1776     pp_context->curbe.bo = bo;
1777
1778     dri_bo_unreference(pp_context->binding_table.bo);
1779     bo = dri_bo_alloc(i965->intel.bufmgr, 
1780                       "binding table",
1781                       sizeof(unsigned int), 
1782                       4096);
1783     assert(bo);
1784     pp_context->binding_table.bo = bo;
1785
1786     dri_bo_unreference(pp_context->idrt.bo);
1787     bo = dri_bo_alloc(i965->intel.bufmgr, 
1788                       "interface descriptor",
1789                       sizeof(struct i965_interface_descriptor), 
1790                       4096);
1791     assert(bo);
1792     pp_context->idrt.bo = bo;
1793     pp_context->idrt.num_interface_descriptors = 0;
1794
1795     dri_bo_unreference(pp_context->sampler_state_table.bo);
1796     bo = dri_bo_alloc(i965->intel.bufmgr, 
1797                       "sampler state table", 
1798                       4096,
1799                       4096);
1800     assert(bo);
1801     dri_bo_map(bo, True);
1802     memset(bo->virtual, 0, bo->size);
1803     dri_bo_unmap(bo);
1804     pp_context->sampler_state_table.bo = bo;
1805
1806     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
1807     bo = dri_bo_alloc(i965->intel.bufmgr, 
1808                       "sampler 8x8 state ",
1809                       4096,
1810                       4096);
1811     assert(bo);
1812     pp_context->sampler_state_table.bo_8x8 = bo;
1813
1814     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
1815     bo = dri_bo_alloc(i965->intel.bufmgr, 
1816                       "sampler 8x8 state ",
1817                       4096,
1818                       4096);
1819     assert(bo);
1820     pp_context->sampler_state_table.bo_8x8_uv = bo;
1821
1822     dri_bo_unreference(pp_context->vfe_state.bo);
1823     bo = dri_bo_alloc(i965->intel.bufmgr, 
1824                       "vfe state", 
1825                       sizeof(struct i965_vfe_state), 
1826                       4096);
1827     assert(bo);
1828     pp_context->vfe_state.bo = bo;
1829     
1830     for (i = 0; i < MAX_PP_SURFACES; i++) {
1831         dri_bo_unreference(pp_context->surfaces[i].ss_bo);
1832         pp_context->surfaces[i].ss_bo = NULL;
1833
1834         dri_bo_unreference(pp_context->surfaces[i].s_bo);
1835         pp_context->surfaces[i].s_bo = NULL;
1836     }
1837
1838     memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
1839     memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
1840     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
1841     pp_context->current_pp = pp_index;
1842     pp_module = &pp_context->pp_modules[pp_index];
1843     
1844     if (pp_module->initialize)
1845         pp_module->initialize(ctx, surface, input, srcw, srch, destw, desth);
1846 }
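/*
 * gen6_pp_initialize() below performs the same buffer setup; the only
 * substantive difference is the interface descriptor type
 * (struct i965_interface_descriptor here versus
 * struct gen6_interface_descriptor_data on Gen6/Gen7).
 */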
1847
1848 static void
1849 ironlake_post_processing(VADriverContextP ctx,
1850                          VASurfaceID surface,
1851                          int input,
1852                          short srcx,
1853                          short srcy,
1854                          unsigned short srcw,
1855                          unsigned short srch,
1856                          short destx,
1857                          short desty,
1858                          unsigned short destw,
1859                          unsigned short desth,
1860                          int pp_index)
1861 {
1862     ironlake_pp_initialize(ctx, surface, input,
1863                            srcx, srcy, srcw, srch,
1864                            destx, desty, destw, desth,
1865                            pp_index);
1866     ironlake_pp_states_setup(ctx);
1867     ironlake_pp_pipeline_setup(ctx);
1868 }
1869
1870 static void
1871 gen6_pp_initialize(VADriverContextP ctx,
1872                    VASurfaceID surface,
1873                    int input,
1874                    short srcx,
1875                    short srcy,
1876                    unsigned short srcw,
1877                    unsigned short srch,
1878                    short destx,
1879                    short desty,
1880                    unsigned short destw,
1881                    unsigned short desth,
1882                    int pp_index)
1883 {
1884     struct i965_driver_data *i965 = i965_driver_data(ctx);
1885     struct i965_post_processing_context *pp_context = i965->pp_context;
1886     struct pp_module *pp_module;
1887     dri_bo *bo;
1888     int i;
1889
1890     dri_bo_unreference(pp_context->curbe.bo);
1891     bo = dri_bo_alloc(i965->intel.bufmgr,
1892                       "constant buffer",
1893                       4096, 
1894                       4096);
1895     assert(bo);
1896     pp_context->curbe.bo = bo;
1897
1898     dri_bo_unreference(pp_context->binding_table.bo);
1899     bo = dri_bo_alloc(i965->intel.bufmgr, 
1900                       "binding table",
1901                       sizeof(unsigned int), 
1902                       4096);
1903     assert(bo);
1904     pp_context->binding_table.bo = bo;
1905
1906     dri_bo_unreference(pp_context->idrt.bo);
1907     bo = dri_bo_alloc(i965->intel.bufmgr, 
1908                       "interface descriptor",
1909                       sizeof(struct gen6_interface_descriptor_data), 
1910                       4096);
1911     assert(bo);
1912     pp_context->idrt.bo = bo;
1913     pp_context->idrt.num_interface_descriptors = 0;
1914
1915     dri_bo_unreference(pp_context->sampler_state_table.bo);
1916     bo = dri_bo_alloc(i965->intel.bufmgr, 
1917                       "sampler state table", 
1918                       4096,
1919                       4096);
1920     assert(bo);
1921     dri_bo_map(bo, True);
1922     memset(bo->virtual, 0, bo->size);
1923     dri_bo_unmap(bo);
1924     pp_context->sampler_state_table.bo = bo;
1925
1926     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
1927     bo = dri_bo_alloc(i965->intel.bufmgr, 
1928                       "sampler 8x8 state ",
1929                       4096,
1930                       4096);
1931     assert(bo);
1932     pp_context->sampler_state_table.bo_8x8 = bo;
1933
1934     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
1935     bo = dri_bo_alloc(i965->intel.bufmgr, 
1936                       "sampler 8x8 state ",
1937                       4096,
1938                       4096);
1939     assert(bo);
1940     pp_context->sampler_state_table.bo_8x8_uv = bo;
1941
1942     dri_bo_unreference(pp_context->vfe_state.bo);
1943     bo = dri_bo_alloc(i965->intel.bufmgr, 
1944                       "vfe state", 
1945                       sizeof(struct i965_vfe_state), 
1946                       4096);
1947     assert(bo);
1948     pp_context->vfe_state.bo = bo;
1949     
1950     for (i = 0; i < MAX_PP_SURFACES; i++) {
1951         dri_bo_unreference(pp_context->surfaces[i].ss_bo);
1952         pp_context->surfaces[i].ss_bo = NULL;
1953
1954         dri_bo_unreference(pp_context->surfaces[i].s_bo);
1955         pp_context->surfaces[i].s_bo = NULL;
1956     }
1957
1958     memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
1959     memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
1960     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
1961     pp_context->current_pp = pp_index;
1962     pp_module = &pp_context->pp_modules[pp_index];
1963     
1964     if (pp_module->initialize)
1965         pp_module->initialize(ctx, surface, input, srcw, srch, destw, desth);
1966 }
1967
1968 static void
1969 gen6_pp_binding_table(struct i965_post_processing_context *pp_context)
1970 {
1971     unsigned int *binding_table;
1972     dri_bo *bo = pp_context->binding_table.bo;
1973     int i;
1974
1975     dri_bo_map(bo, True);
1976     assert(bo->virtual);
1977     binding_table = bo->virtual;
1978     memset(binding_table, 0, bo->size);
1979
1980     for (i = 0; i < MAX_PP_SURFACES; i++) {
1981         if (pp_context->surfaces[i].ss_bo) {
1982             assert(pp_context->surfaces[i].s_bo);
1983
1984             binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
1985             dri_bo_emit_reloc(bo,
1986                               I915_GEM_DOMAIN_INSTRUCTION, 0,
1987                               0,
1988                               i * sizeof(*binding_table),
1989                               pp_context->surfaces[i].ss_bo);
1990         }
1991     
1992     }
1993
1994     dri_bo_unmap(bo);
1995 }
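/*
 * Each binding table slot is filled with the presumed graphics address of
 * the matching SURFACE_STATE and backed by a relocation, so the value is
 * fixed up at execbuffer time if the surface-state bo moves.  Note that the
 * binding table bo is allocated with only sizeof(unsigned int) in
 * gen6_pp_initialize(); this appears to rely on the buffer manager rounding
 * allocations up to a page, which comfortably holds MAX_PP_SURFACES entries.
 */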
1996
1997 static void
1998 gen6_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
1999 {
2000     struct gen6_interface_descriptor_data *desc;
2001     dri_bo *bo;
2002     int pp_index = pp_context->current_pp;
2003
2004     bo = pp_context->idrt.bo;
2005     dri_bo_map(bo, True);
2006     assert(bo->virtual);
2007     desc = bo->virtual;
2008     memset(desc, 0, sizeof(*desc));
2009     desc->desc0.kernel_start_pointer = 
2010         pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
2011     desc->desc1.single_program_flow = 1;
2012     desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
2013     desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
2014     desc->desc2.sampler_state_pointer = 
2015         pp_context->sampler_state_table.bo->offset >> 5;
2016     desc->desc3.binding_table_entry_count = 0;
2017     desc->desc3.binding_table_pointer = 
2018         pp_context->binding_table.bo->offset >> 5; /*reloc */
2019     desc->desc4.constant_urb_entry_read_offset = 0;
2020     desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */
2021
2022     dri_bo_emit_reloc(bo,
2023                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2024                       0,
2025                       offsetof(struct gen6_interface_descriptor_data, desc0),
2026                       pp_context->pp_modules[pp_index].kernel.bo);
2027
2028     dri_bo_emit_reloc(bo,
2029                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2030                       desc->desc2.sampler_count << 2,
2031                       offsetof(struct gen6_interface_descriptor_data, desc2),
2032                       pp_context->sampler_state_table.bo);
2033
2034     dri_bo_emit_reloc(bo,
2035                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2036                       desc->desc3.binding_table_entry_count,
2037                       offsetof(struct gen6_interface_descriptor_data, desc3),
2038                       pp_context->binding_table.bo);
2039
2040     dri_bo_unmap(bo);
2041     pp_context->idrt.num_interface_descriptors++;
2042 }
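/*
 * The >> 6 and >> 5 shifts reflect the 64-byte alignment required for the
 * kernel start pointer and the 32-byte alignment of the sampler-state and
 * binding-table pointers.  Each relocation then rewrites the whole dword
 * with the target bo offset plus the low-order bits passed as the reloc
 * delta (e.g. sampler_count << 2), which presumably reproduces the packed
 * field layout after the buffers move.
 */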
2043
2044 static void
2045 gen6_pp_upload_constants(struct i965_post_processing_context *pp_context)
2046 {
2047     unsigned char *constant_buffer;
2048
2049     assert(sizeof(pp_static_parameter) == 128);
2050     dri_bo_map(pp_context->curbe.bo, True);
2051     assert(pp_context->curbe.bo->virtual);
2052     constant_buffer = pp_context->curbe.bo->virtual;
2053     memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
2054     dri_bo_unmap(pp_context->curbe.bo);
2055 }
2056
2057 static void
2058 gen6_pp_states_setup(VADriverContextP ctx)
2059 {
2060     struct i965_driver_data *i965 = i965_driver_data(ctx);
2061     struct i965_post_processing_context *pp_context = i965->pp_context;
2062
2063     gen6_pp_binding_table(pp_context);
2064     gen6_pp_interface_descriptor_table(pp_context);
2065     gen6_pp_upload_constants(pp_context);
2066 }
2067
2068 static void
2069 gen6_pp_pipeline_select(VADriverContextP ctx)
2070 {
2071     struct i965_driver_data *i965 = i965_driver_data(ctx);
2072     struct intel_batchbuffer *batch = i965->batch;
2073
2074     BEGIN_BATCH(batch, 1);
2075     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
2076     ADVANCE_BATCH(batch);
2077 }
2078
2079 static void
2080 gen6_pp_state_base_address(VADriverContextP ctx)
2081 {
2082     struct i965_driver_data *i965 = i965_driver_data(ctx);
2083     struct intel_batchbuffer *batch = i965->batch;
2084
2085     BEGIN_BATCH(batch, 10);
2086     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
2087     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2088     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2089     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2090     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2091     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2092     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2093     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2094     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2095     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
2096     ADVANCE_BATCH(batch);
2097 }
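/*
 * All base addresses are programmed to zero (with the MODIFY bit set), so
 * the shifted state pointers above are treated as absolute graphics
 * addresses rather than offsets from a non-zero base, which is presumably
 * why plain bo offsets plus relocations are sufficient.
 */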
2098
2099 static void
2100 gen6_pp_vfe_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2101 {
2102     struct i965_driver_data *i965 = i965_driver_data(ctx);
2103     struct intel_batchbuffer *batch = i965->batch;
2104
2105     BEGIN_BATCH(batch, 8);
2106     OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
2107     OUT_BATCH(batch, 0);
2108     OUT_BATCH(batch,
2109               (pp_context->urb.num_vfe_entries - 1) << 16 |
2110               pp_context->urb.num_vfe_entries << 8);
2111     OUT_BATCH(batch, 0);
2112     OUT_BATCH(batch,
2113               (pp_context->urb.size_vfe_entry * 2) << 16 |  /* in 256 bits unit */
2114               (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 - 1));            /* in 256 bits unit */
2115     OUT_BATCH(batch, 0);
2116     OUT_BATCH(batch, 0);
2117     OUT_BATCH(batch, 0);
2118     ADVANCE_BATCH(batch);
2119 }
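/*
 * pp_context->urb keeps its sizes in 512-bit units (see
 * i965_post_processing_init()), while MEDIA_VFE_STATE expects 256-bit
 * units, hence the "* 2" conversions above.
 */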
2120
2121 static void
2122 gen6_pp_curbe_load(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2123 {
2124     struct i965_driver_data *i965 = i965_driver_data(ctx);
2125     struct intel_batchbuffer *batch = i965->batch;
2126
2127     assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512 <= pp_context->curbe.bo->size);
2128
2129     BEGIN_BATCH(batch, 4);
2130     OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
2131     OUT_BATCH(batch, 0);
2132     OUT_BATCH(batch,
2133               pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512);
2134     OUT_RELOC(batch, 
2135               pp_context->curbe.bo,
2136               I915_GEM_DOMAIN_INSTRUCTION, 0,
2137               0);
2138     ADVANCE_BATCH(batch);
2139 }
2140
2141 static void
2142 gen6_interface_descriptor_load(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2143 {
2144     struct i965_driver_data *i965 = i965_driver_data(ctx);
2145     struct intel_batchbuffer *batch = i965->batch;
2146
2147     BEGIN_BATCH(batch, 4);
2148     OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
2149     OUT_BATCH(batch, 0);
2150     OUT_BATCH(batch,
2151               pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
2152     OUT_RELOC(batch, 
2153               pp_context->idrt.bo,
2154               I915_GEM_DOMAIN_INSTRUCTION, 0,
2155               0);
2156     ADVANCE_BATCH(batch);
2157 }
2158
2159 static void
2160 gen6_pp_object_walker(VADriverContextP ctx, struct i965_post_processing_context *pp_context)
2161 {
2162     struct i965_driver_data *i965 = i965_driver_data(ctx);
2163     struct intel_batchbuffer *batch = i965->batch;
2164     int x, x_steps, y, y_steps;
2165
2166     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
2167     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
2168
2169     for (y = 0; y < y_steps; y++) {
2170         for (x = 0; x < x_steps; x++) {
2171             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
2172                 BEGIN_BATCH(batch, 22);
2173                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 20);
2174                 OUT_BATCH(batch, 0);
2175                 OUT_BATCH(batch, 0); /* no indirect data */
2176                 OUT_BATCH(batch, 0);
2177                 OUT_BATCH(batch, 0); /* scoreboard */
2178                 OUT_BATCH(batch, 0);
2179
2180                 /* inline data grf 5-6 */
2181                 assert(sizeof(pp_inline_parameter) == 64);
2182                 intel_batchbuffer_data(batch, &pp_inline_parameter, sizeof(pp_inline_parameter));
2183
2184                 ADVANCE_BATCH(batch);
2185             }
2186         }
2187     }
2188 }
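/*
 * Each block issues one MEDIA_OBJECT of 22 dwords: 6 command dwords plus
 * the 64-byte (16 dword) inline parameter blob that lands in GRF 5-6 of the
 * post-processing kernel.  A non-zero return from pp_set_block_parameter()
 * skips the block entirely.
 */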
2189
2190 static void
2191 gen6_pp_pipeline_setup(VADriverContextP ctx)
2192 {
2193     struct i965_driver_data *i965 = i965_driver_data(ctx);
2194     struct intel_batchbuffer *batch = i965->batch;
2195     struct i965_post_processing_context *pp_context = i965->pp_context;
2196
2197     intel_batchbuffer_start_atomic(batch, 0x1000);
2198     intel_batchbuffer_emit_mi_flush(batch);
2199     gen6_pp_pipeline_select(ctx);
2200     gen6_pp_curbe_load(ctx, pp_context);
2201     gen6_interface_descriptor_load(ctx, pp_context);
2202     gen6_pp_state_base_address(ctx);
2203     gen6_pp_vfe_state(ctx, pp_context);
2204     gen6_pp_object_walker(ctx, pp_context);
2205     intel_batchbuffer_end_atomic(batch);
2206 }
2207
2208 static void
2209 gen6_post_processing(VADriverContextP ctx,
2210                      VASurfaceID surface,
2211                      int input,
2212                      short srcx,
2213                      short srcy,
2214                      unsigned short srcw,
2215                      unsigned short srch,
2216                      short destx,
2217                      short desty,
2218                      unsigned short destw,
2219                      unsigned short desth,
2220                      int pp_index)
2221 {
2222     gen6_pp_initialize(ctx, surface, input,
2223                        srcx, srcy, srcw, srch,
2224                        destx, desty, destw, desth,
2225                        pp_index);
2226     gen6_pp_states_setup(ctx);
2227     gen6_pp_pipeline_setup(ctx);
2228 }
2229
2230 static void
2231 i965_post_processing_internal(VADriverContextP ctx,
2232                               VASurfaceID surface,
2233                               int input,
2234                               short srcx,
2235                               short srcy,
2236                               unsigned short srcw,
2237                               unsigned short srch,
2238                               short destx,
2239                               short desty,
2240                               unsigned short destw,
2241                               unsigned short desth,
2242                               int pp_index)
2243 {
2244     struct i965_driver_data *i965 = i965_driver_data(ctx);
2245
2246     if (IS_GEN6(i965->intel.device_id) ||
2247         IS_GEN7(i965->intel.device_id))
2248         gen6_post_processing(ctx, surface, input,
2249                              srcx, srcy, srcw, srch,
2250                              destx, desty, destw, desth,
2251                              pp_index);
2252     else
2253         ironlake_post_processing(ctx, surface, input,
2254                                  srcx, srcy, srcw, srch,
2255                                  destx, desty, destw, desth,
2256                                  pp_index);
2257 }
2258
2259 void
2260 i965_post_processing(VADriverContextP ctx,
2261                      VASurfaceID surface,
2262                      short srcx,
2263                      short srcy,
2264                      unsigned short srcw,
2265                      unsigned short srch,
2266                      short destx,
2267                      short desty,
2268                      unsigned short destw,
2269                      unsigned short desth,
2270                      unsigned int flag)
2271 {
2272     struct i965_driver_data *i965 = i965_driver_data(ctx);
2273
2274     if (HAS_PP(i965)) {
2275         /* Currently only support post processing for NV12 surface */
2276         if (i965->render_state.interleaved_uv) {
2277             int internal_input = 0;
2278
2279             if (flag & I965_PP_FLAG_DEINTERLACING) {
2280                 i965_post_processing_internal(ctx, surface, internal_input,
2281                                               srcx, srcy, srcw, srch,
2282                                               destx, desty, destw, desth,
2283                                               PP_NV12_DNDI);
2284                 internal_input = 1;
2285             }
2286
2287             if (flag & I965_PP_FLAG_AVS) {
2288                 i965_post_processing_internal(ctx, surface, internal_input,
2289                                               srcx, srcy, srcw, srch,
2290                                               destx, desty, destw, desth,
2291                                               PP_NV12_AVS);
2292             }
2293         }
2294     }
2295 }
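/*
 * Illustrative usage sketch (not compiled): how a caller might request
 * deinterlacing followed by AVS scaling on an NV12 surface.  The surface id
 * and rectangles below are hypothetical.
 */
#if 0
    i965_post_processing(ctx, surface,
                         0, 0, 720, 480,          /* source rectangle */
                         0, 0, 1280, 720,         /* destination rectangle */
                         I965_PP_FLAG_DEINTERLACING | I965_PP_FLAG_AVS);
#endif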
2296
2297 Bool
2298 i965_post_processing_terminate(VADriverContextP ctx)
2299 {
2300     struct i965_driver_data *i965 = i965_driver_data(ctx);
2301     struct i965_post_processing_context *pp_context = i965->pp_context;
2302     int i;
2303
2304     if (HAS_PP(i965)) {
2305         if (pp_context) {
2306             dri_bo_unreference(pp_context->curbe.bo);
2307             pp_context->curbe.bo = NULL;
2308
2309             for (i = 0; i < MAX_PP_SURFACES; i++) {
2310                 dri_bo_unreference(pp_context->surfaces[i].ss_bo);
2311                 pp_context->surfaces[i].ss_bo = NULL;
2312
2313                 dri_bo_unreference(pp_context->surfaces[i].s_bo);
2314                 pp_context->surfaces[i].s_bo = NULL;
2315             }
2316
2317             dri_bo_unreference(pp_context->sampler_state_table.bo);
2318             pp_context->sampler_state_table.bo = NULL;
2319
2320             dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
2321             pp_context->sampler_state_table.bo_8x8 = NULL;
2322
2323             dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
2324             pp_context->sampler_state_table.bo_8x8_uv = NULL;
2325
2326             dri_bo_unreference(pp_context->binding_table.bo);
2327             pp_context->binding_table.bo = NULL;
2328
2329             dri_bo_unreference(pp_context->idrt.bo);
2330             pp_context->idrt.bo = NULL;
2331             pp_context->idrt.num_interface_descriptors = 0;
2332
2333             dri_bo_unreference(pp_context->vfe_state.bo);
2334             pp_context->vfe_state.bo = NULL;
2335
2336             dri_bo_unreference(pp_context->stmm.bo);
2337             pp_context->stmm.bo = NULL;
2338
2339             for (i = 0; i < NUM_PP_MODULES; i++) {
2340                 struct pp_module *pp_module = &pp_context->pp_modules[i];
2341
2342                 dri_bo_unreference(pp_module->kernel.bo);
2343                 pp_module->kernel.bo = NULL;
2344             }
2345
2346             free(pp_context);
2347         }
2348
2349         i965->pp_context = NULL;
2350     }
2351
2352     return True;
2353 }
2354
2355 Bool
2356 i965_post_processing_init(VADriverContextP ctx)
2357 {
2358     struct i965_driver_data *i965 = i965_driver_data(ctx);
2359     struct i965_post_processing_context *pp_context = i965->pp_context;
2360     int i;
2361
2362     if (HAS_PP(i965)) {
2363         if (pp_context == NULL) {
2364             pp_context = calloc(1, sizeof(*pp_context));
2365             i965->pp_context = pp_context;
2366
2367             pp_context->urb.size = URB_SIZE((&i965->intel));
2368             pp_context->urb.num_vfe_entries = 32;
2369             pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
2370             pp_context->urb.num_cs_entries = 1;
2371             pp_context->urb.size_cs_entry = 2;      /* in 512 bits unit */
2372             pp_context->urb.vfe_start = 0;
2373             pp_context->urb.cs_start = pp_context->urb.vfe_start + 
2374                 pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
2375             assert(pp_context->urb.cs_start + 
2376                    pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
2377
2378             assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
2379             assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
2380
2381             if (IS_GEN6(i965->intel.device_id) ||
2382                 IS_GEN7(i965->intel.device_id))
2383                 memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
2384             else if (IS_IRONLAKE(i965->intel.device_id))
2385                 memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
2386
2387             for (i = 0; i < NUM_PP_MODULES; i++) {
2388                 struct pp_module *pp_module = &pp_context->pp_modules[i];
2389                 dri_bo_unreference(pp_module->kernel.bo);
2390                 pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
2391                                                     pp_module->kernel.name,
2392                                                     pp_module->kernel.size,
2393                                                     4096);
2394                 assert(pp_module->kernel.bo);
2395                 dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
2396             }
2397         }
2398     }
2399
2400     return True;
2401 }