i965_drv_video: code clean up
[profile/ivi/vaapi-intel-driver.git] / src / i965_post_processing.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include <va/va_backend.h>
35
36 #include "intel_batchbuffer.h"
37 #include "intel_driver.h"
38 #include "i965_defines.h"
39 #include "i965_structs.h"
40 #include "i965_drv_video.h"
41 #include "i965_post_processing.h"
42 #include "i965_render.h"
43
/* Post-processing is only available on Ironlake (Gen5), Gen6 and Gen7. */
#define HAS_PP(ctx) (IS_IRONLAKE((ctx)->intel.device_id) ||     \
                     IS_GEN6((ctx)->intel.device_id) ||         \
                     IS_GEN7((ctx)->intel.device_id))
47
/* Gen5 (Ironlake) post-processing kernel binaries.  Each array is filled
 * from a pre-assembled .g4b.gen5 file; entries are 4-dword instructions. */
static const uint32_t pp_null_gen5[][4] = {
#include "shaders/post_processing/null.g4b.gen5"
};

static const uint32_t pp_nv12_load_save_gen5[][4] = {
#include "shaders/post_processing/nv12_load_save_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_scaling_gen5[][4] = {
#include "shaders/post_processing/nv12_scaling_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_avs_gen5[][4] = {
#include "shaders/post_processing/nv12_avs_nv12.g4b.gen5"
};

static const uint32_t pp_nv12_dndi_gen5[][4] = {
#include "shaders/post_processing/nv12_dndi_nv12.g4b.gen5"
};
67
/* Forward declarations for the per-module initialize() hooks referenced by
 * the pp_modules_gen5/gen6 tables below; definitions follow later in this
 * file. */
static void pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                               VASurfaceID in_surface_id, VASurfaceID out_surface_id,
                               const VARectangle *src_rect, const VARectangle *dst_rect);
static void pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                                   VASurfaceID in_surface_id, VASurfaceID out_surface_id,
                                   const VARectangle *src_rect, const VARectangle *dst_rect);
static void pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                                       VASurfaceID in_surface_id, VASurfaceID out_surface_id,
                                       const VARectangle *src_rect, const VARectangle *dst_rect);
static void pp_nv12_load_save_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                                         VASurfaceID in_surface_id, VASurfaceID out_surface_id,
                                         const VARectangle *src_rect, const VARectangle *dst_rect);
static void pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                                    VASurfaceID in_surface_id, VASurfaceID out_surface_id,
                                    const VARectangle *src_rect, const VARectangle *dst_rect);
83
/* Gen5 post-processing module table, indexed by the PP_* ids.  Each entry
 * pairs a kernel binary (name, id, code, size, bo) with the initialize()
 * hook that sets up its surfaces and parameters. */
static struct pp_module pp_modules_gen5[] = {
    {
        {
            "NULL module (for testing)",
            PP_NULL,
            pp_null_gen5,
            sizeof(pp_null_gen5),
            NULL,
        },

        pp_null_initialize,
    },

    {
        {
            "NV12 Load & Save module",
            PP_NV12_LOAD_SAVE,
            pp_nv12_load_save_gen5,
            sizeof(pp_nv12_load_save_gen5),
            NULL,
        },

        pp_nv12_load_save_initialize,
    },

    {
        {
            "NV12 Scaling module",
            PP_NV12_SCALING,
            pp_nv12_scaling_gen5,
            sizeof(pp_nv12_scaling_gen5),
            NULL,
        },

        pp_nv12_scaling_initialize,
    },

    {
        {
            "NV12 AVS module",
            PP_NV12_AVS,
            pp_nv12_avs_gen5,
            sizeof(pp_nv12_avs_gen5),
            NULL,
        },

        pp_nv12_avs_initialize,
    },

    {
        {
            "NV12 DNDI module",
            PP_NV12_DNDI,
            pp_nv12_dndi_gen5,
            sizeof(pp_nv12_dndi_gen5),
            NULL,
        },

        pp_nv12_dndi_initialize,
    },
};
145
/* Gen6 (Sandy Bridge) post-processing kernel binaries, pre-assembled into
 * .g6b files; same set of operations as the Gen5 kernels above. */
static const uint32_t pp_null_gen6[][4] = {
#include "shaders/post_processing/null.g6b"
};

static const uint32_t pp_nv12_load_save_gen6[][4] = {
#include "shaders/post_processing/nv12_load_save_nv12.g6b"
};

static const uint32_t pp_nv12_scaling_gen6[][4] = {
#include "shaders/post_processing/nv12_scaling_nv12.g6b"
};

static const uint32_t pp_nv12_avs_gen6[][4] = {
#include "shaders/post_processing/nv12_avs_nv12.g6b"
};

static const uint32_t pp_nv12_dndi_gen6[][4] = {
#include "shaders/post_processing/nv12_dndi_nv12.g6b"
};
165
/* Gen6 module table; mirrors pp_modules_gen5 entry-for-entry (same PP_*
 * order and the same initialize() hooks, only the kernels differ). */
static struct pp_module pp_modules_gen6[] = {
    {
        {
            "NULL module (for testing)",
            PP_NULL,
            pp_null_gen6,
            sizeof(pp_null_gen6),
            NULL,
        },

        pp_null_initialize,
    },

    {
        {
            "NV12 Load & Save module",
            PP_NV12_LOAD_SAVE,
            pp_nv12_load_save_gen6,
            sizeof(pp_nv12_load_save_gen6),
            NULL,
        },

        pp_nv12_load_save_initialize,
    },

    {
        {
            "NV12 Scaling module",
            PP_NV12_SCALING,
            pp_nv12_scaling_gen6,
            sizeof(pp_nv12_scaling_gen6),
            NULL,
        },

        pp_nv12_scaling_initialize,
    },

    {
        {
            "NV12 AVS module",
            PP_NV12_AVS,
            pp_nv12_avs_gen6,
            sizeof(pp_nv12_avs_gen6),
            NULL,
        },

        pp_nv12_avs_initialize,
    },

    {
        {
            "NV12 DNDI module",
            PP_NV12_DNDI,
            pp_nv12_dndi_gen6,
            sizeof(pp_nv12_dndi_gen6),
            NULL,
        },

        pp_nv12_dndi_initialize,
    },
};
227
/* Shorthands that expand to members of a local `pp_context` variable; they
 * may only be used inside functions where such a variable is in scope. */
#define pp_static_parameter     pp_context->pp_static_parameter
#define pp_inline_parameter     pp_context->pp_inline_parameter
230
231 static void
232 pp_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
233 {
234     switch (tiling) {
235     case I915_TILING_NONE:
236         ss->ss3.tiled_surface = 0;
237         ss->ss3.tile_walk = 0;
238         break;
239     case I915_TILING_X:
240         ss->ss3.tiled_surface = 1;
241         ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
242         break;
243     case I915_TILING_Y:
244         ss->ss3.tiled_surface = 1;
245         ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
246         break;
247     }
248 }
249
250 static void
251 pp_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
252 {
253     switch (tiling) {
254     case I915_TILING_NONE:
255         ss->ss2.tiled_surface = 0;
256         ss->ss2.tile_walk = 0;
257         break;
258     case I915_TILING_X:
259         ss->ss2.tiled_surface = 1;
260         ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
261         break;
262     case I915_TILING_Y:
263         ss->ss2.tiled_surface = 1;
264         ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
265         break;
266     }
267 }
268
/* Intentionally empty: surface states are created by the per-module
 * initialize() hooks (via i965_pp_set_surface_state and friends), so there
 * is nothing to do at this stage.  Kept for symmetry with the other
 * ironlake_pp_* state-setup steps. */
static void
ironlake_pp_surface_state(struct i965_post_processing_context *pp_context)
{

}
274
/*
 * Fill in the single MEDIA interface descriptor for the currently selected
 * post-processing kernel: kernel start address, CURBE read length (grf 1-4),
 * sampler state table and binding table pointers.  All three pointers are
 * written as presumed GPU offsets and backed by relocations so the kernel
 * can be relocated at exec time.
 */
static void
ironlake_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
{
    struct i965_interface_descriptor *desc;
    dri_bo *bo;
    int pp_index = pp_context->current_pp;

    bo = pp_context->idrt.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;
    memset(desc, 0, sizeof(*desc));
    desc->desc0.grf_reg_blocks = 10;
    desc->desc0.kernel_start_pointer = pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
    desc->desc1.const_urb_entry_read_offset = 0;
    desc->desc1.const_urb_entry_read_len = 4; /* grf 1-4 */
    desc->desc2.sampler_state_pointer = pp_context->sampler_state_table.bo->offset >> 5;
    desc->desc2.sampler_count = 0;
    desc->desc3.binding_table_entry_count = 0;
    desc->desc3.binding_table_pointer = 
        pp_context->binding_table.bo->offset >> 5; /*reloc */

    /* The reloc delta re-encodes the low bits of each descriptor dword
     * (grf_reg_blocks, sampler_count << 2, binding_table_entry_count) so
     * the patched address keeps those fields intact. */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      desc->desc0.grf_reg_blocks,
                      offsetof(struct i965_interface_descriptor, desc0),
                      pp_context->pp_modules[pp_index].kernel.bo);

    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      desc->desc2.sampler_count << 2,
                      offsetof(struct i965_interface_descriptor, desc2),
                      pp_context->sampler_state_table.bo);

    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      desc->desc3.binding_table_entry_count,
                      offsetof(struct i965_interface_descriptor, desc3),
                      pp_context->binding_table.bo);

    dri_bo_unmap(bo);
    pp_context->idrt.num_interface_descriptors++;
}
318
319 static void
320 ironlake_pp_binding_table(struct i965_post_processing_context *pp_context)
321 {
322     unsigned int *binding_table;
323     dri_bo *bo = pp_context->binding_table.bo;
324     int i;
325
326     dri_bo_map(bo, 1);
327     assert(bo->virtual);
328     binding_table = bo->virtual;
329     memset(binding_table, 0, bo->size);
330
331     for (i = 0; i < MAX_PP_SURFACES; i++) {
332         if (pp_context->surfaces[i].ss_bo) {
333             assert(pp_context->surfaces[i].s_bo);
334
335             binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
336             dri_bo_emit_reloc(bo,
337                               I915_GEM_DOMAIN_INSTRUCTION, 0,
338                               0,
339                               i * sizeof(*binding_table),
340                               pp_context->surfaces[i].ss_bo);
341         }
342     
343     }
344
345     dri_bo_unmap(bo);
346 }
347
/*
 * Fill the VFE (Video Front End) state: thread/URB limits from the urb
 * layout computed elsewhere in pp_context, generic mode, and a pointer to
 * the interface descriptor table (patched via relocation).
 */
static void
ironlake_pp_vfe_state(struct i965_post_processing_context *pp_context)
{
    struct i965_vfe_state *vfe_state;
    dri_bo *bo;

    bo = pp_context->vfe_state.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);
    vfe_state = bo->virtual;
    memset(vfe_state, 0, sizeof(*vfe_state));
    /* Both fields are encoded as value - 1 in the hardware state. */
    vfe_state->vfe1.max_threads = pp_context->urb.num_vfe_entries - 1;
    vfe_state->vfe1.urb_entry_alloc_size = pp_context->urb.size_vfe_entry - 1;
    vfe_state->vfe1.num_urb_entries = pp_context->urb.num_vfe_entries;
    vfe_state->vfe1.vfe_mode = VFE_GENERIC_MODE;
    vfe_state->vfe1.children_present = 0;
    vfe_state->vfe2.interface_descriptor_base = 
        pp_context->idrt.bo->offset >> 4; /* reloc */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_vfe_state, vfe2),
                      pp_context->idrt.bo);
    dri_bo_unmap(bo);
}
373
374 static void
375 ironlake_pp_upload_constants(struct i965_post_processing_context *pp_context)
376 {
377     unsigned char *constant_buffer;
378
379     assert(sizeof(pp_static_parameter) == 128);
380     dri_bo_map(pp_context->curbe.bo, 1);
381     assert(pp_context->curbe.bo->virtual);
382     constant_buffer = pp_context->curbe.bo->virtual;
383     memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
384     dri_bo_unmap(pp_context->curbe.bo);
385 }
386
/*
 * Prepare all indirect state objects for one Ironlake post-processing run,
 * in dependency order: (empty) surface state step, binding table, interface
 * descriptors, VFE state, then the CURBE constants.
 */
static void
ironlake_pp_states_setup(VADriverContextP ctx,
                         struct i965_post_processing_context *pp_context)
{
    ironlake_pp_surface_state(pp_context);
    ironlake_pp_binding_table(pp_context);
    ironlake_pp_interface_descriptor_table(pp_context);
    ironlake_pp_vfe_state(pp_context);
    ironlake_pp_upload_constants(pp_context);
}
397
/* Switch the hardware pipeline to MEDIA mode for the media objects below. */
static void
ironlake_pp_pipeline_select(VADriverContextP ctx,
                            struct i965_post_processing_context *pp_context)
{
    struct intel_batchbuffer *batch = pp_context->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
    ADVANCE_BATCH(batch);
}
408
/*
 * Emit URB_FENCE splitting the URB between the VFE section (ends at
 * urb.cs_start) and the CS section (ends at urb.size).
 */
static void
ironlake_pp_urb_layout(VADriverContextP ctx,
                       struct i965_post_processing_context *pp_context)
{
    struct intel_batchbuffer *batch = pp_context->batch;
    unsigned int vfe_fence, cs_fence;

    vfe_fence = pp_context->urb.cs_start;
    cs_fence = pp_context->urb.size;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, CMD_URB_FENCE | UF0_VFE_REALLOC | UF0_CS_REALLOC | 1);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 
              (vfe_fence << UF2_VFE_FENCE_SHIFT) |      /* VFE_SIZE */
              (cs_fence << UF2_CS_FENCE_SHIFT));        /* CS_SIZE */
    ADVANCE_BATCH(batch);
}
427
428 static void
429 ironlake_pp_state_base_address(VADriverContextP ctx,
430                                struct i965_post_processing_context *pp_context)
431 {
432     struct intel_batchbuffer *batch = pp_context->batch;
433
434     BEGIN_BATCH(batch, 8);
435     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
436     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
437     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
438     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
439     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
440     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
441     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
442     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
443     ADVANCE_BATCH(batch);
444 }
445
/* Point the media pipeline at the VFE state BO (second dword is unused). */
static void
ironlake_pp_state_pointers(VADriverContextP ctx,
                           struct i965_post_processing_context *pp_context)
{
    struct intel_batchbuffer *batch = pp_context->batch;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, CMD_MEDIA_STATE_POINTERS | 1);
    OUT_BATCH(batch, 0);
    OUT_RELOC(batch, pp_context->vfe_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}
458
/*
 * Emit CS_URB_STATE describing the constant (CURBE) URB section: entry
 * allocation size (encoded as size - 1) and number of entries.
 */
static void 
ironlake_pp_cs_urb_layout(VADriverContextP ctx,
                          struct i965_post_processing_context *pp_context)
{
    struct intel_batchbuffer *batch = pp_context->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((pp_context->urb.size_cs_entry - 1) << 4) |     /* URB Entry Allocation Size */
              (pp_context->urb.num_cs_entries << 0));          /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}
472
/*
 * Emit CONSTANT_BUFFER pointing at the CURBE BO.  The reloc delta packs the
 * buffer length (size_cs_entry - 1) into the low bits of the patched
 * address, per the command encoding.
 */
static void
ironlake_pp_constant_buffer(VADriverContextP ctx,
                            struct i965_post_processing_context *pp_context)
{
    struct intel_batchbuffer *batch = pp_context->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(batch, pp_context->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              pp_context->urb.size_cs_entry - 1);
    ADVANCE_BATCH(batch);    
}
486
/*
 * Walk the output in blocks: for each (x, y) step, let the current module
 * update the inline parameters and emit one MEDIA_OBJECT carrying them as
 * inline data (grf 5-6, 64 bytes).  A non-zero return from
 * pp_set_block_parameter skips the block.
 */
static void
ironlake_pp_object_walker(VADriverContextP ctx,
                          struct i965_post_processing_context *pp_context)
{
    struct intel_batchbuffer *batch = pp_context->batch;
    int x, x_steps, y, y_steps;

    x_steps = pp_context->pp_x_steps(&pp_context->private_context);
    y_steps = pp_context->pp_y_steps(&pp_context->private_context);

    for (y = 0; y < y_steps; y++) {
        for (x = 0; x < x_steps; x++) {
            if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
                BEGIN_BATCH(batch, 20);
                OUT_BATCH(batch, CMD_MEDIA_OBJECT | 18);
                OUT_BATCH(batch, 0);
                OUT_BATCH(batch, 0); /* no indirect data */
                OUT_BATCH(batch, 0);

                /* inline data grf 5-6 */
                assert(sizeof(pp_inline_parameter) == 64);
                intel_batchbuffer_data(batch, &pp_inline_parameter, sizeof(pp_inline_parameter));

                ADVANCE_BATCH(batch);
            }
        }
    }
}
515
/*
 * Emit the full Ironlake media pipeline for one post-processing pass:
 * flush, pipeline select, base addresses, state pointers, URB/CS layout,
 * constant buffer, then the per-block object walker — all inside a single
 * atomic batchbuffer section.
 */
static void
ironlake_pp_pipeline_setup(VADriverContextP ctx,
                           struct i965_post_processing_context *pp_context)
{
    struct intel_batchbuffer *batch = pp_context->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    ironlake_pp_pipeline_select(ctx, pp_context);
    ironlake_pp_state_base_address(ctx, pp_context);
    ironlake_pp_state_pointers(ctx, pp_context);
    ironlake_pp_urb_layout(ctx, pp_context);
    ironlake_pp_cs_urb_layout(ctx, pp_context);
    ironlake_pp_constant_buffer(ctx, pp_context);
    ironlake_pp_object_walker(ctx, pp_context);
    intel_batchbuffer_end_atomic(batch);
}
533
/*
 * Allocate and fill a 2D SURFACE_STATE for one plane of surf_bo (at byte
 * offset surf_bo_offset) and install it in pp_context->surfaces[index].
 * is_target selects whether the GPU may write the surface: it adds
 * I915_GEM_DOMAIN_RENDER as the write domain on the relocation.
 * References surf_bo; both BOs are released elsewhere when the slot is
 * cleaned up.
 */
static void
i965_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                          dri_bo *surf_bo, unsigned long surf_bo_offset,
                          int width, int height, int pitch, int format, 
                          int index, int is_target)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_surface_state *ss;
    dri_bo *ss_bo;
    unsigned int tiling;
    unsigned int swizzle;

    dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
    pp_context->surfaces[index].s_bo = surf_bo;
    dri_bo_reference(pp_context->surfaces[index].s_bo);
    ss_bo = dri_bo_alloc(i965->intel.bufmgr, 
                         "surface state", 
                         sizeof(struct i965_surface_state), 
                         4096);
    assert(ss_bo);
    pp_context->surfaces[index].ss_bo = ss_bo;
    dri_bo_map(ss_bo, True);
    assert(ss_bo->virtual);
    ss = ss_bo->virtual;
    memset(ss, 0, sizeof(*ss));
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    /* Presumed GPU address; patched by the relocation below. */
    ss->ss1.base_addr = surf_bo->offset + surf_bo_offset;
    /* Hardware encodes dimensions and pitch as value - 1. */
    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;
    ss->ss3.pitch = pitch - 1;
    pp_set_surface_tiling(ss, tiling);
    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_RENDER, is_target ? I915_GEM_DOMAIN_RENDER : 0,
                      surf_bo_offset,
                      offsetof(struct i965_surface_state, ss1),
                      pp_context->surfaces[index].s_bo);
    dri_bo_unmap(ss_bo);
}
573
/*
 * Allocate and fill a SURFACE_STATE2 (the sampler-oriented YUV variant) for
 * surf_bo and install it in pp_context->surfaces[index].  xoffset/yoffset
 * give the Cb(Cr) plane origin; interleave_chroma marks NV12-style
 * interleaved UV.  Unlike i965_pp_set_surface_state there is no is_target
 * flag — the reloc write domain is always 0 (read-only surface).
 */
static void
i965_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                           dri_bo *surf_bo, unsigned long surf_bo_offset,
                           int width, int height, int wpitch,
                           int xoffset, int yoffset,
                           int format, int interleave_chroma,
                           int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_surface_state2 *ss2;
    dri_bo *ss2_bo;
    unsigned int tiling;
    unsigned int swizzle;

    dri_bo_get_tiling(surf_bo, &tiling, &swizzle);
    pp_context->surfaces[index].s_bo = surf_bo;
    dri_bo_reference(pp_context->surfaces[index].s_bo);
    ss2_bo = dri_bo_alloc(i965->intel.bufmgr, 
                          "YUV surface state", 
                          sizeof(struct i965_surface_state2), 
                          4096);
    assert(ss2_bo);
    pp_context->surfaces[index].ss_bo = ss2_bo;
    dri_bo_map(ss2_bo, True);
    assert(ss2_bo->virtual);
    ss2 = ss2_bo->virtual;
    memset(ss2, 0, sizeof(*ss2));
    /* Presumed GPU address; patched by the relocation below. */
    ss2->ss0.surface_base_address = surf_bo->offset + surf_bo_offset;
    ss2->ss1.cbcr_pixel_offset_v_direction = 0;
    /* Hardware encodes dimensions and pitch as value - 1. */
    ss2->ss1.width = width - 1;
    ss2->ss1.height = height - 1;
    ss2->ss2.pitch = wpitch - 1;
    ss2->ss2.interleave_chroma = interleave_chroma;
    ss2->ss2.surface_format = format;
    ss2->ss3.x_offset_for_cb = xoffset;
    ss2->ss3.y_offset_for_cb = yoffset;
    pp_set_surface2_tiling(ss2, tiling);
    dri_bo_emit_reloc(ss2_bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      surf_bo_offset,
                      offsetof(struct i965_surface_state2, ss0),
                      surf_bo);
    dri_bo_unmap(ss2_bo);
}
618
/* The NULL module runs as a single block: one step in x. */
static int
pp_null_x_steps(void *private_context)
{
    return 1;
}
624
/* The NULL module runs as a single block: one step in y. */
static int
pp_null_y_steps(void *private_context)
{
    return 1;
}
630
/* No per-block parameters for the NULL kernel; 0 = emit the media object. */
static int
pp_null_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    return 0;
}
636
/* Initialize hook for the NULL (testing) module: only installs the
 * per-block walker callbacks; no surfaces or parameters are set up. */
static void
pp_null_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                   VASurfaceID in_surface_id, VASurfaceID out_surface_id,
                   const VARectangle *src_rect, const VARectangle *dst_rect)
{
    /* private function & data */
    pp_context->pp_x_steps = pp_null_x_steps;
    pp_context->pp_y_steps = pp_null_y_steps;
    pp_context->pp_set_block_parameter = pp_null_set_block_parameter;
}
647
/* Load/save walks one full row of blocks per media object: one x step. */
static int
pp_load_save_x_steps(void *private_context)
{
    return 1;
}
653
/* One y step per 8-pixel-high block row of the destination. */
static int
pp_load_save_y_steps(void *private_context)
{
    struct pp_load_save_context *pp_load_save_context = private_context;

    return pp_load_save_context->dest_h / 8;
}
661
/*
 * Per-block inline parameters for load/save: fully enabled 16x8 block masks
 * and the destination origin in pixels.  Always emits the block (returns 0).
 */
static int
pp_load_save_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    pp_inline_parameter.grf5.block_vertical_mask = 0xff;
    pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
    pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
    pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8;

    return 0;
}
672
/*
 * Set up the NV12 -> NV12 copy (load/save) operation: bind source and
 * destination Y/UV planes as read resp. write surfaces and install the
 * walker hooks.  src_rect/dst_rect are unused — the kernel copies whole
 * surfaces.
 *
 * NOTE(review): surface widths are programmed as orig_w / 4 — presumably
 * the kernel processes 4 bytes per element; confirm against the shader
 * source before changing.
 */
static void
pp_nv12_load_save_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                             VASurfaceID in_surface_id, VASurfaceID out_surface_id,
                             const VARectangle *src_rect, const VARectangle *dst_rect)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct pp_load_save_context *pp_load_save_context = (struct pp_load_save_context *)&pp_context->private_context;
    struct object_surface *obj_surface;
    int w, h;
    int orig_w, orig_h;

    /* source surface */
    obj_surface = SURFACE(in_surface_id);
    orig_w = obj_surface->orig_width;
    orig_h = obj_surface->orig_height;
    w = obj_surface->width;
    h = obj_surface->height;

    /* source Y surface index 1 */
    i965_pp_set_surface_state(ctx, pp_context,
                              obj_surface->bo, 0,
                              orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
                              1, 0);

    /* source UV surface index 2 (UV plane starts at w * h in the BO) */
    i965_pp_set_surface_state(ctx, pp_context,
                              obj_surface->bo, w * h,
                              orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
                              2, 0);

    /* destination surface */
    obj_surface = SURFACE(out_surface_id);
    orig_w = obj_surface->orig_width;
    orig_h = obj_surface->orig_height;
    w = obj_surface->width;
    h = obj_surface->height;

    /* destination Y surface index 7 */
    i965_pp_set_surface_state(ctx, pp_context,
                              obj_surface->bo, 0,
                              orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
                              7, 1);

    /* destination UV surface index 8 */
    i965_pp_set_surface_state(ctx, pp_context,
                              obj_surface->bo, w * h,
                              orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
                              8, 1);

    /* private function & data */
    pp_context->pp_x_steps = pp_load_save_x_steps;
    pp_context->pp_y_steps = pp_load_save_y_steps;
    pp_context->pp_set_block_parameter = pp_load_save_set_block_parameter;
    pp_load_save_context->dest_h = h;
    pp_load_save_context->dest_w = w;

    pp_inline_parameter.grf5.block_count_x = w / 16;   /* 1 x N */
    pp_inline_parameter.grf5.number_blocks = w / 16;
}
732
/* Scaling walks one full row of blocks per media object: one x step. */
static int
pp_scaling_x_steps(void *private_context)
{
    return 1;
}
738
/* One y step per 8-pixel-high block row of the (aligned) destination. */
static int
pp_scaling_y_steps(void *private_context)
{
    struct pp_scaling_context *pp_scaling_context = private_context;

    return pp_scaling_context->dest_h / 8;
}
746
/*
 * Per-block parameters for scaling: advance the normalized source origin by
 * the per-pixel scaling step (x blocks are 16 wide, y blocks 8 high) and
 * offset the destination origin by the target rectangle's position.
 * Always emits the block (returns 0).
 */
static int
pp_scaling_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
    float src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
    float src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;

    pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = src_x_steping * x * 16 + pp_scaling_context->src_normalized_x;
    pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_scaling_context->src_normalized_y;
    pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16 + pp_scaling_context->dest_x;
    pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8 + pp_scaling_context->dest_y;
    
    return 0;
}
761
/*
 * Set up the NV12 bilinear scaling operation: bind the source planes for
 * sampling and the destination planes for writing, program linear-filter
 * clamped samplers for Y (slot 1) and UV (slot 2), install the walker
 * hooks, and derive the normalized scaling steps from src_rect/dst_rect.
 *
 * NOTE(review): the normalized origins and steps are divided by both the
 * input and output dimensions (e.g. src_rect->x / in_w / out_w); verify
 * this double normalization against the kernel's expectations.
 */
static void
pp_nv12_scaling_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                           VASurfaceID in_surface_id, VASurfaceID out_surface_id,
                           const VARectangle *src_rect, const VARectangle *dst_rect)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct pp_scaling_context *pp_scaling_context = (struct pp_scaling_context *)&pp_context->private_context;
    struct object_surface *obj_surface;
    struct i965_sampler_state *sampler_state;
    int in_w, in_h, in_wpitch, in_hpitch;
    int out_w, out_h, out_wpitch, out_hpitch;

    /* source surface */
    obj_surface = SURFACE(in_surface_id);
    in_w = obj_surface->orig_width;
    in_h = obj_surface->orig_height;
    in_wpitch = obj_surface->width;
    in_hpitch = obj_surface->height;

    /* source Y surface index 1 */
    i965_pp_set_surface_state(ctx, pp_context,
                              obj_surface->bo, 0,
                              in_w, in_h, in_wpitch, I965_SURFACEFORMAT_R8_UNORM,
                              1, 0);

    /* source UV surface index 2 (UV plane starts at wpitch * hpitch) */
    i965_pp_set_surface_state(ctx, pp_context,
                              obj_surface->bo, in_wpitch * in_hpitch,
                              in_w / 2, in_h / 2, in_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
                              2, 0);

    /* destination surface */
    obj_surface = SURFACE(out_surface_id);
    out_w = obj_surface->orig_width;
    out_h = obj_surface->orig_height;
    out_wpitch = obj_surface->width;
    out_hpitch = obj_surface->height;

    /* destination Y surface index 7 */
    i965_pp_set_surface_state(ctx, pp_context,
                              obj_surface->bo, 0,
                              out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
                              7, 1);

    /* destination UV surface index 8 */
    i965_pp_set_surface_state(ctx, pp_context,
                              obj_surface->bo, out_wpitch * out_hpitch,
                              out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
                              8, 1);

    /* sampler state */
    dri_bo_map(pp_context->sampler_state_table.bo, True);
    assert(pp_context->sampler_state_table.bo->virtual);
    sampler_state = pp_context->sampler_state_table.bo->virtual;

    /* SIMD16 Y index 1: bilinear filtering, clamp at the edges */
    sampler_state[1].ss0.min_filter = I965_MAPFILTER_LINEAR;
    sampler_state[1].ss0.mag_filter = I965_MAPFILTER_LINEAR;
    sampler_state[1].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
    sampler_state[1].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
    sampler_state[1].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;

    /* SIMD16 UV index 2 */
    sampler_state[2].ss0.min_filter = I965_MAPFILTER_LINEAR;
    sampler_state[2].ss0.mag_filter = I965_MAPFILTER_LINEAR;
    sampler_state[2].ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
    sampler_state[2].ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
    sampler_state[2].ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;

    dri_bo_unmap(pp_context->sampler_state_table.bo);

    /* private function & data */
    pp_context->pp_x_steps = pp_scaling_x_steps;
    pp_context->pp_y_steps = pp_scaling_y_steps;
    pp_context->pp_set_block_parameter = pp_scaling_set_block_parameter;

    pp_scaling_context->dest_x = dst_rect->x;
    pp_scaling_context->dest_y = dst_rect->y;
    /* Round the walked area up to whole 16x8 blocks. */
    pp_scaling_context->dest_w = ALIGN(dst_rect->width, 16);
    pp_scaling_context->dest_h = ALIGN(dst_rect->height, 16);
    pp_scaling_context->src_normalized_x = (float)src_rect->x / in_w / out_w;
    pp_scaling_context->src_normalized_y = (float)src_rect->y / in_h / out_h;

    pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / out_h;

    pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / out_w;
    pp_inline_parameter.grf5.block_count_x = pp_scaling_context->dest_w / 16;   /* 1 x N */
    pp_inline_parameter.grf5.number_blocks = pp_scaling_context->dest_w / 16;
    pp_inline_parameter.grf5.block_vertical_mask = 0xff;
    pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
}
853
854 static int
855 pp_avs_x_steps(void *private_context)
856 {
857     struct pp_avs_context *pp_avs_context = private_context;
858
859     return pp_avs_context->dest_w / 16;
860 }
861
/* Number of vertical walker steps for AVS: the kernel covers the full
 * column height in a single pass (M x 1 block layout), so always 1. */
static int
pp_avs_y_steps(void *private_context)
{
    (void)private_context;  /* unused; kept for the pp_y_steps callback signature */

    return 1;
}
867
/*
 * Fill in the inline (per-block) kernel parameters for one 16x8 AVS
 * output block at walker position (x, y).
 *
 * tmp_w is the destination width that would preserve the source aspect
 * ratio at the chosen destination height (16-aligned).  When the actual
 * destination is no wider than tmp_w, the horizontal scaling step is
 * constant (1/tmp_w).  Otherwise non-linear anamorphic scaling (NLAS)
 * is used: the left/right margins are stretched with a per-block delta
 * on the scaling step while the center region scales linearly.
 *
 * NOTE(review): the function accumulates state across calls — the
 * normalized horizontal origin and scaling step for block x depend on
 * the values written for block x-1, so blocks are assumed to be issued
 * in increasing x order for each row.  Always returns 0.
 */
static int
pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
{
    struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
    float src_x_steping, src_y_steping, video_step_delta;
    int tmp_w = ALIGN(pp_avs_context->dest_h * pp_avs_context->src_w / pp_avs_context->src_h, 16);

    if (tmp_w >= pp_avs_context->dest_w) {
        /* Aspect-preserving width covers the destination: plain linear
         * scaling, no per-block step delta. */
        pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
        pp_inline_parameter.grf6.video_step_delta = 0;
        
        if (x == 0) {
            /* Center the source horizontally within the wider tmp_w span. */
            pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = (float)(tmp_w - pp_avs_context->dest_w) / tmp_w / 2 +
                pp_avs_context->src_normalized_x;
        } else {
            /* Advance the origin by the 16 pixels consumed by the previous block. */
            src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
            video_step_delta = pp_inline_parameter.grf6.video_step_delta;
            pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                16 * 15 * video_step_delta / 2;
        }
    } else {
        /* NLAS case: destination is wider than the aspect-preserving width.
         * n0/n1 split the extra width between the two margins; n2 borrows
         * some center blocks into the non-linear regions; f is the
         * normalized source width handled by each non-linear side. */
        int n0, n1, n2, nls_left, nls_right;
        int factor_a = 5, factor_b = 4;
        float f;

        n0 = (pp_avs_context->dest_w - tmp_w) / (16 * 2);
        n1 = (pp_avs_context->dest_w - tmp_w) / 16 - n0;
        n2 = tmp_w / (16 * factor_a);
        nls_left = n0 + n2;
        nls_right = n1 + n2;
        f = (float) n2 * 16 / tmp_w;
        
        if (n0 < 5) {
            /* Margins too narrow for the non-linear ramp: fall back to
             * uniform linear stretching over the whole destination. */
            pp_inline_parameter.grf6.video_step_delta = 0.0;

            if (x == 0) {
                pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / pp_avs_context->dest_w;
                pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
            } else {
                src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
                video_step_delta = pp_inline_parameter.grf6.video_step_delta;
                pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                    16 * 15 * video_step_delta / 2;
            }
        } else {
            if (x < nls_left) {
                /* Left non-linear region: scaling step starts at 'a' and
                 * grows by 'b' per pixel so the region consumes exactly f
                 * of the normalized source width. */
                /* f = a * nls_left * 16 + b * nls_left * 16 * (nls_left * 16 - 1) / 2 */
                float a = f / (nls_left * 16 * factor_b);
                float b = (f - nls_left * 16 * a) * 2 / (nls_left * 16 * (nls_left * 16 - 1));
                
                pp_inline_parameter.grf6.video_step_delta = b;

                if (x == 0) {
                    pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin = pp_avs_context->src_normalized_x;
                    pp_inline_parameter.grf5.normalized_video_x_scaling_step = a;
                } else {
                    src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
                    video_step_delta = pp_inline_parameter.grf6.video_step_delta;
                    pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                        16 * 15 * video_step_delta / 2;
                    pp_inline_parameter.grf5.normalized_video_x_scaling_step += 16 * b;
                }
            } else if (x < (pp_avs_context->dest_w / 16 - nls_right)) {
                /* scale the center linearly */
                src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
                video_step_delta = pp_inline_parameter.grf6.video_step_delta;
                pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                    16 * 15 * video_step_delta / 2;
                pp_inline_parameter.grf6.video_step_delta = 0.0;
                pp_inline_parameter.grf5.normalized_video_x_scaling_step = 1.0 / tmp_w;
            } else {
                /* Right non-linear region: mirror of the left ramp, the
                 * step shrinks by 'b' per pixel down to 'a' at the edge. */
                float a = f / (nls_right * 16 * factor_b);
                float b = (f - nls_right * 16 * a) * 2 / (nls_right * 16 * (nls_right * 16 - 1));

                src_x_steping = pp_inline_parameter.grf5.normalized_video_x_scaling_step;
                video_step_delta = pp_inline_parameter.grf6.video_step_delta;
                pp_inline_parameter.grf5.r5_1.source_surface_block_normalized_horizontal_origin += src_x_steping * 16 +
                    16 * 15 * video_step_delta / 2;
                pp_inline_parameter.grf6.video_step_delta = -b;

                if (x == (pp_avs_context->dest_w / 16 - nls_right))
                    pp_inline_parameter.grf5.normalized_video_x_scaling_step = a + (nls_right * 16  - 1) * b;
                else
                    pp_inline_parameter.grf5.normalized_video_x_scaling_step -= b * 16;
            }
        }
    }

    /* Vertical origin is linear: y blocks of 8 lines each. */
    src_y_steping = pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step;
    pp_inline_parameter.grf5.source_surface_block_normalized_vertical_origin = src_y_steping * y * 8 + pp_avs_context->src_normalized_y;
    pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
    pp_inline_parameter.grf5.destination_block_vertical_origin = y * 8 + pp_avs_context->dest_y;

    return 0;
}
963
/*
 * Set up the NV12 adaptive video scaling (AVS) pass.
 *
 * Binds the source Y plane (surface index 1) and UV plane (index 2) as
 * sampler_8x8 input surfaces, the destination Y/UV planes as render
 * targets (indices 7/8), programs the AVS/IEF sampler_8x8 state for
 * both planes, installs the AVS step/block callbacks, and fills in the
 * static and inline kernel parameters.
 */
static void
pp_nv12_avs_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
                       VASurfaceID in_surface_id, VASurfaceID out_surface_id,
                       const VARectangle *src_rect, const VARectangle *dst_rect)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)&pp_context->private_context;
    struct object_surface *obj_surface;
    struct i965_sampler_8x8 *sampler_8x8;
    struct i965_sampler_8x8_state *sampler_8x8_state;
    int index;
    int in_w, in_h, in_wpitch, in_hpitch;       /* source logical size and allocated pitch */
    int out_w, out_h, out_wpitch, out_hpitch;   /* destination logical size and allocated pitch */

    /* surface */
    obj_surface = SURFACE(in_surface_id);
    in_w = obj_surface->orig_width;
    in_h = obj_surface->orig_height;
    in_wpitch = obj_surface->width;
    in_hpitch = obj_surface->height;

    /* source Y surface index 1 */
    i965_pp_set_surface2_state(ctx, pp_context,
                               obj_surface->bo, 0,
                               in_w, in_h, in_wpitch,
                               0, 0,
                               SURFACE_FORMAT_Y8_UNORM, 0,
                               1);

    /* source UV surface index 2 — NV12 UV plane starts after the Y plane,
     * i.e. at byte offset pitch * allocated_height */
    i965_pp_set_surface2_state(ctx, pp_context,
                               obj_surface->bo, in_wpitch * in_hpitch,
                               in_w, in_h, in_wpitch,
                               0, 0,
                               SURFACE_FORMAT_PLANAR_420_8, 1,
                               2);

    /* destination surface */
    obj_surface = SURFACE(out_surface_id);
    out_w = obj_surface->orig_width;
    out_h = obj_surface->orig_height;
    out_wpitch = obj_surface->width;
    out_hpitch = obj_surface->height;
    assert(out_w <= out_wpitch && out_h <= out_hpitch);

    /* destination Y surface index 7 — width is out_w / 4: presumably the
     * kernel writes four 8-bit pixels per element; TODO confirm against
     * the shader */
    i965_pp_set_surface_state(ctx, pp_context,
                              obj_surface->bo, 0,
                              out_w / 4, out_h, out_wpitch, I965_SURFACEFORMAT_R8_UNORM,
                              7, 1);

    /* destination UV surface index 8 — half height, interleaved R8G8 */
    i965_pp_set_surface_state(ctx, pp_context,
                              obj_surface->bo, out_wpitch * out_hpitch,
                              out_w / 4, out_h / 2, out_wpitch, I965_SURFACEFORMAT_R8G8_UNORM,
                              8, 1);

    /* sampler 8x8 state (Y plane): adaptive filtering enabled for all
     * channels but X/Y adaptive filtering bypassed */
    dri_bo_map(pp_context->sampler_state_table.bo_8x8, True);
    assert(pp_context->sampler_state_table.bo_8x8->virtual);
    assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
    sampler_8x8_state = pp_context->sampler_state_table.bo_8x8->virtual;
    memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
    sampler_8x8_state->dw136.default_sharpness_level = 0;
    sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 1;
    sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
    sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
    dri_bo_unmap(pp_context->sampler_state_table.bo_8x8);

    /* sampler 8x8 */
    dri_bo_map(pp_context->sampler_state_table.bo, True);
    assert(pp_context->sampler_state_table.bo->virtual);
    assert(sizeof(*sampler_8x8) == sizeof(int) * 16);
    sampler_8x8 = pp_context->sampler_state_table.bo->virtual;

    /* sample_8x8 Y index 1 — 8-tap adaptive AVS filter with 5x5 detail
     * IEF; the pwl1_* fields are the piecewise-linear IEF gain curve */
    index = 1;
    memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
    sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_ADAPTIVE_8_TAP;
    sampler_8x8[index].dw0.ief_bypass = 0;
    sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
    sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
    sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8->offset >> 5;
    sampler_8x8[index].dw2.global_noise_estimation = 22;
    sampler_8x8[index].dw2.strong_edge_threshold = 8;
    sampler_8x8[index].dw2.weak_edge_threshold = 1;
    sampler_8x8[index].dw3.strong_edge_weight = 7;
    sampler_8x8[index].dw3.regular_weight = 2;
    sampler_8x8[index].dw3.non_edge_weight = 0;
    sampler_8x8[index].dw3.gain_factor = 40;
    sampler_8x8[index].dw4.steepness_boost = 0;
    sampler_8x8[index].dw4.steepness_threshold = 0;
    sampler_8x8[index].dw4.mr_boost = 0;
    sampler_8x8[index].dw4.mr_threshold = 5;
    sampler_8x8[index].dw5.pwl1_point_1 = 4;
    sampler_8x8[index].dw5.pwl1_point_2 = 12;
    sampler_8x8[index].dw5.pwl1_point_3 = 16;
    sampler_8x8[index].dw5.pwl1_point_4 = 26;
    sampler_8x8[index].dw6.pwl1_point_5 = 40;
    sampler_8x8[index].dw6.pwl1_point_6 = 160;
    sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
    sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
    sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
    sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
    sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
    sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
    sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
    sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
    sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
    sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
    sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
    sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
    sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
    sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
    sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
    sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
    sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
    sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
    sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
    sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
    sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
    sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
    sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
    sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
    sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
    sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
    sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
    sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
    sampler_8x8[index].dw13.limiter_boost = 0;
    sampler_8x8[index].dw13.minimum_limiter = 10;
    sampler_8x8[index].dw13.maximum_limiter = 11;
    sampler_8x8[index].dw14.clip_limiter = 130;
    /* dw1 holds a GPU address of the 8x8 state table: register a
     * relocation so the kernel patches it at exec time */
    dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
                      I915_GEM_DOMAIN_RENDER, 
                      0,
                      0,
                      sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
                      pp_context->sampler_state_table.bo_8x8);

    /* sampler 8x8 state (UV plane): adaptive filtering disabled */
    dri_bo_map(pp_context->sampler_state_table.bo_8x8_uv, True);
    assert(pp_context->sampler_state_table.bo_8x8_uv->virtual);
    assert(sizeof(*sampler_8x8_state) == sizeof(int) * 138);
    sampler_8x8_state = pp_context->sampler_state_table.bo_8x8_uv->virtual;
    memset(sampler_8x8_state, 0, sizeof(*sampler_8x8_state));
    sampler_8x8_state->dw136.default_sharpness_level = 0;
    sampler_8x8_state->dw137.adaptive_filter_for_all_channel = 0;
    sampler_8x8_state->dw137.bypass_y_adaptive_filtering = 1;
    sampler_8x8_state->dw137.bypass_x_adaptive_filtering = 1;
    dri_bo_unmap(pp_context->sampler_state_table.bo_8x8_uv);

    /* sample_8x8 UV index 2 — nearest AVS filtering for chroma; IEF
     * parameters mirror the Y sampler */
    index = 2;
    memset(&sampler_8x8[index], 0, sizeof(*sampler_8x8));
    sampler_8x8[index].dw0.avs_filter_type = AVS_FILTER_NEAREST;
    sampler_8x8[index].dw0.ief_bypass = 0;
    sampler_8x8[index].dw0.ief_filter_type = IEF_FILTER_DETAIL;
    sampler_8x8[index].dw0.ief_filter_size = IEF_FILTER_SIZE_5X5;
    sampler_8x8[index].dw1.sampler_8x8_state_pointer = pp_context->sampler_state_table.bo_8x8_uv->offset >> 5;
    sampler_8x8[index].dw2.global_noise_estimation = 22;
    sampler_8x8[index].dw2.strong_edge_threshold = 8;
    sampler_8x8[index].dw2.weak_edge_threshold = 1;
    sampler_8x8[index].dw3.strong_edge_weight = 7;
    sampler_8x8[index].dw3.regular_weight = 2;
    sampler_8x8[index].dw3.non_edge_weight = 0;
    sampler_8x8[index].dw3.gain_factor = 40;
    sampler_8x8[index].dw4.steepness_boost = 0;
    sampler_8x8[index].dw4.steepness_threshold = 0;
    sampler_8x8[index].dw4.mr_boost = 0;
    sampler_8x8[index].dw4.mr_threshold = 5;
    sampler_8x8[index].dw5.pwl1_point_1 = 4;
    sampler_8x8[index].dw5.pwl1_point_2 = 12;
    sampler_8x8[index].dw5.pwl1_point_3 = 16;
    sampler_8x8[index].dw5.pwl1_point_4 = 26;
    sampler_8x8[index].dw6.pwl1_point_5 = 40;
    sampler_8x8[index].dw6.pwl1_point_6 = 160;
    sampler_8x8[index].dw6.pwl1_r3_bias_0 = 127;
    sampler_8x8[index].dw6.pwl1_r3_bias_1 = 98;
    sampler_8x8[index].dw7.pwl1_r3_bias_2 = 88;
    sampler_8x8[index].dw7.pwl1_r3_bias_3 = 64;
    sampler_8x8[index].dw7.pwl1_r3_bias_4 = 44;
    sampler_8x8[index].dw7.pwl1_r3_bias_5 = 0;
    sampler_8x8[index].dw8.pwl1_r3_bias_6 = 0;
    sampler_8x8[index].dw8.pwl1_r5_bias_0 = 3;
    sampler_8x8[index].dw8.pwl1_r5_bias_1 = 32;
    sampler_8x8[index].dw8.pwl1_r5_bias_2 = 32;
    sampler_8x8[index].dw9.pwl1_r5_bias_3 = 58;
    sampler_8x8[index].dw9.pwl1_r5_bias_4 = 100;
    sampler_8x8[index].dw9.pwl1_r5_bias_5 = 108;
    sampler_8x8[index].dw9.pwl1_r5_bias_6 = 88;
    sampler_8x8[index].dw10.pwl1_r3_slope_0 = -116;
    sampler_8x8[index].dw10.pwl1_r3_slope_1 = -20;
    sampler_8x8[index].dw10.pwl1_r3_slope_2 = -96;
    sampler_8x8[index].dw10.pwl1_r3_slope_3 = -32;
    sampler_8x8[index].dw11.pwl1_r3_slope_4 = -50;
    sampler_8x8[index].dw11.pwl1_r3_slope_5 = 0;
    sampler_8x8[index].dw11.pwl1_r3_slope_6 = 0;
    sampler_8x8[index].dw11.pwl1_r5_slope_0 = 116;
    sampler_8x8[index].dw12.pwl1_r5_slope_1 = 0;
    sampler_8x8[index].dw12.pwl1_r5_slope_2 = 114;
    sampler_8x8[index].dw12.pwl1_r5_slope_3 = 67;
    sampler_8x8[index].dw12.pwl1_r5_slope_4 = 9;
    sampler_8x8[index].dw13.pwl1_r5_slope_5 = -3;
    sampler_8x8[index].dw13.pwl1_r5_slope_6 = -15;
    sampler_8x8[index].dw13.limiter_boost = 0;
    sampler_8x8[index].dw13.minimum_limiter = 10;
    sampler_8x8[index].dw13.maximum_limiter = 11;
    sampler_8x8[index].dw14.clip_limiter = 130;
    dri_bo_emit_reloc(pp_context->sampler_state_table.bo,
                      I915_GEM_DOMAIN_RENDER, 
                      0,
                      0,
                      sizeof(*sampler_8x8) * index + offsetof(struct i965_sampler_8x8, dw1),
                      pp_context->sampler_state_table.bo_8x8_uv);

    dri_bo_unmap(pp_context->sampler_state_table.bo);

    /* private function & data */
    pp_context->pp_x_steps = pp_avs_x_steps;
    pp_context->pp_y_steps = pp_avs_y_steps;
    pp_context->pp_set_block_parameter = pp_avs_set_block_parameter;

    pp_avs_context->dest_x = dst_rect->x;
    pp_avs_context->dest_y = dst_rect->y;
    pp_avs_context->dest_w = ALIGN(dst_rect->width, 16);
    pp_avs_context->dest_h = ALIGN(dst_rect->height, 16);
    /* NOTE(review): origin/step are normalized by in_* AND out_* — this
     * matches the scaling initializer above; presumably the kernel
     * multiplies the destination size back in.  Confirm against shader. */
    pp_avs_context->src_normalized_x = (float)src_rect->x / in_w / out_w;
    pp_avs_context->src_normalized_y = (float)src_rect->y / in_h / out_h;
    pp_avs_context->src_w = src_rect->width;
    pp_avs_context->src_h = src_rect->height;

    pp_static_parameter.grf4.r4_2.avs.nlas = 1;   /* enable non-linear anamorphic scaling */
    pp_static_parameter.grf1.r1_6.normalized_video_y_scaling_step = (float) src_rect->height / in_h / out_h;

    pp_inline_parameter.grf5.normalized_video_x_scaling_step = (float) src_rect->width / in_w / out_w;
    pp_inline_parameter.grf5.block_count_x = 1;        /* M x 1 */
    pp_inline_parameter.grf5.number_blocks = pp_avs_context->dest_h / 8;
    pp_inline_parameter.grf5.block_vertical_mask = 0xff;
    pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
    pp_inline_parameter.grf6.video_step_delta = 0.0;
}
1204
/* Number of horizontal walker steps for DNDI: the kernel handles the
 * full width per row (1 x N block layout), so always 1. */
static int
pp_dndi_x_steps(void *private_context)
{
    (void)private_context;  /* unused; kept for the pp_x_steps callback signature */

    return 1;
}
1210
1211 static int
1212 pp_dndi_y_steps(void *private_context)
1213 {
1214     struct pp_dndi_context *pp_dndi_context = private_context;
1215
1216     return pp_dndi_context->dest_h / 4;
1217 }
1218
1219 static int
1220 pp_dndi_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
1221 {
1222     pp_inline_parameter.grf5.destination_block_horizontal_origin = x * 16;
1223     pp_inline_parameter.grf5.destination_block_vertical_origin = y * 4;
1224
1225     return 0;
1226 }
1227
1228 static 
1229 void pp_nv12_dndi_initialize(VADriverContextP ctx, struct i965_post_processing_context *pp_context,
1230                              VASurfaceID in_surface_id, VASurfaceID out_surface_id,
1231                              const VARectangle *src_rect, const VARectangle *dst_rect)
1232 {
1233     struct i965_driver_data *i965 = i965_driver_data(ctx);
1234     struct pp_dndi_context *pp_dndi_context = (struct pp_dndi_context *)&pp_context->private_context;
1235     struct object_surface *obj_surface;
1236     struct i965_sampler_dndi *sampler_dndi;
1237     int index;
1238     int w, h;
1239     int orig_w, orig_h;
1240
1241     /* surface */
1242     obj_surface = SURFACE(in_surface_id);
1243     orig_w = obj_surface->orig_width;
1244     orig_h = obj_surface->orig_height;
1245     w = obj_surface->width;
1246     h = obj_surface->height;
1247
1248     if (pp_context->stmm.bo == NULL) {
1249         pp_context->stmm.bo = dri_bo_alloc(i965->intel.bufmgr,
1250                                            "STMM surface",
1251                                            w * h,
1252                                            4096);
1253         assert(pp_context->stmm.bo);
1254     }
1255
1256     /* source UV surface index 2 */
1257     i965_pp_set_surface_state(ctx, pp_context,
1258                               obj_surface->bo, w * h,
1259                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
1260                               2, 0);
1261
1262     /* source YUV surface index 4 */
1263     i965_pp_set_surface2_state(ctx, pp_context,
1264                                obj_surface->bo, 0,
1265                                orig_w, orig_w, w,
1266                                0, h,
1267                                SURFACE_FORMAT_PLANAR_420_8, 1,
1268                                4);
1269
1270     /* source STMM surface index 20 */
1271     i965_pp_set_surface_state(ctx, pp_context,
1272                               pp_context->stmm.bo, 0,
1273                               orig_w, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
1274                               20, 1);
1275
1276     /* destination surface */
1277     obj_surface = SURFACE(out_surface_id);
1278     orig_w = obj_surface->orig_width;
1279     orig_h = obj_surface->orig_height;
1280     w = obj_surface->width;
1281     h = obj_surface->height;
1282
1283     /* destination Y surface index 7 */
1284     i965_pp_set_surface_state(ctx, pp_context,
1285                               obj_surface->bo, 0,
1286                               orig_w / 4, orig_h, w, I965_SURFACEFORMAT_R8_UNORM,
1287                               7, 1);
1288
1289     /* destination UV surface index 8 */
1290     i965_pp_set_surface_state(ctx, pp_context,
1291                               obj_surface->bo, w * h,
1292                               orig_w / 4, orig_h / 2, w, I965_SURFACEFORMAT_R8G8_UNORM,
1293                               8, 1);
1294     /* sampler dndi */
1295     dri_bo_map(pp_context->sampler_state_table.bo, True);
1296     assert(pp_context->sampler_state_table.bo->virtual);
1297     assert(sizeof(*sampler_dndi) == sizeof(int) * 8);
1298     sampler_dndi = pp_context->sampler_state_table.bo->virtual;
1299
1300     /* sample dndi index 1 */
1301     index = 0;
1302     sampler_dndi[index].dw0.denoise_asd_threshold = 0;
1303     sampler_dndi[index].dw0.denoise_history_delta = 8;          // 0-15, default is 8
1304     sampler_dndi[index].dw0.denoise_maximum_history = 128;      // 128-240
1305     sampler_dndi[index].dw0.denoise_stad_threshold = 0;
1306
1307     sampler_dndi[index].dw1.denoise_threshold_for_sum_of_complexity_measure = 64;
1308     sampler_dndi[index].dw1.denoise_moving_pixel_threshold = 0;
1309     sampler_dndi[index].dw1.stmm_c2 = 0;
1310     sampler_dndi[index].dw1.low_temporal_difference_threshold = 8;
1311     sampler_dndi[index].dw1.temporal_difference_threshold = 16;
1312
1313     sampler_dndi[index].dw2.block_noise_estimate_noise_threshold = 15;   // 0-31
1314     sampler_dndi[index].dw2.block_noise_estimate_edge_threshold = 7;    // 0-15
1315     sampler_dndi[index].dw2.denoise_edge_threshold = 7;                 // 0-15
1316     sampler_dndi[index].dw2.good_neighbor_threshold = 7;                // 0-63
1317
1318     sampler_dndi[index].dw3.maximum_stmm = 128;
1319     sampler_dndi[index].dw3.multipler_for_vecm = 2;
1320     sampler_dndi[index].dw3.blending_constant_across_time_for_small_values_of_stmm = 0;
1321     sampler_dndi[index].dw3.blending_constant_across_time_for_large_values_of_stmm = 64;
1322     sampler_dndi[index].dw3.stmm_blending_constant_select = 0;
1323
1324     sampler_dndi[index].dw4.sdi_delta = 8;
1325     sampler_dndi[index].dw4.sdi_threshold = 128;
1326     sampler_dndi[index].dw4.stmm_output_shift = 7;                      // stmm_max - stmm_min = 2 ^ stmm_output_shift
1327     sampler_dndi[index].dw4.stmm_shift_up = 0;
1328     sampler_dndi[index].dw4.stmm_shift_down = 0;
1329     sampler_dndi[index].dw4.minimum_stmm = 0;
1330
1331     sampler_dndi[index].dw5.fmd_temporal_difference_threshold = 0;
1332     sampler_dndi[index].dw5.sdi_fallback_mode_2_constant = 0;
1333     sampler_dndi[index].dw5.sdi_fallback_mode_1_t2_constant = 0;
1334     sampler_dndi[index].dw5.sdi_fallback_mode_1_t1_constant = 0;
1335
1336     sampler_dndi[index].dw6.dn_enable = 1;
1337     sampler_dndi[index].dw6.di_enable = 1;
1338     sampler_dndi[index].dw6.di_partial = 0;
1339     sampler_dndi[index].dw6.dndi_top_first = 1;
1340     sampler_dndi[index].dw6.dndi_stream_id = 1;
1341     sampler_dndi[index].dw6.dndi_first_frame = 1;
1342     sampler_dndi[index].dw6.progressive_dn = 0;
1343     sampler_dndi[index].dw6.fmd_tear_threshold = 32;
1344     sampler_dndi[index].dw6.fmd2_vertical_difference_threshold = 32;
1345     sampler_dndi[index].dw6.fmd1_vertical_difference_threshold = 32;
1346
1347     sampler_dndi[index].dw7.fmd_for_1st_field_of_current_frame = 2;
1348     sampler_dndi[index].dw7.fmd_for_2nd_field_of_previous_frame = 1;
1349     sampler_dndi[index].dw7.vdi_walker_enable = 0;
1350     sampler_dndi[index].dw7.column_width_minus1 = w / 16;
1351
1352     dri_bo_unmap(pp_context->sampler_state_table.bo);
1353
1354     /* private function & data */
1355     pp_context->pp_x_steps = pp_dndi_x_steps;
1356     pp_context->pp_y_steps = pp_dndi_y_steps;
1357     pp_context->pp_set_block_parameter = pp_dndi_set_block_parameter;
1358
1359     pp_static_parameter.grf1.statistics_surface_picth = w / 2;
1360     pp_static_parameter.grf1.r1_6.di.top_field_first = 0;
1361     pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m2 = 64;
1362     pp_static_parameter.grf4.r4_2.di.motion_history_coefficient_m1 = 192;
1363
1364     pp_inline_parameter.grf5.block_count_x = w / 16;   /* 1 x N */
1365     pp_inline_parameter.grf5.number_blocks = w / 16;
1366     pp_inline_parameter.grf5.block_vertical_mask = 0xff;
1367     pp_inline_parameter.grf5.block_horizontal_mask = 0xffff;
1368
1369     pp_dndi_context->dest_w = w;
1370     pp_dndi_context->dest_h = h;
1371 }
1372
1373 static void
1374 ironlake_pp_initialize(
1375     VADriverContextP   ctx,
1376     struct i965_post_processing_context *pp_context,
1377     VASurfaceID        in_surface_id,
1378     VASurfaceID        out_surface_id,
1379     const VARectangle *src_rect,
1380     const VARectangle *dst_rect,
1381     int                pp_index
1382 )
1383 {
1384     struct i965_driver_data *i965 = i965_driver_data(ctx);
1385     struct pp_module *pp_module;
1386     dri_bo *bo;
1387     int i;
1388
1389     dri_bo_unreference(pp_context->curbe.bo);
1390     bo = dri_bo_alloc(i965->intel.bufmgr,
1391                       "constant buffer",
1392                       4096, 
1393                       4096);
1394     assert(bo);
1395     pp_context->curbe.bo = bo;
1396
1397     dri_bo_unreference(pp_context->binding_table.bo);
1398     bo = dri_bo_alloc(i965->intel.bufmgr, 
1399                       "binding table",
1400                       sizeof(unsigned int), 
1401                       4096);
1402     assert(bo);
1403     pp_context->binding_table.bo = bo;
1404
1405     dri_bo_unreference(pp_context->idrt.bo);
1406     bo = dri_bo_alloc(i965->intel.bufmgr, 
1407                       "interface discriptor", 
1408                       sizeof(struct i965_interface_descriptor), 
1409                       4096);
1410     assert(bo);
1411     pp_context->idrt.bo = bo;
1412     pp_context->idrt.num_interface_descriptors = 0;
1413
1414     dri_bo_unreference(pp_context->sampler_state_table.bo);
1415     bo = dri_bo_alloc(i965->intel.bufmgr, 
1416                       "sampler state table", 
1417                       4096,
1418                       4096);
1419     assert(bo);
1420     dri_bo_map(bo, True);
1421     memset(bo->virtual, 0, bo->size);
1422     dri_bo_unmap(bo);
1423     pp_context->sampler_state_table.bo = bo;
1424
1425     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
1426     bo = dri_bo_alloc(i965->intel.bufmgr, 
1427                       "sampler 8x8 state ",
1428                       4096,
1429                       4096);
1430     assert(bo);
1431     pp_context->sampler_state_table.bo_8x8 = bo;
1432
1433     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
1434     bo = dri_bo_alloc(i965->intel.bufmgr, 
1435                       "sampler 8x8 state ",
1436                       4096,
1437                       4096);
1438     assert(bo);
1439     pp_context->sampler_state_table.bo_8x8_uv = bo;
1440
1441     dri_bo_unreference(pp_context->vfe_state.bo);
1442     bo = dri_bo_alloc(i965->intel.bufmgr, 
1443                       "vfe state", 
1444                       sizeof(struct i965_vfe_state), 
1445                       4096);
1446     assert(bo);
1447     pp_context->vfe_state.bo = bo;
1448     
1449     for (i = 0; i < MAX_PP_SURFACES; i++) {
1450         dri_bo_unreference(pp_context->surfaces[i].ss_bo);
1451         pp_context->surfaces[i].ss_bo = NULL;
1452
1453         dri_bo_unreference(pp_context->surfaces[i].s_bo);
1454         pp_context->surfaces[i].s_bo = NULL;
1455     }
1456
1457     memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
1458     memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
1459     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
1460     pp_context->current_pp = pp_index;
1461     pp_module = &pp_context->pp_modules[pp_index];
1462     
1463     if (pp_module->initialize)
1464         pp_module->initialize(ctx, pp_context,
1465                               in_surface_id, out_surface_id,
1466                               src_rect, dst_rect);
1467 }
1468
/*
 * Run one complete post-processing pass on Ironlake: initialize the
 * context for the module selected by pp_index, emit the pipeline state,
 * and submit the media pipeline commands.
 */
static void
ironlake_post_processing(
    VADriverContextP   ctx,
    struct i965_post_processing_context *pp_context,
    VASurfaceID        in_surface_id,
    VASurfaceID        out_surface_id,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    int                pp_index
)
{
    ironlake_pp_initialize(ctx, pp_context, in_surface_id, out_surface_id, src_rect, dst_rect, pp_index);
    ironlake_pp_states_setup(ctx, pp_context);
    ironlake_pp_pipeline_setup(ctx, pp_context);
}
1484
1485 static void
1486 gen6_pp_initialize(
1487     VADriverContextP   ctx,
1488     struct i965_post_processing_context *pp_context,
1489     VASurfaceID        in_surface_id,
1490     VASurfaceID        out_surface_id,
1491     const VARectangle *src_rect,
1492     const VARectangle *dst_rect,
1493     int                pp_index
1494 )
1495 {
1496     struct i965_driver_data *i965 = i965_driver_data(ctx);
1497     struct pp_module *pp_module;
1498     dri_bo *bo;
1499     int i;
1500
1501     dri_bo_unreference(pp_context->curbe.bo);
1502     bo = dri_bo_alloc(i965->intel.bufmgr,
1503                       "constant buffer",
1504                       4096, 
1505                       4096);
1506     assert(bo);
1507     pp_context->curbe.bo = bo;
1508
1509     dri_bo_unreference(pp_context->binding_table.bo);
1510     bo = dri_bo_alloc(i965->intel.bufmgr, 
1511                       "binding table",
1512                       sizeof(unsigned int), 
1513                       4096);
1514     assert(bo);
1515     pp_context->binding_table.bo = bo;
1516
1517     dri_bo_unreference(pp_context->idrt.bo);
1518     bo = dri_bo_alloc(i965->intel.bufmgr, 
1519                       "interface discriptor", 
1520                       sizeof(struct gen6_interface_descriptor_data), 
1521                       4096);
1522     assert(bo);
1523     pp_context->idrt.bo = bo;
1524     pp_context->idrt.num_interface_descriptors = 0;
1525
1526     dri_bo_unreference(pp_context->sampler_state_table.bo);
1527     bo = dri_bo_alloc(i965->intel.bufmgr, 
1528                       "sampler state table", 
1529                       4096,
1530                       4096);
1531     assert(bo);
1532     dri_bo_map(bo, True);
1533     memset(bo->virtual, 0, bo->size);
1534     dri_bo_unmap(bo);
1535     pp_context->sampler_state_table.bo = bo;
1536
1537     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
1538     bo = dri_bo_alloc(i965->intel.bufmgr, 
1539                       "sampler 8x8 state ",
1540                       4096,
1541                       4096);
1542     assert(bo);
1543     pp_context->sampler_state_table.bo_8x8 = bo;
1544
1545     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
1546     bo = dri_bo_alloc(i965->intel.bufmgr, 
1547                       "sampler 8x8 state ",
1548                       4096,
1549                       4096);
1550     assert(bo);
1551     pp_context->sampler_state_table.bo_8x8_uv = bo;
1552
1553     dri_bo_unreference(pp_context->vfe_state.bo);
1554     bo = dri_bo_alloc(i965->intel.bufmgr, 
1555                       "vfe state", 
1556                       sizeof(struct i965_vfe_state), 
1557                       4096);
1558     assert(bo);
1559     pp_context->vfe_state.bo = bo;
1560     
1561     for (i = 0; i < MAX_PP_SURFACES; i++) {
1562         dri_bo_unreference(pp_context->surfaces[i].ss_bo);
1563         pp_context->surfaces[i].ss_bo = NULL;
1564
1565         dri_bo_unreference(pp_context->surfaces[i].s_bo);
1566         pp_context->surfaces[i].s_bo = NULL;
1567     }
1568
1569     memset(&pp_static_parameter, 0, sizeof(pp_static_parameter));
1570     memset(&pp_inline_parameter, 0, sizeof(pp_inline_parameter));
1571     assert(pp_index >= PP_NULL && pp_index < NUM_PP_MODULES);
1572     pp_context->current_pp = pp_index;
1573     pp_module = &pp_context->pp_modules[pp_index];
1574     
1575     if (pp_module->initialize)
1576         pp_module->initialize(ctx, pp_context,
1577                               in_surface_id, out_surface_id,
1578                               src_rect, dst_rect);
1579 }
1580
1581 static void
1582 gen6_pp_binding_table(struct i965_post_processing_context *pp_context)
1583 {
1584     unsigned int *binding_table;
1585     dri_bo *bo = pp_context->binding_table.bo;
1586     int i;
1587
1588     dri_bo_map(bo, 1);
1589     assert(bo->virtual);
1590     binding_table = bo->virtual;
1591     memset(binding_table, 0, bo->size);
1592
1593     for (i = 0; i < MAX_PP_SURFACES; i++) {
1594         if (pp_context->surfaces[i].ss_bo) {
1595             assert(pp_context->surfaces[i].s_bo);
1596
1597             binding_table[i] = pp_context->surfaces[i].ss_bo->offset;
1598             dri_bo_emit_reloc(bo,
1599                               I915_GEM_DOMAIN_INSTRUCTION, 0,
1600                               0,
1601                               i * sizeof(*binding_table),
1602                               pp_context->surfaces[i].ss_bo);
1603         }
1604     
1605     }
1606
1607     dri_bo_unmap(bo);
1608 }
1609
/* Fill the single GEN6 interface descriptor for the currently selected
 * PP module: kernel entry point, sampler state pointer and binding
 * table pointer, each followed by a relocation so the GPU-visible
 * offsets remain correct if the underlying buffers move. */
static void
gen6_pp_interface_descriptor_table(struct i965_post_processing_context *pp_context)
{
    struct gen6_interface_descriptor_data *desc;
    dri_bo *bo;
    int pp_index = pp_context->current_pp;

    bo = pp_context->idrt.bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);
    desc = bo->virtual;
    memset(desc, 0, sizeof(*desc));
    /* kernel start pointer is in 64-byte units (offset >> 6) */
    desc->desc0.kernel_start_pointer = 
        pp_context->pp_modules[pp_index].kernel.bo->offset >> 6; /* reloc */
    desc->desc1.single_program_flow = 1;
    desc->desc1.floating_point_mode = FLOATING_POINT_IEEE_754;
    desc->desc2.sampler_count = 1;      /* 1 - 4 samplers used */
    /* sampler state and binding table pointers are in 32-byte units */
    desc->desc2.sampler_state_pointer = 
        pp_context->sampler_state_table.bo->offset >> 5;
    desc->desc3.binding_table_entry_count = 0;
    desc->desc3.binding_table_pointer = 
        pp_context->binding_table.bo->offset >> 5; /*reloc */
    desc->desc4.constant_urb_entry_read_offset = 0;
    desc->desc4.constant_urb_entry_read_length = 4; /* grf 1-4 */

    /* Relocations targeting the dwords written above; delta encodes the
     * low bits packed alongside the pointer in each dword. */
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct gen6_interface_descriptor_data, desc0),
                      pp_context->pp_modules[pp_index].kernel.bo);

    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      desc->desc2.sampler_count << 2,
                      offsetof(struct gen6_interface_descriptor_data, desc2),
                      pp_context->sampler_state_table.bo);

    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      desc->desc3.binding_table_entry_count,
                      offsetof(struct gen6_interface_descriptor_data, desc3),
                      pp_context->binding_table.bo);

    dri_bo_unmap(bo);
    pp_context->idrt.num_interface_descriptors++;
}
1656
1657 static void
1658 gen6_pp_upload_constants(struct i965_post_processing_context *pp_context)
1659 {
1660     unsigned char *constant_buffer;
1661
1662     assert(sizeof(pp_static_parameter) == 128);
1663     dri_bo_map(pp_context->curbe.bo, 1);
1664     assert(pp_context->curbe.bo->virtual);
1665     constant_buffer = pp_context->curbe.bo->virtual;
1666     memcpy(constant_buffer, &pp_static_parameter, sizeof(pp_static_parameter));
1667     dri_bo_unmap(pp_context->curbe.bo);
1668 }
1669
1670 static void
1671 gen6_pp_states_setup(VADriverContextP ctx,
1672                      struct i965_post_processing_context *pp_context)
1673 {
1674     gen6_pp_binding_table(pp_context);
1675     gen6_pp_interface_descriptor_table(pp_context);
1676     gen6_pp_upload_constants(pp_context);
1677 }
1678
1679 static void
1680 gen6_pp_pipeline_select(VADriverContextP ctx,
1681                         struct i965_post_processing_context *pp_context)
1682 {
1683     struct intel_batchbuffer *batch = pp_context->batch;
1684
1685     BEGIN_BATCH(batch, 1);
1686     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
1687     ADVANCE_BATCH(batch);
1688 }
1689
1690 static void
1691 gen6_pp_state_base_address(VADriverContextP ctx,
1692                            struct i965_post_processing_context *pp_context)
1693 {
1694     struct intel_batchbuffer *batch = pp_context->batch;
1695
1696     BEGIN_BATCH(batch, 10);
1697     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
1698     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1699     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1700     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1701     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1702     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1703     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1704     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1705     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1706     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1707     ADVANCE_BATCH(batch);
1708 }
1709
/* MEDIA_VFE_STATE: program URB allocation for the media pipeline using
 * the entry counts/sizes chosen in i965_post_processing_context_init().
 * NOTE(review): exact bit-field meanings of dwords 2 and 4 should be
 * confirmed against the GEN6 PRM MEDIA_VFE_STATE description. */
static void
gen6_pp_vfe_state(VADriverContextP ctx,
                  struct i965_post_processing_context *pp_context)
{
    struct intel_batchbuffer *batch = pp_context->batch;

    BEGIN_BATCH(batch, 8);
    OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
    OUT_BATCH(batch, 0);
    /* dword 2: packs (num_vfe_entries - 1) at bit 16 and num_vfe_entries
     * at bit 8 */
    OUT_BATCH(batch,
              (pp_context->urb.num_vfe_entries - 1) << 16 |
              pp_context->urb.num_vfe_entries << 8);
    OUT_BATCH(batch, 0);
    /* dword 4: URB entry size and total CURBE allocation */
    OUT_BATCH(batch,
              (pp_context->urb.size_vfe_entry * 2) << 16 |  /* in 256 bits unit */
              (pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 2 - 1));            /* in 256 bits unit */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}
1731
/* MEDIA_CURBE_LOAD: point the media pipeline at the constant buffer
 * filled by gen6_pp_upload_constants().
 * NOTE(review): the length is size_cs_entry * num_cs_entries * 512;
 * confirm the unit against the PRM — the assert below only guarantees
 * it fits inside curbe.bo. */
static void
gen6_pp_curbe_load(VADriverContextP ctx,
                   struct i965_post_processing_context *pp_context)
{
    struct intel_batchbuffer *batch = pp_context->batch;

    assert(pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512 <= pp_context->curbe.bo->size);

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch,
              pp_context->urb.size_cs_entry * pp_context->urb.num_cs_entries * 512);
    /* buffer address patched via relocation */
    OUT_RELOC(batch, 
              pp_context->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    ADVANCE_BATCH(batch);
}
1751
/* MEDIA_INTERFACE_DESCRIPTOR_LOAD: hand the descriptor array built by
 * gen6_pp_interface_descriptor_table() to the media pipeline. */
static void
gen6_interface_descriptor_load(VADriverContextP ctx,
                               struct i965_post_processing_context *pp_context)
{
    struct intel_batchbuffer *batch = pp_context->batch;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    /* total size of the descriptor data, in bytes */
    OUT_BATCH(batch,
              pp_context->idrt.num_interface_descriptors * sizeof(struct gen6_interface_descriptor_data));
    /* descriptor buffer address patched via relocation */
    OUT_RELOC(batch, 
              pp_context->idrt.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              0);
    ADVANCE_BATCH(batch);
}
1769
1770 static void
1771 gen6_pp_object_walker(VADriverContextP ctx,
1772                       struct i965_post_processing_context *pp_context)
1773 {
1774     struct intel_batchbuffer *batch = pp_context->batch;
1775     int x, x_steps, y, y_steps;
1776
1777     x_steps = pp_context->pp_x_steps(&pp_context->private_context);
1778     y_steps = pp_context->pp_y_steps(&pp_context->private_context);
1779
1780     for (y = 0; y < y_steps; y++) {
1781         for (x = 0; x < x_steps; x++) {
1782             if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
1783                 BEGIN_BATCH(batch, 22);
1784                 OUT_BATCH(batch, CMD_MEDIA_OBJECT | 20);
1785                 OUT_BATCH(batch, 0);
1786                 OUT_BATCH(batch, 0); /* no indirect data */
1787                 OUT_BATCH(batch, 0);
1788                 OUT_BATCH(batch, 0); /* scoreboard */
1789                 OUT_BATCH(batch, 0);
1790
1791                 /* inline data grf 5-6 */
1792                 assert(sizeof(pp_inline_parameter) == 64);
1793                 intel_batchbuffer_data(batch, &pp_inline_parameter, sizeof(pp_inline_parameter));
1794
1795                 ADVANCE_BATCH(batch);
1796             }
1797         }
1798     }
1799 }
1800
1801 static void
1802 gen6_pp_pipeline_setup(VADriverContextP ctx,
1803                        struct i965_post_processing_context *pp_context)
1804 {
1805     struct intel_batchbuffer *batch = pp_context->batch;
1806
1807     intel_batchbuffer_start_atomic(batch, 0x1000);
1808     intel_batchbuffer_emit_mi_flush(batch);
1809     gen6_pp_pipeline_select(ctx, pp_context);
1810     gen6_pp_curbe_load(ctx, pp_context);
1811     gen6_interface_descriptor_load(ctx, pp_context);
1812     gen6_pp_state_base_address(ctx, pp_context);
1813     gen6_pp_vfe_state(ctx, pp_context);
1814     gen6_pp_object_walker(ctx, pp_context);
1815     intel_batchbuffer_end_atomic(batch);
1816 }
1817
1818 static void
1819 gen6_post_processing(
1820     VADriverContextP   ctx,
1821     struct i965_post_processing_context *pp_context,
1822     VASurfaceID        in_surface_id,
1823     VASurfaceID        out_surface_id,
1824     const VARectangle *src_rect,
1825     const VARectangle *dst_rect,
1826     int                pp_index
1827 )
1828 {
1829     gen6_pp_initialize(ctx, pp_context, in_surface_id, out_surface_id, src_rect, dst_rect, pp_index);
1830     gen6_pp_states_setup(ctx, pp_context);
1831     gen6_pp_pipeline_setup(ctx, pp_context);
1832 }
1833
1834 static void
1835 i965_post_processing_internal(
1836     VADriverContextP   ctx,
1837     struct i965_post_processing_context *pp_context,
1838     VASurfaceID        in_surface_id,
1839     VASurfaceID        out_surface_id,
1840     const VARectangle *src_rect,
1841     const VARectangle *dst_rect,
1842     int                pp_index
1843 )
1844 {
1845     struct i965_driver_data *i965 = i965_driver_data(ctx);
1846
1847     if (IS_GEN6(i965->intel.device_id) ||
1848         IS_GEN7(i965->intel.device_id))
1849         gen6_post_processing(ctx, pp_context, in_surface_id, out_surface_id, src_rect, dst_rect, pp_index);
1850     else
1851         ironlake_post_processing(ctx, pp_context, in_surface_id, out_surface_id, src_rect, dst_rect, pp_index);
1852 }
1853
/* Forward declarations of surface management entry points implemented
 * elsewhere in the driver; used below to create and destroy the
 * intermediate surfaces needed for deinterlacing and scaling. */
VAStatus 
i965_DestroySurfaces(VADriverContextP ctx,
                     VASurfaceID *surface_list,
                     int num_surfaces);
VAStatus 
i965_CreateSurfaces(VADriverContextP ctx,
                    int width,
                    int height,
                    int format,
                    int num_surfaces,
                    VASurfaceID *surfaces);
1865 VASurfaceID
1866 i965_post_processing(
1867     VADriverContextP   ctx,
1868     VASurfaceID        surface,
1869     const VARectangle *src_rect,
1870     const VARectangle *dst_rect,
1871     unsigned int       flags,
1872     int               *has_done_scaling  
1873 )
1874 {
1875     struct i965_driver_data *i965 = i965_driver_data(ctx);
1876     VASurfaceID in_surface_id = surface;
1877     VASurfaceID out_surface_id = VA_INVALID_ID;
1878
1879     if (HAS_PP(i965)) {
1880         /* Currently only support post processing for NV12 surface */
1881         if (i965->render_state.interleaved_uv) {
1882             struct object_surface *obj_surface;
1883             VAStatus status;
1884
1885             if (flags & I965_PP_FLAG_DEINTERLACING) {
1886                 obj_surface = SURFACE(in_surface_id);
1887                 status = i965_CreateSurfaces(ctx,
1888                                              obj_surface->orig_width,
1889                                              obj_surface->orig_height,
1890                                              VA_RT_FORMAT_YUV420,
1891                                              1,
1892                                              &out_surface_id);
1893                 assert(status == VA_STATUS_SUCCESS);
1894                 obj_surface = SURFACE(out_surface_id);
1895                 i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
1896                 i965_post_processing_internal(ctx, i965->pp_context,
1897                                               in_surface_id, out_surface_id,
1898                                               src_rect, dst_rect,
1899                                               PP_NV12_DNDI);
1900             }
1901
1902             if (flags & I965_PP_FLAG_AVS) {
1903                 struct i965_render_state *render_state = &i965->render_state;
1904                 struct intel_region *dest_region = render_state->draw_region;
1905
1906                 if (out_surface_id != VA_INVALID_ID)
1907                     in_surface_id = out_surface_id;
1908
1909                 status = i965_CreateSurfaces(ctx,
1910                                              dest_region->width,
1911                                              dest_region->height,
1912                                              VA_RT_FORMAT_YUV420,
1913                                              1,
1914                                              &out_surface_id);
1915                 assert(status == VA_STATUS_SUCCESS);
1916                 obj_surface = SURFACE(out_surface_id);
1917                 i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
1918                 i965_post_processing_internal(ctx, i965->pp_context,
1919                                               in_surface_id, out_surface_id,
1920                                               src_rect, dst_rect,
1921                                               PP_NV12_AVS);
1922
1923                 if (in_surface_id != surface)
1924                     i965_DestroySurfaces(ctx, &in_surface_id, 1);
1925                 
1926                 *has_done_scaling = 1;
1927             }
1928         }
1929     }
1930
1931     return out_surface_id;
1932 }       
1933
1934 static void
1935 i965_post_processing_context_finalize(struct i965_post_processing_context *pp_context)
1936 {
1937     int i;
1938
1939     dri_bo_unreference(pp_context->curbe.bo);
1940     pp_context->curbe.bo = NULL;
1941
1942     for (i = 0; i < MAX_PP_SURFACES; i++) {
1943         dri_bo_unreference(pp_context->surfaces[i].ss_bo);
1944         pp_context->surfaces[i].ss_bo = NULL;
1945
1946         dri_bo_unreference(pp_context->surfaces[i].s_bo);
1947         pp_context->surfaces[i].s_bo = NULL;
1948     }
1949
1950     dri_bo_unreference(pp_context->sampler_state_table.bo);
1951     pp_context->sampler_state_table.bo = NULL;
1952
1953     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8);
1954     pp_context->sampler_state_table.bo_8x8 = NULL;
1955
1956     dri_bo_unreference(pp_context->sampler_state_table.bo_8x8_uv);
1957     pp_context->sampler_state_table.bo_8x8_uv = NULL;
1958
1959     dri_bo_unreference(pp_context->binding_table.bo);
1960     pp_context->binding_table.bo = NULL;
1961
1962     dri_bo_unreference(pp_context->idrt.bo);
1963     pp_context->idrt.bo = NULL;
1964     pp_context->idrt.num_interface_descriptors = 0;
1965
1966     dri_bo_unreference(pp_context->vfe_state.bo);
1967     pp_context->vfe_state.bo = NULL;
1968
1969     dri_bo_unreference(pp_context->stmm.bo);
1970     pp_context->stmm.bo = NULL;
1971
1972     for (i = 0; i < NUM_PP_MODULES; i++) {
1973         struct pp_module *pp_module = &pp_context->pp_modules[i];
1974
1975         dri_bo_unreference(pp_module->kernel.bo);
1976         pp_module->kernel.bo = NULL;
1977     }
1978
1979 }
1980
1981 Bool
1982 i965_post_processing_terminate(VADriverContextP ctx)
1983 {
1984     struct i965_driver_data *i965 = i965_driver_data(ctx);
1985     struct i965_post_processing_context *pp_context = i965->pp_context;
1986
1987     if (pp_context) {
1988         i965_post_processing_context_finalize(pp_context);
1989         free(pp_context);
1990     }
1991
1992     i965->pp_context = NULL;
1993
1994     return True;
1995 }
1996
1997 static void
1998 i965_post_processing_context_init(VADriverContextP ctx,
1999                                   struct i965_post_processing_context *pp_context,
2000                                   struct intel_batchbuffer *batch)
2001 {
2002     struct i965_driver_data *i965 = i965_driver_data(ctx);
2003     int i;
2004
2005     pp_context->urb.size = URB_SIZE((&i965->intel));
2006     pp_context->urb.num_vfe_entries = 32;
2007     pp_context->urb.size_vfe_entry = 1;     /* in 512 bits unit */
2008     pp_context->urb.num_cs_entries = 1;
2009     pp_context->urb.size_cs_entry = 2;      /* in 512 bits unit */
2010     pp_context->urb.vfe_start = 0;
2011     pp_context->urb.cs_start = pp_context->urb.vfe_start + 
2012         pp_context->urb.num_vfe_entries * pp_context->urb.size_vfe_entry;
2013     assert(pp_context->urb.cs_start + 
2014            pp_context->urb.num_cs_entries * pp_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
2015
2016     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen5));
2017     assert(NUM_PP_MODULES == ARRAY_ELEMS(pp_modules_gen6));
2018
2019     if (IS_GEN6(i965->intel.device_id) ||
2020         IS_GEN7(i965->intel.device_id))
2021         memcpy(pp_context->pp_modules, pp_modules_gen6, sizeof(pp_context->pp_modules));
2022     else if (IS_IRONLAKE(i965->intel.device_id))
2023         memcpy(pp_context->pp_modules, pp_modules_gen5, sizeof(pp_context->pp_modules));
2024
2025     for (i = 0; i < NUM_PP_MODULES; i++) {
2026         struct pp_module *pp_module = &pp_context->pp_modules[i];
2027         dri_bo_unreference(pp_module->kernel.bo);
2028         pp_module->kernel.bo = dri_bo_alloc(i965->intel.bufmgr,
2029                                             pp_module->kernel.name,
2030                                             pp_module->kernel.size,
2031                                             4096);
2032         assert(pp_module->kernel.bo);
2033         dri_bo_subdata(pp_module->kernel.bo, 0, pp_module->kernel.size, pp_module->kernel.bin);
2034     }
2035
2036     pp_context->batch = batch;
2037 }
2038
2039 Bool
2040 i965_post_processing_init(VADriverContextP ctx)
2041 {
2042     struct i965_driver_data *i965 = i965_driver_data(ctx);
2043     struct i965_post_processing_context *pp_context = i965->pp_context;
2044
2045     if (HAS_PP(i965)) {
2046         if (pp_context == NULL) {
2047             pp_context = calloc(1, sizeof(*pp_context));
2048             i965_post_processing_context_init(ctx, pp_context, i965->batch);
2049             i965->pp_context = pp_context;
2050         }
2051     }
2052
2053     return True;
2054 }
2055
2056 static void 
2057 i965_proc_picture(VADriverContextP ctx, 
2058                   VAProfile profile, 
2059                   union codec_state *codec_state,
2060                   struct hw_context *hw_context)
2061 {
2062     struct i965_driver_data *i965 = i965_driver_data(ctx);
2063     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
2064     struct proc_state *proc_state = &codec_state->proc;
2065     VAProcPipelineParameterBuffer *pipeline_param = (VAProcPipelineParameterBuffer *)proc_state->pipeline_param->buffer;
2066     VAProcInputParameterBuffer *input_param = (VAProcInputParameterBuffer *)proc_state->input_param->buffer;
2067     struct object_surface *obj_surface;
2068
2069     assert(input_param->surface != VA_INVALID_ID);
2070     assert(proc_state->current_render_target != VA_INVALID_ID);
2071
2072     obj_surface = SURFACE(proc_state->current_render_target);
2073     i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
2074     i965_post_processing_internal(ctx, &proc_context->pp_context,
2075                                   input_param->surface, proc_state->current_render_target,
2076                                   &input_param->region, &pipeline_param->output_region,
2077                                   PP_NV12_AVS);
2078
2079     intel_batchbuffer_flush(hw_context->batch);
2080 }
2081
2082 static void
2083 i965_proc_context_destroy(void *hw_context)
2084 {
2085     struct i965_proc_context *proc_context = (struct i965_proc_context *)hw_context;
2086
2087     i965_post_processing_context_finalize(&proc_context->pp_context);
2088     intel_batchbuffer_free(proc_context->base.batch);
2089     free(proc_context);
2090 }
2091
2092 struct hw_context *
2093 i965_proc_context_init(VADriverContextP ctx, VAProfile profile)
2094 {
2095     struct intel_driver_data *intel = intel_driver_data(ctx);
2096     struct i965_proc_context *proc_context = calloc(1, sizeof(struct i965_proc_context));
2097
2098     proc_context->base.destroy = i965_proc_context_destroy;
2099     proc_context->base.run = i965_proc_picture;
2100     proc_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
2101     i965_post_processing_context_init(ctx, &proc_context->pp_context, proc_context->base.batch);
2102
2103     return (struct hw_context *)proc_context;
2104 }