Handle the MFX change between A stepping and B-stepping for haswell
[platform/upstream/libva-intel-driver.git] / src / gen75_mfc.c
1 /*
2  * Copyright © 2010-2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34
35 #include "assert.h"
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41
42 #define B0_STEP_REV             2
43 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
44
45 static void
46 gen75_mfc_pipe_mode_select(VADriverContextP ctx,
47                           int standard_select,
48                           struct gen6_encoder_context *gen6_encoder_context)
49 {
50     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
51
52     assert(standard_select == MFX_FORMAT_MPEG2 ||
53            standard_select == MFX_FORMAT_AVC);
54
55     BEGIN_BCS_BATCH(batch, 5);
56     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
57     OUT_BCS_BATCH(batch,
58                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
59                   (MFD_MODE_VLD << 15) | /* VLD mode */
60                   (0 << 10) | /* disable Stream-Out */
61                   (1 << 9)  | /* Post Deblocking Output */
62                   (0 << 8)  | /* Pre Deblocking Output */
63                   (0 << 5)  | /* not in stitch mode */
64                   (1 << 4)  | /* encoding mode */
65                   (standard_select << 0));  /* standard select: avc or mpeg2 */
66     OUT_BCS_BATCH(batch,
67                   (0 << 7)  | /* expand NOA bus flag */
68                   (0 << 6)  | /* disable slice-level clock gating */
69                   (0 << 5)  | /* disable clock gating for NOA */
70                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
71                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
72                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
73                   (0 << 1)  |
74                   (0 << 0));
75     OUT_BCS_BATCH(batch, 0);
76     OUT_BCS_BATCH(batch, 0);
77
78     ADVANCE_BCS_BATCH(batch);
79 }
80
81 #define         INTER_MODE_MASK         0x03
82 #define         INTER_8X8               0x03
83 #define         SUBMB_SHAPE_MASK        0x00FF00
84
85 #define         INTER_MV8               (4 << 20)
86 #define         INTER_MV32              (6 << 20)
87
88
89 static void
90 gen75_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
91 {
92     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
93     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
94
95     BEGIN_BCS_BATCH(batch, 6);
96
97     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
98     OUT_BCS_BATCH(batch, 0);
99     OUT_BCS_BATCH(batch,
100                   ((mfc_context->surface_state.height - 1) << 18) |
101                   ((mfc_context->surface_state.width - 1) << 4));
102     OUT_BCS_BATCH(batch,
103                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
104                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
105                   (0 << 22) | /* surface object control state, FIXME??? */
106                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
107                   (0 << 2)  | /* must be 0 for interleave U/V */
108                   (1 << 1)  | /* must be tiled */
109                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
110     OUT_BCS_BATCH(batch,
111                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
112                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
113     OUT_BCS_BATCH(batch, 0);
114     ADVANCE_BCS_BATCH(batch);
115 }
116
/*
 * MFX_PIPE_BUF_ADDR_STATE for B0-and-later stepping Haswell.
 *
 * On the B stepping this command is 61 DWORDs: each buffer slot is a
 * group of DWORDs (the address DWORD followed by extra DWORDs that are
 * written as 0 here), and additional slots (MB status, ILDB) appear at
 * the end.  Only the buffers the encoder actually uses get relocations;
 * every unused slot is programmed to 0.
 */
static void
gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
                struct gen6_encoder_context *gen6_encoder_context)
{
    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
    int i;

    BEGIN_BCS_BATCH(batch, 61);

    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));

    /* DW1-3: pre-deblocking output (unused: the post-deblocking path is
     * selected in MFX_PIPE_MODE_SELECT) */
    OUT_BCS_BATCH(batch, 0);                    /* pre output addr */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW4-6: post-deblocking output */
    if (mfc_context->post_deblocking_output.bo)
        OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);                       /* post output addr */
    else
        OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW7-9: uncompressed picture source */
    OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);                           /* uncompressed data */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW10-12: stream-out / MB status (not used by the encoder) */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW13-15: intra row store scratch buffer */
    OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW16-18: deblocking filter row store scratch buffer */
    OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW19-50: reference pictures — 16 entries of 2 DWORDs each;
     * empty slots are zeroed */
    for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
        if ( mfc_context->reference_surfaces[i].bo != NULL) {
            OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
        OUT_BCS_BATCH(batch, 0);
    }
    /* DW51 */
    OUT_BCS_BATCH(batch, 0);

    /* DW52-54: macroblock status buffer (not used) */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW55-57: ILDB buffer (not used) */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW58-60: second ILDB buffer (not used) */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
203
/*
 * MFX_PIPE_BUF_ADDR_STATE dispatcher: on B0+ stepping Haswell the command
 * layout changed, so delegate to the _bplus variant there; otherwise emit
 * the original 25-DWORD A-stepping layout (one DWORD per buffer address).
 */
static void
gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
{
    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
    int i;

    struct i965_driver_data *i965 = i965_driver_data(ctx);

    /* B-stepping hardware takes the extended command form */
    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfc_pipe_buf_addr_state_bplus(ctx, gen6_encoder_context);
        return;
    }

    BEGIN_BCS_BATCH(batch, 25);

    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));

    /* DW1: pre-deblocking output, unused */
    OUT_BCS_BATCH(batch, 0);                    /* pre output addr */

    /* DW2: post-deblocking output */
    OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);                           /* post output addr */

    /* DW3: uncompressed picture source */
    OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);                           /* uncompressed data */

    OUT_BCS_BATCH(batch, 0);                    /* DW4: StreamOut data, unused */
    /* DW5: intra row store scratch buffer */
    OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    /* DW6: deblocking filter row store scratch buffer */
    OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    /* DW7-22: reference pictures, one DWORD each; empty slots zeroed */
    for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
        if ( mfc_context->reference_surfaces[i].bo != NULL) {
            OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }
    OUT_BCS_BATCH(batch, 0);                    /* DW23: no MB status buffer */

    OUT_BCS_BATCH(batch, 0);                    /* DW24 */

    ADVANCE_BCS_BATCH(batch);
}
255
256
/*
 * MFX_IND_OBJ_BASE_ADDR_STATE for B0+ stepping Haswell (26 DWORDs).
 *
 * Points the MFX indirect-object fetchers at the VME output (motion
 * vectors) and at the PAK-BSE buffer that receives the coded bitstream.
 * The IT-COFF and DBLK indirect objects are decode-only and left zero.
 */
static void
gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
                struct gen6_encoder_context *gen6_encoder_context)
{
    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;

    BEGIN_BCS_BATCH(batch, 26);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* DW1-3: MFX indirect bitstream offset (unused for encode) */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* DW4-5: MFX indirect bitstream upper bound */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW6-10: MFX Indirect MV Object Base Address = VME output buffer */
    OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
    OUT_BCS_BATCH(batch, 0);

    /* DW11-15: MFX IT-COFF — not used on encoder */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW16-20: MFX indirect DBLK — not used on encoder */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW21-25: MFC Indirect PAK-BSE Object Base Address (encoder output) */
    OUT_BCS_RELOC(batch,
                  mfc_context->mfc_indirect_pak_bse_object.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0x00000000);

    ADVANCE_BCS_BATCH(batch);
}
310
/*
 * MFX_IND_OBJ_BASE_ADDR_STATE dispatcher: delegate to the 26-DWORD
 * _bplus form on B0+ stepping, otherwise emit the 11-DWORD A-stepping
 * layout.  Programs the MV input (VME output) and the PAK-BSE bitstream
 * output addresses.
 */
static void
gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
{
    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    /* B-stepping hardware takes the extended command form */
    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfc_ind_obj_base_addr_state_bplus(ctx, gen6_encoder_context);
        return;
    }

    BEGIN_BCS_BATCH(batch, 11);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* MFX Indirect MV Object Base Address = VME output buffer */
    OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* MFC Indirect PAK-BSE Object Base Address for Encoder */
    OUT_BCS_RELOC(batch,
                  mfc_context->mfc_indirect_pak_bse_object.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0x00000000); /* must set, up to 2G */

    ADVANCE_BCS_BATCH(batch);
}
345
346 static void
347 gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
348                 struct gen6_encoder_context *gen6_encoder_context)
349 {
350     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
351     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
352
353     BEGIN_BCS_BATCH(batch, 10);
354
355     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
356     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
357                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
358                   0);
359     OUT_BCS_BATCH(batch, 0);
360     OUT_BCS_BATCH(batch, 0);
361         
362         /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
363     OUT_BCS_BATCH(batch, 0);
364     OUT_BCS_BATCH(batch, 0);
365     OUT_BCS_BATCH(batch, 0);
366
367         /* the DW7-9 is for Bitplane Read Buffer Base Address */
368     OUT_BCS_BATCH(batch, 0);
369     OUT_BCS_BATCH(batch, 0);
370     OUT_BCS_BATCH(batch, 0);
371
372     ADVANCE_BCS_BATCH(batch);
373 }
374
375 static void
376 gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
377 {
378     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
379     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
380     struct i965_driver_data *i965 = i965_driver_data(ctx);
381
382     if (IS_STEPPING_BPLUS(i965)) {
383         gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, gen6_encoder_context);
384         return;
385     }
386  
387
388     BEGIN_BCS_BATCH(batch, 4);
389
390     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
391     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
392                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
393                   0);
394     OUT_BCS_BATCH(batch, 0);
395     OUT_BCS_BATCH(batch, 0);
396
397     ADVANCE_BCS_BATCH(batch);
398 }
399
/*
 * Emit MFX_AVC_IMG_STATE (16 DWORDs) describing the frame being encoded:
 * dimensions in macroblocks, QP offsets, CABAC entropy coding, 4:2:0
 * chroma, frame-MB-only pictures, and the inter/intra MB size limits.
 * Rate-control related DWORDs are left at fixed values.
 */
static void
gen75_mfc_avc_img_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
{
    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;

    /* round the frame dimensions up to whole macroblocks */
    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;

    BEGIN_BCS_BATCH(batch, 16);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    OUT_BCS_BATCH(batch,
                  ((width_in_mbs * height_in_mbs) & 0xFFFF));
    OUT_BCS_BATCH(batch, 
                  ((height_in_mbs - 1) << 16) | 
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch, 
                  (0 << 24) |   /* Second Chroma QP Offset */
                  (0 << 16) |   /* Chroma QP Offset */
                  (0 << 14) |   /* Max-bit conformance Intra flag */
                  (0 << 13) |   /* Max Macroblock size conformance Inter flag */
                  (0 << 12) |   /* FIXME: Weighted_Pred_Flag */
                  (0 << 10) |   /* FIXME: Weighted_BiPred_Idc */
                  (0 << 8)  |   /* FIXME: Image Structure */
                  (0 << 0) );   /* Current Decoded Image Frame Store ID, reserved in Encode mode */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |   /* Minimum Frame size */
                  (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
                  (0 << 14) |   /* Load BitStream Pointer only once, 1 slice 1 frame */
                  (0 << 13) |   /* CABAC 0 word insertion test enable */
                  (1 << 12) |   /* MVUnpackedEnable, compliant to DXVA */
                  (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
                  (0 << 8)  |   /* FIXME: MbMvFormatFlag */
                  (1 << 7)  |   /* 0:CAVLC encoding mode, 1:CABAC */
                  (0 << 6)  |   /* Only valid for VLD decoding mode */
                  (0 << 5)  |   /* Constrained Intra Prediction Flag, from PPS */
                  (0 << 4)  |   /* Direct 8x8 inference flag */
                  (0 << 3)  |   /* Only 8x8 IDCT Transform Mode Flag */
                  (1 << 2)  |   /* Frame MB only flag */
                  (0 << 1)  |   /* MBAFF mode is in active */
                  (0 << 0));    /* Field picture flag */
    OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
    OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
                  (0xBB8 << 16) |       /* InterMbMaxSz */
                  (0xEE8) );            /* IntraMbMaxSz */
    OUT_BCS_BATCH(batch, 0);            /* Reserved */
    OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
    OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
    /* NOTE(review): the meanings of the two fixed DWORDs below are not
     * evident from this file — presumably rate-control defaults; confirm
     * against the Haswell MFX PRM before changing. */
    OUT_BCS_BATCH(batch, 0x8C000000);
    OUT_BCS_BATCH(batch, 0x00010000);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
457
458
/*
 * MFX_AVC_DIRECTMODE_STATE for B0+ stepping Haswell (71 DWORDs).
 *
 * Programs the direct-MV buffers for the reference frames (two DWORDs per
 * entry), the MV buffer for the current frame, and the POC list.
 */
static void
gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
                        struct gen6_encoder_context *gen6_encoder_context)
{
    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;

    int i;

    BEGIN_BCS_BATCH(batch, 71);

    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* Reference frames and Current frames */
    /* DW1-32: direct-MV buffers for the references; note the step of 2 —
     * only every other buffer slot is sent, each as a 2-DWORD entry */
    for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
        if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
            OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
            OUT_BCS_BATCH(batch, 0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }
    /* DW33 */
    OUT_BCS_BATCH(batch, 0);

    /* DW34-36: MV buffer for the current frame */
    OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW37-68: POC list — pairs of identical values (0,0,1,1,...) */
    for(i = 0; i < 32; i++) {
        OUT_BCS_BATCH(batch, i/2);
    }
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
504
/*
 * MFX_AVC_DIRECTMODE_STATE dispatcher: the B0+ stepping uses the
 * 71-DWORD extended layout; the A stepping takes the original 69-DWORD
 * form with one DWORD per DMV buffer and a zeroed POC list.
 */
static void gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
{
    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
    int i;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfc_avc_directmode_state_bplus(ctx, gen6_encoder_context);
        return;
    }

    BEGIN_BCS_BATCH(batch, 69);

    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
    /* reference DMV buffers, one DWORD each; empty slots zeroed
     * TODO: reference DMV */
    for (i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i++){
        if (mfc_context->direct_mv_buffers[i].bo)
                OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
        else
                OUT_BCS_BATCH(batch, 0);
    }

    /* current-frame DMV buffer.
     * TODO: current DMV just for test */
#if 0
    OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[0].bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
#else
    //drm_intel_bo_pin(mfc_context->direct_mv_buffers[0].bo, 0x1000);
    //OUT_BCS_BATCH(batch, mfc_context->direct_mv_buffers[0].bo->offset);
    OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
#endif


    OUT_BCS_BATCH(batch, 0);

    /* POC list, all zero.  TODO: POL list */
    for(i = 0; i < 34; i++) {
        OUT_BCS_BATCH(batch, 0);
    }

    ADVANCE_BCS_BATCH(batch);
}
553
554 static void gen75_mfc_avc_slice_state(VADriverContextP ctx,
555                                      int intra_slice,
556                                      struct gen6_encoder_context *gen6_encoder_context)
557 {
558     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
559     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
560
561     BEGIN_BCS_BATCH(batch, 11);;
562
563     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
564
565     if ( intra_slice )
566         OUT_BCS_BATCH(batch, 2);                        /*Slice Type: I Slice*/
567     else
568         OUT_BCS_BATCH(batch, 0);                        /*Slice Type: P Slice*/
569
570     if ( intra_slice )
571         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
572     else 
573         OUT_BCS_BATCH(batch, 0x00010000);       /*1 reference frame*/
574
575     OUT_BCS_BATCH(batch, (0<<24) |                /*Enable deblocking operation*/
576                   (26<<16) |                    /*Slice Quantization Parameter*/
577                   0x0202 );
578     OUT_BCS_BATCH(batch, 0);                    /*First MB X&Y , the postion of current slice*/
579     OUT_BCS_BATCH(batch, ( ((mfc_context->surface_state.height+15)/16) << 16) );
580
581     OUT_BCS_BATCH(batch, 
582                   (0<<31) |             /*RateControlCounterEnable = disable*/
583                   (1<<30) |             /*ResetRateControlCounter*/
584                   (2<<28) |             /*RC Triggle Mode = Loose Rate Control*/
585                   (1<<19) |             /*IsLastSlice*/
586                   (0<<18) |             /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
587                   (0<<17) |             /*HeaderPresentFlag*/   
588                   (1<<16) |             /*SliceData PresentFlag*/
589                   (0<<15) |             /*TailPresentFlag*/
590                   (1<<13) |             /*RBSP NAL TYPE*/       
591                   (0<<12) );            /*CabacZeroWordInsertionEnable*/
592         
593
594     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
595
596     OUT_BCS_BATCH(batch, 0);
597     OUT_BCS_BATCH(batch, 0);
598     OUT_BCS_BATCH(batch, 0);
599
600     ADVANCE_BCS_BATCH(batch);
601 }
602
603 static void
604 gen75_mfc_qm_state(VADriverContextP ctx,
605                   int qm_type,
606                   unsigned int *qm,
607                   int qm_length,
608                   struct gen6_encoder_context *gen6_encoder_context)
609 {
610     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
611     unsigned int qm_buffer[16];
612
613     assert(qm_length <= 16);
614     assert(sizeof(*qm) == 4);
615     memcpy(qm_buffer, qm, qm_length * 4);
616
617     BEGIN_BCS_BATCH(batch, 18);
618     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
619     OUT_BCS_BATCH(batch, qm_type << 0);
620     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
621     ADVANCE_BCS_BATCH(batch);
622 }
623
624 static void gen75_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
625 {
626     unsigned int qm[16] = {
627         0x10101010, 0x10101010, 0x10101010, 0x10101010,
628         0x10101010, 0x10101010, 0x10101010, 0x10101010,
629         0x10101010, 0x10101010, 0x10101010, 0x10101010,
630         0x10101010, 0x10101010, 0x10101010, 0x10101010
631     };
632
633     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, gen6_encoder_context);
634     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, gen6_encoder_context);
635     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, gen6_encoder_context);
636     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, gen6_encoder_context);
637 }
638
639 static void
640 gen75_mfc_fqm_state(VADriverContextP ctx,
641                    int fqm_type,
642                    unsigned int *fqm,
643                    int fqm_length,
644                    struct gen6_encoder_context *gen6_encoder_context)
645 {
646     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
647     unsigned int fqm_buffer[32];
648
649     assert(fqm_length <= 32);
650     assert(sizeof(*fqm) == 4);
651     memcpy(fqm_buffer, fqm, fqm_length * 4);
652
653     BEGIN_BCS_BATCH(batch, 34);
654     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
655     OUT_BCS_BATCH(batch, fqm_type << 0);
656     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
657     ADVANCE_BCS_BATCH(batch);
658 }
659
660 static void gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
661 {
662     unsigned int qm[32] = {
663         0x10001000, 0x10001000, 0x10001000, 0x10001000,
664         0x10001000, 0x10001000, 0x10001000, 0x10001000,
665         0x10001000, 0x10001000, 0x10001000, 0x10001000,
666         0x10001000, 0x10001000, 0x10001000, 0x10001000,
667         0x10001000, 0x10001000, 0x10001000, 0x10001000,
668         0x10001000, 0x10001000, 0x10001000, 0x10001000,
669         0x10001000, 0x10001000, 0x10001000, 0x10001000,
670         0x10001000, 0x10001000, 0x10001000, 0x10001000
671     };
672
673     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, gen6_encoder_context);
674     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, gen6_encoder_context);
675     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, gen6_encoder_context);
676     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, gen6_encoder_context);
677 }
678
679 static void gen75_mfc_avc_ref_idx_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
680 {
681     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
682     int i;
683
684     BEGIN_BCS_BATCH(batch, 10);
685
686     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
687     OUT_BCS_BATCH(batch, 0);                  //Select L0
688
689     OUT_BCS_BATCH(batch, 0x80808000);         //Only 1 reference
690     for(i = 0; i < 7; i++) {
691         OUT_BCS_BATCH(batch, 0x80808080);
692     }
693
694     ADVANCE_BCS_BATCH(batch);
695 }
696         
697 static void
698 gen75_mfc_avc_insert_object(VADriverContextP ctx, int flush_data, struct gen6_encoder_context *gen6_encoder_context)
699 {
700     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
701
702     BEGIN_BCS_BATCH(batch, 4);
703
704     OUT_BCS_BATCH(batch, MFC_AVC_INSERT_OBJECT | (4 -2 ) );
705     OUT_BCS_BATCH(batch, (32<<8) | 
706                   (1 << 3) |
707                   (1 << 2) |
708                   (flush_data << 1) |
709                   (1<<0) );
710     OUT_BCS_BATCH(batch, 0x00000003);
711     OUT_BCS_BATCH(batch, 0xABCD1234);
712
713     ADVANCE_BCS_BATCH(batch);
714 }
715
/*
 * Emit one MFC_AVC_PAK_OBJECT for an intra macroblock at MB position
 * (x, y), repacking the VME output message into the PAK layout.
 *
 * @end_mb  non-zero for the last MB of the slice (bit 26 of DW6)
 * @qp      slice/MB quantization parameter
 * @msg     VME output record; msg[0] holds mode/mbtype bits,
 *          msg[1..3] carry the intra prediction modes
 *
 * Returns the command length in DWORDs so the caller can advance its
 * batch accounting.
 */
static int
gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg,
                              struct gen6_encoder_context *gen6_encoder_context)
{
    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
    int len_in_dwords = 12;

    unsigned int intra_msg;
#define         INTRA_MSG_FLAG          (1 << 13)
#define         INTRA_MBTYPE_MASK       (0x1F0000)

    BEGIN_BCS_BATCH(batch, len_in_dwords);

    /* keep the low mode bits, mark as intra, and move the MB type field
     * from bits 20:16 of the VME message down by 8 */
    intra_msg = msg[0] & 0xC0FF;
    intra_msg |= INTRA_MSG_FLAG;
    intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
    OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 
                  (0 << 24) |           /* PackedMvNum, Debug*/
                  (0 << 20) |           /* No motion vector */
                  (1 << 19) |           /* CbpDcY */
                  (1 << 18) |           /* CbpDcU */
                  (1 << 17) |           /* CbpDcV */
                  intra_msg);

    OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);          /* Code Block Pattern for Y*/
    OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
    OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */

    /*Stuff for Intra MB*/
    OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
    OUT_BCS_BATCH(batch, msg[2]);       
    OUT_BCS_BATCH(batch, msg[3]&0xFC);          

    OUT_BCS_BATCH(batch, 0x00000);      /*MaxSizeInWord and TargetSzieInWord*/
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    return len_in_dwords;
}
759
760 static int gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
761         unsigned int offset, unsigned int *msg, struct gen6_encoder_context *gen6_encoder_context)
762 {
763     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
764     int len_in_dwords = 12;
765     unsigned int inter_msg;
766
767     BEGIN_BCS_BATCH(batch, len_in_dwords);
768
769     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
770
771         inter_msg = 32;
772         /* MV quantity */
773         if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
774                 if (msg[1] & SUBMB_SHAPE_MASK)
775                         inter_msg = 128;
776         }
777     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
778
779     OUT_BCS_BATCH(batch, offset);
780         inter_msg = msg[0] & (0x1F00FFFF);
781         inter_msg |= INTER_MV8;
782         if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
783                         (msg[1] & SUBMB_SHAPE_MASK)) {
784                 inter_msg |= INTER_MV32;
785         }
786
787     OUT_BCS_BATCH(batch, inter_msg);
788
789     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
790     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */    
791     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);    /* Last MB */
792
793     /*Stuff for Inter MB*/
794         inter_msg = msg[1] >> 8;
795     OUT_BCS_BATCH(batch, inter_msg);        
796     OUT_BCS_BATCH(batch, 0x0);    
797     OUT_BCS_BATCH(batch, 0x0);        
798
799     OUT_BCS_BATCH(batch, 0x00000000); /*MaxSizeInWord and TargetSzieInWord*/
800
801     OUT_BCS_BATCH(batch, 0x0);        
802
803     ADVANCE_BCS_BATCH(batch);
804
805     return len_in_dwords;
806 }
807
808 static void gen75_mfc_init(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
809 {
810     struct i965_driver_data *i965 = i965_driver_data(ctx);
811     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
812     dri_bo *bo;
813     int i;
814
815     /*Encode common setup for MFC*/
816     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
817     mfc_context->post_deblocking_output.bo = NULL;
818
819     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
820     mfc_context->pre_deblocking_output.bo = NULL;
821
822     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
823     mfc_context->uncompressed_picture_source.bo = NULL;
824
825     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
826     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
827
828     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
829         if (mfc_context->reference_surfaces[i].bo != NULL)
830             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
831         mfc_context->reference_surfaces[i].bo = NULL;  
832     }
833
834     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
835     bo = dri_bo_alloc(i965->intel.bufmgr,
836                       "Buffer",
837                       128 * 64,
838                       64);
839     assert(bo);
840     mfc_context->intra_row_store_scratch_buffer.bo = bo;
841
842     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
843     bo = dri_bo_alloc(i965->intel.bufmgr,
844                       "Buffer",
845                       49152,  /* 6 * 128 * 64 */
846                       64);
847     assert(bo);
848     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
849
850     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
851     bo = dri_bo_alloc(i965->intel.bufmgr,
852                       "Buffer",
853                       12288, /* 1.5 * 128 * 64 */
854                       0x1000);
855     assert(bo);
856     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
857 }
858
859 #define         INTRA_RDO_OFFSET        4
860 #define         INTER_RDO_OFFSET        54
861 #define         INTER_MSG_OFFSET        52
862 #define         INTER_MV_OFFSET         224
863 #define         RDO_MASK                0xFFFF
864
865 static void gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
866                                       struct encode_state *encode_state,
867                                       struct gen6_encoder_context *gen6_encoder_context)
868 {
869     struct i965_driver_data *i965 = i965_driver_data(ctx);
870     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
871     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
872     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
873     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
874     VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer; /* FIXME: multi slices */
875     unsigned int *msg = NULL, offset = 0;
876     unsigned char *msg_ptr = NULL;
877     int emit_new_state = 1, object_len_in_bytes;
878     int is_intra = pSliceParameter->slice_flags.bits.is_intra;
879     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
880     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
881     int x,y, mb_index;
882     int inter_rdo, intra_rdo;
883
884     intel_batchbuffer_start_atomic_bcs(batch, 0x1000); 
885
886     dri_bo_map(vme_context->vme_output.bo , 1);
887     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
888     if (is_intra) {
889         msg = (unsigned int *) (msg_ptr + 0 * vme_context->vme_output.size_block);
890     } else {
891         msg = (unsigned int *) (msg_ptr + 0 * vme_context->vme_output.size_block);
892         offset = 0; 
893     }
894
895     for (y = 0; y < height_in_mbs; y++) {
896         for (x = 0; x < width_in_mbs; x++) { 
897             int last_mb = (y == (height_in_mbs-1)) && ( x == (width_in_mbs-1) );
898             int qp = pSequenceParameter->initial_qp;
899              mb_index = (y * width_in_mbs) + x;
900             if (emit_new_state) {
901                 intel_batchbuffer_emit_mi_flush(batch);
902                 
903                     gen75_mfc_pipe_mode_select(ctx, MFX_FORMAT_AVC, gen6_encoder_context);
904                     gen75_mfc_surface_state(ctx, gen6_encoder_context);
905                     gen75_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context);
906
907                 gen75_mfc_pipe_buf_addr_state(ctx, gen6_encoder_context);
908                 gen75_mfc_bsp_buf_base_addr_state(ctx, gen6_encoder_context);
909
910                     gen75_mfc_avc_img_state(ctx, gen6_encoder_context);
911                     gen75_mfc_avc_qm_state(ctx, gen6_encoder_context);
912                     gen75_mfc_avc_fqm_state(ctx, gen6_encoder_context);
913                     gen75_mfc_avc_directmode_state(ctx, gen6_encoder_context);
914
915                 gen75_mfc_avc_ref_idx_state(ctx, gen6_encoder_context);
916                 gen75_mfc_avc_slice_state(ctx, is_intra, gen6_encoder_context);
917                 emit_new_state = 0;
918             }
919
920             msg = (unsigned int *) (msg_ptr + mb_index * vme_context->vme_output.size_block);
921             if (is_intra) {
922                 object_len_in_bytes = gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, gen6_encoder_context);
923             } else {
924                 inter_rdo = msg[INTER_RDO_OFFSET] & RDO_MASK;
925                 intra_rdo = msg[INTRA_RDO_OFFSET] & RDO_MASK;
926                 if (intra_rdo < inter_rdo) {
927                         object_len_in_bytes = gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, gen6_encoder_context);
928                 } else {
929                         msg += INTER_MSG_OFFSET;
930                         offset = mb_index * vme_context->vme_output.size_block + INTER_MV_OFFSET;
931                         object_len_in_bytes = gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, offset, msg, gen6_encoder_context);
932                 }
933             }
934             if (intel_batchbuffer_check_free_space(batch, object_len_in_bytes) == 0) {
935                 intel_batchbuffer_end_atomic(batch);
936                 intel_batchbuffer_flush(batch);
937                 emit_new_state = 1;
938                 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
939             }
940         }
941     }
942
943     dri_bo_unmap(vme_context->vme_output.bo);
944         
945     intel_batchbuffer_end_atomic(batch);
946 }
947
948 static VAStatus gen75_mfc_avc_prepare(VADriverContextP ctx, 
949                                      struct encode_state *encode_state,
950                                      struct gen6_encoder_context *gen6_encoder_context)
951 {
952     struct i965_driver_data *i965 = i965_driver_data(ctx);
953     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
954     struct object_surface *obj_surface; 
955     struct object_buffer *obj_buffer;
956     dri_bo *bo;
957     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param->buffer;
958     VAStatus vaStatus = VA_STATUS_SUCCESS;
959
960     /*Setup all the input&output object*/
961     obj_surface = SURFACE(pPicParameter->reconstructed_picture);
962     assert(obj_surface);
963     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
964     mfc_context->post_deblocking_output.bo = obj_surface->bo;
965     dri_bo_reference(mfc_context->post_deblocking_output.bo);
966
967     mfc_context->surface_state.width = obj_surface->orig_width;
968     mfc_context->surface_state.height = obj_surface->orig_height;
969     mfc_context->surface_state.w_pitch = obj_surface->width;
970     mfc_context->surface_state.h_pitch = obj_surface->height;
971
972     obj_surface = SURFACE(pPicParameter->reference_picture);
973     assert(obj_surface);
974     if (obj_surface->bo != NULL) {
975         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
976         dri_bo_reference(obj_surface->bo);
977     }
978         
979     obj_surface = SURFACE(encode_state->current_render_target);
980     assert(obj_surface && obj_surface->bo);
981     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
982     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
983
984     obj_buffer = BUFFER (pPicParameter->coded_buf); /* FIXME: fix this later */
985     bo = obj_buffer->buffer_store->bo;
986     assert(bo);
987     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
988     mfc_context->mfc_indirect_pak_bse_object.offset = ALIGN(sizeof(VACodedBufferSegment), 64);
989     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
990
991     /*Programing bcs pipeline*/
992     gen75_mfc_avc_pipeline_programing(ctx, encode_state, gen6_encoder_context); //filling the pipeline
993         
994     return vaStatus;
995 }
996
997 static VAStatus gen75_mfc_run(VADriverContextP ctx, 
998                              struct encode_state *encode_state,
999                              struct gen6_encoder_context *gen6_encoder_context)
1000 {
1001     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
1002
1003     intel_batchbuffer_flush(batch);             //run the pipeline
1004
1005     return VA_STATUS_SUCCESS;
1006 }
1007
/*
 * Encoder stop hook.  Currently a no-op; the disabled block below is
 * leftover debug code that dumped the reconstructed surface via
 * my_debug().
 */
static VAStatus gen75_mfc_stop(VADriverContextP ctx, 
                              struct encode_state *encode_state,
                              struct gen6_encoder_context *gen6_encoder_context)
{
#if 0
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
        
    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param->buffer;
        
    struct object_surface *obj_surface = SURFACE(pPicParameter->reconstructed_picture);
    //struct object_surface *obj_surface = SURFACE(pPicParameter->reference_picture[0]);
    //struct object_surface *obj_surface = SURFACE(encode_state->current_render_target);
    my_debug(obj_surface);

#endif

    return VA_STATUS_SUCCESS;
}
1027
1028 static VAStatus
1029 gen75_mfc_avc_encode_picture(VADriverContextP ctx, 
1030                             struct encode_state *encode_state,
1031                             struct gen6_encoder_context *gen6_encoder_context)
1032 {
1033     gen75_mfc_init(ctx, gen6_encoder_context);
1034     gen75_mfc_avc_prepare(ctx, encode_state, gen6_encoder_context);
1035     gen75_mfc_run(ctx, encode_state, gen6_encoder_context);
1036     gen75_mfc_stop(ctx, encode_state, gen6_encoder_context);
1037
1038     return VA_STATUS_SUCCESS;
1039 }
1040
1041 VAStatus
1042 gen75_mfc_pipeline(VADriverContextP ctx,
1043                   VAProfile profile,
1044                   struct encode_state *encode_state,
1045                   struct gen6_encoder_context *gen6_encoder_context)
1046 {
1047     VAStatus vaStatus;
1048
1049     switch (profile) {
1050     case VAProfileH264Baseline:
1051         vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, gen6_encoder_context);
1052         break;
1053
1054         /* FIXME: add for other profile */
1055     default:
1056         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
1057         break;
1058     }
1059
1060     return vaStatus;
1061 }
1062
1063 Bool gen75_mfc_context_init(VADriverContextP ctx, struct gen6_mfc_context *mfc_context)
1064 {
1065     int i;
1066     struct i965_driver_data *i965 = i965_driver_data(ctx);
1067     dri_bo *bo;
1068         
1069     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
1070         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
1071         mfc_context->direct_mv_buffers[i].bo = NULL;
1072     }
1073     bo = dri_bo_alloc(i965->intel.bufmgr,
1074                         "Buffer",
1075                          68*8192,
1076                          64);
1077     mfc_context->direct_mv_buffers[0].bo = bo;
1078     bo = dri_bo_alloc(i965->intel.bufmgr,
1079                         "Buffer",
1080                          68*8192,
1081                          64);
1082     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = bo;
1083     return True;
1084 }
1085
1086 Bool gen75_mfc_context_destroy(struct gen6_mfc_context *mfc_context)
1087 {
1088     int i;
1089
1090     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
1091     mfc_context->post_deblocking_output.bo = NULL;
1092
1093     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
1094     mfc_context->pre_deblocking_output.bo = NULL;
1095
1096     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
1097     mfc_context->uncompressed_picture_source.bo = NULL;
1098
1099     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
1100     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
1101
1102     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
1103         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
1104         mfc_context->direct_mv_buffers[i].bo = NULL;
1105     }
1106
1107     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
1108     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
1109
1110     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
1111     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1112
1113     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
1114     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1115
1116     return True;
1117 }