Remove the hard-coded value to support the 4Kx4K encoding
[platform/upstream/libva-intel-driver.git] / src / gen75_mfc.c
1 /*
2  * Copyright © 2010-2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34
35 #include "assert.h"
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41
42 #define B0_STEP_REV             2
43 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
44
45 static void
46 gen75_mfc_pipe_mode_select(VADriverContextP ctx,
47                           int standard_select,
48                           struct gen6_encoder_context *gen6_encoder_context)
49 {
50     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
51
52     assert(standard_select == MFX_FORMAT_MPEG2 ||
53            standard_select == MFX_FORMAT_AVC);
54
55     BEGIN_BCS_BATCH(batch, 5);
56     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
57     OUT_BCS_BATCH(batch,
58                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
59                   (MFD_MODE_VLD << 15) | /* VLD mode */
60                   (0 << 10) | /* disable Stream-Out */
61                   (1 << 9)  | /* Post Deblocking Output */
62                   (0 << 8)  | /* Pre Deblocking Output */
63                   (0 << 5)  | /* not in stitch mode */
64                   (1 << 4)  | /* encoding mode */
65                   (standard_select << 0));  /* standard select: avc or mpeg2 */
66     OUT_BCS_BATCH(batch,
67                   (0 << 7)  | /* expand NOA bus flag */
68                   (0 << 6)  | /* disable slice-level clock gating */
69                   (0 << 5)  | /* disable clock gating for NOA */
70                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
71                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
72                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
73                   (0 << 1)  |
74                   (0 << 0));
75     OUT_BCS_BATCH(batch, 0);
76     OUT_BCS_BATCH(batch, 0);
77
78     ADVANCE_BCS_BATCH(batch);
79 }
80
81 #define         INTER_MODE_MASK         0x03
82 #define         INTER_8X8               0x03
83 #define         SUBMB_SHAPE_MASK        0x00FF00
84
85 #define         INTER_MV8               (4 << 20)
86 #define         INTER_MV32              (6 << 20)
87
88
89 static void
90 gen75_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
91 {
92     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
93     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
94
95     BEGIN_BCS_BATCH(batch, 6);
96
97     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
98     OUT_BCS_BATCH(batch, 0);
99     OUT_BCS_BATCH(batch,
100                   ((mfc_context->surface_state.height - 1) << 18) |
101                   ((mfc_context->surface_state.width - 1) << 4));
102     OUT_BCS_BATCH(batch,
103                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
104                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
105                   (0 << 22) | /* surface object control state, FIXME??? */
106                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
107                   (0 << 2)  | /* must be 0 for interleave U/V */
108                   (1 << 1)  | /* must be tiled */
109                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
110     OUT_BCS_BATCH(batch,
111                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
112                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
113     OUT_BCS_BATCH(batch, 0);
114     ADVANCE_BCS_BATCH(batch);
115 }
116
/*
 * Emit the 61-dword MFX_PIPE_BUF_ADDR_STATE used on B0+ stepping Haswell
 * parts: wires up the deblocking outputs, source picture, row-store
 * scratch buffers and the reference picture list for the PAK engine.
 * Each address occupies 3 dwords on this layout (address, upper bits,
 * memory attributes); unused slots are written as 0.
 */
static void
gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
                struct gen6_encoder_context *gen6_encoder_context)
{
    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
    int i;

    BEGIN_BCS_BATCH(batch, 61);

    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));

    /* DW1-3: pre-deblocking output — unused, the encoder takes the
     * post-deblocking path (see gen75_mfc_pipe_mode_select) */
    OUT_BCS_BATCH(batch, 0);                /* pre output addr */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW4-6: post-deblocking output (the reconstructed frame) */
    if (mfc_context->post_deblocking_output.bo)
        OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);                   /* post output addr */
    else
        OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW7-9: the uncompressed source picture */
    OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0); /* uncompressed data */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW10-12: stream-out / MB status — not used by the encoder */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW13-15: intra row-store scratch buffer */
    OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW16-18: deblocking filter row-store scratch buffer */
    OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW19-50: reference picture list — two dwords per entry here,
     * empty slots written as 0 */
    for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
        if ( mfc_context->reference_surfaces[i].bo != NULL) {
            OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, 0);

    /* DW52-54: macroblock status buffer — unused */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW55-57: ILDB buffer — unused */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW58-60: second ILDB buffer — unused */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
203
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE. On B0+ stepping parts this defers to the
 * 61-dword _bplus layout; earlier steppings use the legacy 25-dword
 * layout with one dword per address.
 */
static void
gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
{
    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
    int i;

    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfc_pipe_buf_addr_state_bplus(ctx, gen6_encoder_context);
        return;
    }

    BEGIN_BCS_BATCH(batch, 25);

    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));

    /* DW1: pre-deblocking output — unused, encoder uses post-deblocking */
    OUT_BCS_BATCH(batch, 0);                /* pre output addr */

    /* DW2: post-deblocking output (reconstructed frame) */
    OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);                       /* post output addr */

    /* DW3: uncompressed source picture */
    OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);                       /* uncompressed data */

    OUT_BCS_BATCH(batch, 0);                /* DW4: StreamOut data — unused */
    /* DW5: intra row-store scratch buffer */
    OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    /* DW6: deblocking filter row-store scratch buffer */
    OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    /* DW7..22: reference pictures, empty slots written as 0 */
    for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
        if ( mfc_context->reference_surfaces[i].bo != NULL) {
            OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }
    OUT_BCS_BATCH(batch, 0);                /* DW23: no macroblock status buffer */

    OUT_BCS_BATCH(batch, 0);                /* DW24 */

    ADVANCE_BCS_BATCH(batch);
}
255
256
/*
 * Emit the 26-dword MFX_IND_OBJ_BASE_ADDR_STATE (B0+ stepping layout):
 * points the PAK engine at the VME output (indirect MV objects) and at
 * the indirect PAK-BSE output buffer for the compressed bitstream.
 */
static void
gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
                struct gen6_encoder_context *gen6_encoder_context)
{
    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;

    BEGIN_BCS_BATCH(batch, 26);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* DW1-3: MFX indirect bitstream offset — unused on the encode path */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* DW4-5: MFX indirect bitstream upper bound */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW6-10: MFX indirect MV object base address — the VME output buffer */
    OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, upper bound up to 2G */
    OUT_BCS_BATCH(batch, 0);

    /* DW11-15: MFX IT-COFF — not used on encoder */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW16-20: MFX indirect DBLK — not used on encoder */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW21-25: MFC indirect PAK-BSE object base address (encoder output) */
    OUT_BCS_RELOC(batch,
                  mfc_context->mfc_indirect_pak_bse_object.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0x00000000);

    ADVANCE_BCS_BATCH(batch);
}
310
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE. B0+ stepping parts take the 26-dword
 * _bplus layout; earlier steppings use the legacy 11-dword layout.
 */
static void
gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
{
    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfc_ind_obj_base_addr_state_bplus(ctx, gen6_encoder_context);
        return;
    }

    BEGIN_BCS_BATCH(batch, 11);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* MFX indirect MV object base address — the VME output buffer */
    OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, upper bound up to 2G */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* MFC indirect PAK-BSE object base address (encoder bitstream output) */
    OUT_BCS_RELOC(batch,
                  mfc_context->mfc_indirect_pak_bse_object.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0x00000000); /* must set, upper bound up to 2G */

    ADVANCE_BCS_BATCH(batch);
}
345
/*
 * Emit the 10-dword MFX_BSP_BUF_BASE_ADDR_STATE (B0+ stepping layout):
 * only the BSD/MPC row-store scratch buffer is needed by the encoder;
 * the MPR row-store and bitplane-read slots are left empty.
 */
static void
gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
                struct gen6_encoder_context *gen6_encoder_context)
{
    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;

    BEGIN_BCS_BATCH(batch, 10);

    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
    /* DW1-3: BSD/MPC row-store scratch buffer base address */
    OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW4-6: MPR row-store scratch buffer base address — unused */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW7-9: bitplane read buffer base address — unused */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
374
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE. B0+ stepping parts take the 10-dword
 * _bplus layout; earlier steppings use the legacy 4-dword layout.
 */
static void
gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
{
    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, gen6_encoder_context);
        return;
    }

    BEGIN_BCS_BATCH(batch, 4);

    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
    /* DW1: BSD/MPC row-store scratch buffer base address */
    OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);    /* DW2: MPR row-store — unused */
    OUT_BCS_BATCH(batch, 0);    /* DW3: bitplane read buffer — unused */

    ADVANCE_BCS_BATCH(batch);
}
399
/*
 * Emit MFX_AVC_IMG_STATE: per-frame AVC encode parameters.  Frame
 * dimensions (in macroblocks) are derived from the surface state rather
 * than hard-coded, so large frames (e.g. 4Kx4K) are supported.
 * Configures CABAC entropy coding, 4:2:0 chroma, frame-MB-only pictures.
 */
static void
gen75_mfc_avc_img_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
{
    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;

    /* round pixel dimensions up to whole 16x16 macroblocks */
    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;

    BEGIN_BCS_BATCH(batch, 16);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    /* DW1: total macroblock count of the frame */
    OUT_BCS_BATCH(batch,
                  ((width_in_mbs * height_in_mbs) & 0xFFFF));
    /* DW2: frame dimensions in MBs, stored minus one */
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 24) |   /* Second Chroma QP Offset */
                  (0 << 16) |   /* Chroma QP Offset */
                  (0 << 14) |   /* Max-bit conformance Intra flag */
                  (0 << 13) |   /* Max Macroblock size conformance Inter flag */
                  (0 << 12) |   /* FIXME: Weighted_Pred_Flag */
                  (0 << 10) |   /* FIXME: Weighted_BiPred_Idc */
                  (0 << 8)  |   /* FIXME: Image Structure */
                  (0 << 0) );   /* Current Decoded Image Frame Store ID, reserved in Encode mode */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |   /* Minimum Frame size */
                  (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
                  (0 << 14) |   /* Load BitStream Pointer only once, 1 slice 1 frame */
                  (0 << 13) |   /* CABAC 0 word insertion test enable */
                  (1 << 12) |   /* MVUnpackedEnable, compliant to DXVA */
                  (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
                  (0 << 8)  |   /* FIXME: MbMvFormatFlag */
                  (1 << 7)  |   /* 0:CAVLC encoding mode, 1:CABAC */
                  (0 << 6)  |   /* Only valid for VLD decoding mode */
                  (0 << 5)  |   /* Constrained Intra Prediction Flag, from PPS */
                  (0 << 4)  |   /* Direct 8x8 inference flag */
                  (0 << 3)  |   /* Only 8x8 IDCT Transform Mode Flag */
                  (1 << 2)  |   /* Frame MB only flag */
                  (0 << 1)  |   /* MBAFF mode is in active */
                  (0 << 0));    /* Field picture flag */
    OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
    OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
                  (0xBB8 << 16) |       /* InterMbMaxSz */
                  (0xEE8) );            /* IntraMbMaxSz */
    OUT_BCS_BATCH(batch, 0);            /* Reserved */
    OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
    OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
    /* DW11-12: fixed control values — NOTE(review): magic constants,
     * presumably trellis/RC thresholds; confirm against the MFX PRM */
    OUT_BCS_BATCH(batch, 0x8C000000);
    OUT_BCS_BATCH(batch, 0x00010000);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
457
458
/*
 * Emit the 71-dword MFX_AVC_DIRECTMODE_STATE (B0+ stepping layout):
 * direct-MV buffers for the reference frames, the MV write buffer for
 * the current frame, and the POC list.
 */
static void
gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
                        struct gen6_encoder_context *gen6_encoder_context)
{
    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;

    int i;

    BEGIN_BCS_BATCH(batch, 71);

    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* Reference frames and Current frames */
    /* DW1-32: direct MV buffers for the references — two dwords per
     * entry, empty slots written as 0 */
    for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
        if ( mfc_context->direct_mv_buffers[i].bo != NULL) {
            OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
            OUT_BCS_BATCH(batch, 0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }
    OUT_BCS_BATCH(batch, 0);

    /* DW34-36: MV write buffer for the current frame */
    OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* POC list: one entry per field, i.e. frame index = i/2 */
    for(i = 0; i < 32; i++) {
        OUT_BCS_BATCH(batch, i/2);
    }
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
504
/*
 * Emit MFX_AVC_DIRECTMODE_STATE. B0+ stepping parts take the 71-dword
 * _bplus layout; earlier steppings use the legacy 69-dword layout with
 * one dword per DMV buffer address.
 */
static void gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
{
    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
    int i;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfc_avc_directmode_state_bplus(ctx, gen6_encoder_context);
        return;
    }

    BEGIN_BCS_BATCH(batch, 69);

    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
    //TODO: reference DMV
    /* direct MV buffers for the references, empty slots written as 0 */
    for (i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i++){
        if (mfc_context->direct_mv_buffers[i].bo)
                OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
        else
                OUT_BCS_BATCH(batch, 0);
    }

    //TODO: current DMV just for test
#if 0
    OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[0].bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
#else
    /* MV write buffer for the current frame (last-but-one DMV slot) */
    //drm_intel_bo_pin(mfc_context->direct_mv_buffers[0].bo, 0x1000);
    //OUT_BCS_BATCH(batch, mfc_context->direct_mv_buffers[0].bo->offset);
    OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
#endif


    OUT_BCS_BATCH(batch, 0);

    //TODO: POL list
    /* POC list left zeroed for now */
    for(i = 0; i < 34; i++) {
        OUT_BCS_BATCH(batch, 0);
    }

    ADVANCE_BCS_BATCH(batch);
}
553
554 static void gen75_mfc_avc_slice_state(VADriverContextP ctx,
555                                      int intra_slice,
556                                      struct gen6_encoder_context *gen6_encoder_context)
557 {
558     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
559     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
560
561     BEGIN_BCS_BATCH(batch, 11);;
562
563     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
564
565     if ( intra_slice )
566         OUT_BCS_BATCH(batch, 2);                        /*Slice Type: I Slice*/
567     else
568         OUT_BCS_BATCH(batch, 0);                        /*Slice Type: P Slice*/
569
570     if ( intra_slice )
571         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
572     else 
573         OUT_BCS_BATCH(batch, 0x00010000);       /*1 reference frame*/
574
575     OUT_BCS_BATCH(batch, (0<<24) |                /*Enable deblocking operation*/
576                   (26<<16) |                    /*Slice Quantization Parameter*/
577                   0x0202 );
578     OUT_BCS_BATCH(batch, 0);                    /*First MB X&Y , the postion of current slice*/
579     OUT_BCS_BATCH(batch, ( ((mfc_context->surface_state.height+15)/16) << 16) );
580
581     OUT_BCS_BATCH(batch, 
582                   (0<<31) |             /*RateControlCounterEnable = disable*/
583                   (1<<30) |             /*ResetRateControlCounter*/
584                   (2<<28) |             /*RC Triggle Mode = Loose Rate Control*/
585                   (1<<19) |             /*IsLastSlice*/
586                   (0<<18) |             /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
587                   (0<<17) |             /*HeaderPresentFlag*/   
588                   (1<<16) |             /*SliceData PresentFlag*/
589                   (0<<15) |             /*TailPresentFlag*/
590                   (1<<13) |             /*RBSP NAL TYPE*/       
591                   (0<<12) );            /*CabacZeroWordInsertionEnable*/
592         
593
594     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
595
596     OUT_BCS_BATCH(batch, 0);
597     OUT_BCS_BATCH(batch, 0);
598     OUT_BCS_BATCH(batch, 0);
599
600     ADVANCE_BCS_BATCH(batch);
601 }
602
603 static void
604 gen75_mfc_qm_state(VADriverContextP ctx,
605                   int qm_type,
606                   unsigned int *qm,
607                   int qm_length,
608                   struct gen6_encoder_context *gen6_encoder_context)
609 {
610     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
611     unsigned int qm_buffer[16];
612
613     assert(qm_length <= 16);
614     assert(sizeof(*qm) == 4);
615     memcpy(qm_buffer, qm, qm_length * 4);
616
617     BEGIN_BCS_BATCH(batch, 18);
618     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
619     OUT_BCS_BATCH(batch, qm_type << 0);
620     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
621     ADVANCE_BCS_BATCH(batch);
622 }
623
624 static void gen75_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
625 {
626     unsigned int qm[16] = {
627         0x10101010, 0x10101010, 0x10101010, 0x10101010,
628         0x10101010, 0x10101010, 0x10101010, 0x10101010,
629         0x10101010, 0x10101010, 0x10101010, 0x10101010,
630         0x10101010, 0x10101010, 0x10101010, 0x10101010
631     };
632
633     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, gen6_encoder_context);
634     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, gen6_encoder_context);
635     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, gen6_encoder_context);
636     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, gen6_encoder_context);
637 }
638
639 static void
640 gen75_mfc_fqm_state(VADriverContextP ctx,
641                    int fqm_type,
642                    unsigned int *fqm,
643                    int fqm_length,
644                    struct gen6_encoder_context *gen6_encoder_context)
645 {
646     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
647     unsigned int fqm_buffer[32];
648
649     assert(fqm_length <= 32);
650     assert(sizeof(*fqm) == 4);
651     memcpy(fqm_buffer, fqm, fqm_length * 4);
652
653     BEGIN_BCS_BATCH(batch, 34);
654     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
655     OUT_BCS_BATCH(batch, fqm_type << 0);
656     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
657     ADVANCE_BCS_BATCH(batch);
658 }
659
660 static void gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
661 {
662     unsigned int qm[32] = {
663         0x10001000, 0x10001000, 0x10001000, 0x10001000,
664         0x10001000, 0x10001000, 0x10001000, 0x10001000,
665         0x10001000, 0x10001000, 0x10001000, 0x10001000,
666         0x10001000, 0x10001000, 0x10001000, 0x10001000,
667         0x10001000, 0x10001000, 0x10001000, 0x10001000,
668         0x10001000, 0x10001000, 0x10001000, 0x10001000,
669         0x10001000, 0x10001000, 0x10001000, 0x10001000,
670         0x10001000, 0x10001000, 0x10001000, 0x10001000
671     };
672
673     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, gen6_encoder_context);
674     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, gen6_encoder_context);
675     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, gen6_encoder_context);
676     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, gen6_encoder_context);
677 }
678
679 static void gen75_mfc_avc_ref_idx_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
680 {
681     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
682     int i;
683
684     BEGIN_BCS_BATCH(batch, 10);
685
686     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
687     OUT_BCS_BATCH(batch, 0);                  //Select L0
688
689     OUT_BCS_BATCH(batch, 0x80808000);         //Only 1 reference
690     for(i = 0; i < 7; i++) {
691         OUT_BCS_BATCH(batch, 0x80808080);
692     }
693
694     ADVANCE_BCS_BATCH(batch);
695 }
696         
/*
 * Emit MFC_AVC_INSERT_OBJECT to append raw payload bits to the output
 * bitstream (here a fixed 32-bit payload, used for trailing data).
 *
 * @param flush_data  when set, the last-header/flush flag is raised so
 *                    the bitstream is flushed after this insertion
 */
static void
gen75_mfc_avc_insert_object(VADriverContextP ctx, int flush_data, struct gen6_encoder_context *gen6_encoder_context)
{
    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 4);

    OUT_BCS_BATCH(batch, MFC_AVC_INSERT_OBJECT | (4 -2 ) );
    OUT_BCS_BATCH(batch, (32<<8) |            /* payload length: 32 bits */
                  (1 << 3) |
                  (1 << 2) |
                  (flush_data << 1) |
                  (1<<0) );
    /* payload dwords — NOTE(review): 0x00000003/0xABCD1234 look like a
     * fixed end-of-stream pattern; confirm against the PAK usage */
    OUT_BCS_BATCH(batch, 0x00000003);
    OUT_BCS_BATCH(batch, 0xABCD1234);

    ADVANCE_BCS_BATCH(batch);
}
715
/*
 * Emit one MFC_AVC_PAK_OBJECT for an intra macroblock.
 *
 * @param x, y    macroblock coordinates
 * @param end_mb  non-zero marks the last MB of the slice
 * @param qp      quantization parameter for this MB
 * @param msg     VME output message for this MB (mode/pred data)
 * @return        number of dwords emitted (always 12)
 */
static int
gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg,
                              struct gen6_encoder_context *gen6_encoder_context)
{
    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
    int len_in_dwords = 12;

    unsigned int intra_msg;
#define         INTRA_MSG_FLAG          (1 << 13)
#define         INTRA_MBTYPE_MASK       (0x1F0000)

    BEGIN_BCS_BATCH(batch, len_in_dwords);

    /* rebuild the VME message into the PAK intra layout: keep the low
     * mode bits, set the intra flag, move the MB type field down 8 bits */
    intra_msg = msg[0] & 0xC0FF;
    intra_msg |= INTRA_MSG_FLAG;
    intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
    OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  (0 << 24) |           /* PackedMvNum, Debug*/
                  (0 << 20) |           /* No motion vector */
                  (1 << 19) |           /* CbpDcY */
                  (1 << 18) |           /* CbpDcU */
                  (1 << 17) |           /* CbpDcV */
                  intra_msg);

    OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);          /* Code Block Pattern for Y*/
    OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */
    OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */

    /* intra prediction modes from the VME message */
    OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/
    OUT_BCS_BATCH(batch, msg[2]);
    OUT_BCS_BATCH(batch, msg[3]&0xFC);

    OUT_BCS_BATCH(batch, 0x00000);      /*MaxSizeInWord and TargetSzieInWord*/
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    return len_in_dwords;
}
759
760 static int gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
761         unsigned int offset, unsigned int *msg, struct gen6_encoder_context *gen6_encoder_context)
762 {
763     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
764     int len_in_dwords = 12;
765     unsigned int inter_msg;
766
767     BEGIN_BCS_BATCH(batch, len_in_dwords);
768
769     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
770
771         inter_msg = 32;
772         /* MV quantity */
773         if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
774                 if (msg[1] & SUBMB_SHAPE_MASK)
775                         inter_msg = 128;
776         }
777     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
778
779     OUT_BCS_BATCH(batch, offset);
780         inter_msg = msg[0] & (0x1F00FFFF);
781         inter_msg |= INTER_MV8;
782         if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
783                         (msg[1] & SUBMB_SHAPE_MASK)) {
784                 inter_msg |= INTER_MV32;
785         }
786
787     OUT_BCS_BATCH(batch, inter_msg);
788
789     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
790     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */    
791     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);    /* Last MB */
792
793     /*Stuff for Inter MB*/
794         inter_msg = msg[1] >> 8;
795     OUT_BCS_BATCH(batch, inter_msg);        
796     OUT_BCS_BATCH(batch, 0x0);    
797     OUT_BCS_BATCH(batch, 0x0);        
798
799     OUT_BCS_BATCH(batch, 0x00000000); /*MaxSizeInWord and TargetSzieInWord*/
800
801     OUT_BCS_BATCH(batch, 0x0);        
802
803     ADVANCE_BCS_BATCH(batch);
804
805     return len_in_dwords;
806 }
807
808 static void gen75_mfc_init(VADriverContextP ctx,
809                            struct encode_state *encode_state,
810                            struct gen6_encoder_context *gen6_encoder_context)
811 {
812     struct i965_driver_data *i965 = i965_driver_data(ctx);
813     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
814     dri_bo *bo;
815     int i;
816     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
817     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
818     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
819
820     /*Encode common setup for MFC*/
821     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
822     mfc_context->post_deblocking_output.bo = NULL;
823
824     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
825     mfc_context->pre_deblocking_output.bo = NULL;
826
827     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
828     mfc_context->uncompressed_picture_source.bo = NULL;
829
830     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
831     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
832
833     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
834         if (mfc_context->reference_surfaces[i].bo != NULL)
835             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
836         mfc_context->reference_surfaces[i].bo = NULL;  
837     }
838
839     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
840     bo = dri_bo_alloc(i965->intel.bufmgr,
841                       "Buffer",
842                       width_in_mbs * 64,
843                       64);
844     assert(bo);
845     mfc_context->intra_row_store_scratch_buffer.bo = bo;
846
847     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
848     bo = dri_bo_alloc(i965->intel.bufmgr,
849                       "Buffer",
850                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
851                       64);
852     assert(bo);
853     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
854
855     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
856     bo = dri_bo_alloc(i965->intel.bufmgr,
857                       "Buffer",
858                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
859                       0x1000);
860     assert(bo);
861     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
862 }
863
/* Layout constants for the per-MB VME output record consumed by the PAK
 * loop below.  The *_RDO and *_MSG offsets are in dwords (the record is
 * indexed through an "unsigned int *"), while INTER_MV_OFFSET is in bytes
 * (it is added to a byte offset into the VME output BO). */
#define         INTRA_RDO_OFFSET        4
#define         INTER_RDO_OFFSET        54
#define         INTER_MSG_OFFSET        52
#define         INTER_MV_OFFSET         224
#define         RDO_MASK                0xFFFF  /* RDO cost sits in the low 16 bits */
869
870 static void gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
871                                       struct encode_state *encode_state,
872                                       struct gen6_encoder_context *gen6_encoder_context)
873 {
874     struct i965_driver_data *i965 = i965_driver_data(ctx);
875     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
876     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
877     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
878     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
879     VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer; /* FIXME: multi slices */
880     unsigned int *msg = NULL, offset = 0;
881     unsigned char *msg_ptr = NULL;
882     int emit_new_state = 1, object_len_in_bytes;
883     int is_intra = pSliceParameter->slice_flags.bits.is_intra;
884     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
885     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
886     int x,y, mb_index;
887     int inter_rdo, intra_rdo;
888
889     intel_batchbuffer_start_atomic_bcs(batch, 0x1000); 
890
891     dri_bo_map(vme_context->vme_output.bo , 1);
892     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
893     if (is_intra) {
894         msg = (unsigned int *) (msg_ptr + 0 * vme_context->vme_output.size_block);
895     } else {
896         msg = (unsigned int *) (msg_ptr + 0 * vme_context->vme_output.size_block);
897         offset = 0; 
898     }
899
900     for (y = 0; y < height_in_mbs; y++) {
901         for (x = 0; x < width_in_mbs; x++) { 
902             int last_mb = (y == (height_in_mbs-1)) && ( x == (width_in_mbs-1) );
903             int qp = pSequenceParameter->initial_qp;
904              mb_index = (y * width_in_mbs) + x;
905             if (emit_new_state) {
906                 intel_batchbuffer_emit_mi_flush(batch);
907                 
908                     gen75_mfc_pipe_mode_select(ctx, MFX_FORMAT_AVC, gen6_encoder_context);
909                     gen75_mfc_surface_state(ctx, gen6_encoder_context);
910                     gen75_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context);
911
912                 gen75_mfc_pipe_buf_addr_state(ctx, gen6_encoder_context);
913                 gen75_mfc_bsp_buf_base_addr_state(ctx, gen6_encoder_context);
914
915                     gen75_mfc_avc_img_state(ctx, gen6_encoder_context);
916                     gen75_mfc_avc_qm_state(ctx, gen6_encoder_context);
917                     gen75_mfc_avc_fqm_state(ctx, gen6_encoder_context);
918                     gen75_mfc_avc_directmode_state(ctx, gen6_encoder_context);
919
920                 gen75_mfc_avc_ref_idx_state(ctx, gen6_encoder_context);
921                 gen75_mfc_avc_slice_state(ctx, is_intra, gen6_encoder_context);
922                 emit_new_state = 0;
923             }
924
925             msg = (unsigned int *) (msg_ptr + mb_index * vme_context->vme_output.size_block);
926             if (is_intra) {
927                 object_len_in_bytes = gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, gen6_encoder_context);
928             } else {
929                 inter_rdo = msg[INTER_RDO_OFFSET] & RDO_MASK;
930                 intra_rdo = msg[INTRA_RDO_OFFSET] & RDO_MASK;
931                 if (intra_rdo < inter_rdo) {
932                         object_len_in_bytes = gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, gen6_encoder_context);
933                 } else {
934                         msg += INTER_MSG_OFFSET;
935                         offset = mb_index * vme_context->vme_output.size_block + INTER_MV_OFFSET;
936                         object_len_in_bytes = gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, offset, msg, gen6_encoder_context);
937                 }
938             }
939             if (intel_batchbuffer_check_free_space(batch, object_len_in_bytes) == 0) {
940                 intel_batchbuffer_end_atomic(batch);
941                 intel_batchbuffer_flush(batch);
942                 emit_new_state = 1;
943                 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
944             }
945         }
946     }
947
948     dri_bo_unmap(vme_context->vme_output.bo);
949         
950     intel_batchbuffer_end_atomic(batch);
951 }
952
/* Bind all per-frame input/output BOs into the MFC context (taking a
 * reference on each), then program the whole BCS pipeline for this frame.
 * NOTE: the SURFACE()/BUFFER() macros implicitly use the local "i965". */
static VAStatus gen75_mfc_avc_prepare(VADriverContextP ctx, 
                                     struct encode_state *encode_state,
                                     struct gen6_encoder_context *gen6_encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
    struct object_surface *obj_surface; 
    struct object_buffer *obj_buffer;
    dri_bo *bo;
    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param->buffer;
    VAStatus vaStatus = VA_STATUS_SUCCESS;

    /*Setup all the input&output object*/
    /* Reconstructed picture: ensure it has an NV12 BO and use it as the
     * post-deblocking output. */
    obj_surface = SURFACE(pPicParameter->reconstructed_picture);
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
    mfc_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(mfc_context->post_deblocking_output.bo);

    /* Surface-state geometry: orig_* are the logical frame dimensions,
     * width/height the padded pitch of the allocated surface. */
    mfc_context->surface_state.width = obj_surface->orig_width;
    mfc_context->surface_state.height = obj_surface->orig_height;
    mfc_context->surface_state.w_pitch = obj_surface->width;
    mfc_context->surface_state.h_pitch = obj_surface->height;

    /* Single reference picture (slot 0 only). */
    obj_surface = SURFACE(pPicParameter->reference_picture);
    assert(obj_surface);
    if (obj_surface->bo != NULL) {
        mfc_context->reference_surfaces[0].bo = obj_surface->bo;
        dri_bo_reference(obj_surface->bo);
    }
        
    /* The current render target is the uncompressed source picture. */
    obj_surface = SURFACE(encode_state->current_render_target);
    assert(obj_surface && obj_surface->bo);
    mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
    dri_bo_reference(mfc_context->uncompressed_picture_source.bo);

    /* Coded buffer: the bitstream is written after the VACodedBufferSegment
     * header, aligned to 64 bytes. */
    obj_buffer = BUFFER (pPicParameter->coded_buf); /* FIXME: fix this later */
    bo = obj_buffer->buffer_store->bo;
    assert(bo);
    mfc_context->mfc_indirect_pak_bse_object.bo = bo;
    mfc_context->mfc_indirect_pak_bse_object.offset = ALIGN(sizeof(VACodedBufferSegment), 64);
    dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);

    /*Programing bcs pipeline*/
    gen75_mfc_avc_pipeline_programing(ctx, encode_state, gen6_encoder_context); //filling the pipeline
        
    return vaStatus;
}
1001
1002 static VAStatus gen75_mfc_run(VADriverContextP ctx, 
1003                              struct encode_state *encode_state,
1004                              struct gen6_encoder_context *gen6_encoder_context)
1005 {
1006     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
1007
1008     intel_batchbuffer_flush(batch);             //run the pipeline
1009
1010     return VA_STATUS_SUCCESS;
1011 }
1012
1013 static VAStatus gen75_mfc_stop(VADriverContextP ctx, 
1014                               struct encode_state *encode_state,
1015                               struct gen6_encoder_context *gen6_encoder_context)
1016 {
1017 #if 0
1018     struct i965_driver_data *i965 = i965_driver_data(ctx);
1019     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
1020         
1021     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param->buffer;
1022         
1023     struct object_surface *obj_surface = SURFACE(pPicParameter->reconstructed_picture);
1024     //struct object_surface *obj_surface = SURFACE(pPicParameter->reference_picture[0]);
1025     //struct object_surface *obj_surface = SURFACE(encode_state->current_render_target);
1026     my_debug(obj_surface);
1027
1028 #endif
1029
1030     return VA_STATUS_SUCCESS;
1031 }
1032
1033 static VAStatus
1034 gen75_mfc_avc_encode_picture(VADriverContextP ctx, 
1035                             struct encode_state *encode_state,
1036                             struct gen6_encoder_context *gen6_encoder_context)
1037 {
1038     gen75_mfc_init(ctx, encode_state, gen6_encoder_context);
1039     gen75_mfc_avc_prepare(ctx, encode_state, gen6_encoder_context);
1040     gen75_mfc_run(ctx, encode_state, gen6_encoder_context);
1041     gen75_mfc_stop(ctx, encode_state, gen6_encoder_context);
1042
1043     return VA_STATUS_SUCCESS;
1044 }
1045
1046 VAStatus
1047 gen75_mfc_pipeline(VADriverContextP ctx,
1048                   VAProfile profile,
1049                   struct encode_state *encode_state,
1050                   struct gen6_encoder_context *gen6_encoder_context)
1051 {
1052     VAStatus vaStatus;
1053
1054     switch (profile) {
1055     case VAProfileH264Baseline:
1056         vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, gen6_encoder_context);
1057         break;
1058
1059         /* FIXME: add for other profile */
1060     default:
1061         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
1062         break;
1063     }
1064
1065     return vaStatus;
1066 }
1067
1068 Bool gen75_mfc_context_init(VADriverContextP ctx, struct gen6_mfc_context *mfc_context)
1069 {
1070     int i;
1071     struct i965_driver_data *i965 = i965_driver_data(ctx);
1072     dri_bo *bo;
1073         
1074     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
1075         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
1076         mfc_context->direct_mv_buffers[i].bo = NULL;
1077     }
1078     bo = dri_bo_alloc(i965->intel.bufmgr,
1079                         "Buffer",
1080                          68*8192,
1081                          64);
1082     mfc_context->direct_mv_buffers[0].bo = bo;
1083     bo = dri_bo_alloc(i965->intel.bufmgr,
1084                         "Buffer",
1085                          68*8192,
1086                          64);
1087     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = bo;
1088     return True;
1089 }
1090
1091 Bool gen75_mfc_context_destroy(struct gen6_mfc_context *mfc_context)
1092 {
1093     int i;
1094
1095     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
1096     mfc_context->post_deblocking_output.bo = NULL;
1097
1098     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
1099     mfc_context->pre_deblocking_output.bo = NULL;
1100
1101     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
1102     mfc_context->uncompressed_picture_source.bo = NULL;
1103
1104     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
1105     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
1106
1107     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
1108         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
1109         mfc_context->direct_mv_buffers[i].bo = NULL;
1110     }
1111
1112     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
1113     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
1114
1115     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
1116     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1117
1118     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
1119     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1120
1121     return True;
1122 }