Fix Motion Vector
[platform/upstream/libva-intel-driver.git] / src / gen75_mfc.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45
/* Build-time switch; the code that tests it is not visible in this part of the file. */
#define MFC_SOFTWARE_HASWELL    1

/* Smallest intel.revision value that IS_STEPPING_BPLUS() accepts. */
#define B0_STEP_REV             2
/*
 * Evaluates to non-zero when the driver data's intel.revision is at least
 * B0_STEP_REV. The functions below use it to pick between the short and the
 * long ("_bplus") layout of several MFX state commands.
 *
 * The argument is parenthesized so that any pointer-valued expression
 * (for example &data or p + 1) expands correctly.
 */
#define IS_STEPPING_BPLUS(i965) (((i965)->intel.revision) >= B0_STEP_REV)
50
/*
 * Table of four-dword rows whose contents are pulled in from the included
 * .g7b file. Referenced by the "MFC AVC INTRA BATCHBUFFER " entry of
 * gen75_mfc_kernels[].
 */
static const uint32_t gen75_mfc_batchbuffer_avc_intra[][4] = {
#include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
};

/*
 * Same layout as the intra table, filled from the inter .g7b file and
 * referenced by the "MFC AVC INTER BATCHBUFFER " entry of gen75_mfc_kernels[].
 */
static const uint32_t gen75_mfc_batchbuffer_avc_inter[][4] = {
#include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
};
58
/*
 * Descriptors for the two batchbuffer kernels defined above. Each entry is
 * initialized positionally with: a label string, an MFC_BATCHBUFFER_* id,
 * the kernel table, its size in bytes (sizeof), and NULL.
 * NOTE(review): the trailing NULL is presumably a buffer-object slot filled
 * in when the kernel is loaded - confirm against struct i965_kernel.
 */
static struct i965_kernel gen75_mfc_kernels[] = {
    {
        "MFC AVC INTRA BATCHBUFFER ",
        MFC_BATCHBUFFER_AVC_INTRA,
        gen75_mfc_batchbuffer_avc_intra,
        sizeof(gen75_mfc_batchbuffer_avc_intra),
        NULL
    },

    {
        "MFC AVC INTER BATCHBUFFER ",
        MFC_BATCHBUFFER_AVC_INTER,
        gen75_mfc_batchbuffer_avc_inter,
        sizeof(gen75_mfc_batchbuffer_avc_inter),
        NULL
    },
};
76
/*
 * Masks and field values related to inter-predicted macroblocks.
 * NOTE(review): none of these are used in the part of the file visible here;
 * presumably they decode/encode per-macroblock words taken from the VME
 * output - confirm at the use sites.
 */
#define         INTER_MODE_MASK         0x03        /* selects the low two bits */
#define         INTER_8X8               0x03
#define         INTER_16X8              0x01
#define         INTER_8X16              0x02
#define         SUBMB_SHAPE_MASK        0x00FF00    /* selects bits 8..15 */

/* Values positioned at bit 20; presumably an MV-count field - TODO confirm. */
#define         INTER_MV8               (4 << 20)
#define         INTER_MV32              (6 << 20)
85
86
87 static void
88 gen75_mfc_pipe_mode_select(VADriverContextP ctx,
89                           int standard_select,
90                           struct intel_encoder_context *encoder_context)
91 {
92     struct intel_batchbuffer *batch = encoder_context->base.batch;
93     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
94
95     assert(standard_select == MFX_FORMAT_MPEG2 ||
96            standard_select == MFX_FORMAT_AVC);
97
98     BEGIN_BCS_BATCH(batch, 5);
99
100     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
101     OUT_BCS_BATCH(batch,
102                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
103                   (MFD_MODE_VLD << 15) | /* VLD mode */
104                   (0 << 10) | /* Stream-Out Enable */
105                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
106                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
107                   (0 << 5)  | /* not in stitch mode */
108                   (1 << 4)  | /* encoding mode */
109                   (standard_select << 0));  /* standard select: avc or mpeg2 */
110     OUT_BCS_BATCH(batch,
111                   (0 << 7)  | /* expand NOA bus flag */
112                   (0 << 6)  | /* disable slice-level clock gating */
113                   (0 << 5)  | /* disable clock gating for NOA */
114                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
115                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
116                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
117                   (0 << 1)  |
118                   (0 << 0));
119     OUT_BCS_BATCH(batch, 0);
120     OUT_BCS_BATCH(batch, 0);
121
122     ADVANCE_BCS_BATCH(batch);
123 }
124
125 static void
126 gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
127 {
128     struct intel_batchbuffer *batch = encoder_context->base.batch;
129     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
130
131     BEGIN_BCS_BATCH(batch, 6);
132
133     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
134     OUT_BCS_BATCH(batch, 0);
135     OUT_BCS_BATCH(batch,
136                   ((mfc_context->surface_state.height - 1) << 18) |
137                   ((mfc_context->surface_state.width - 1) << 4));
138     OUT_BCS_BATCH(batch,
139                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
140                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
141                   (0 << 22) | /* surface object control state, FIXME??? */
142                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
143                   (0 << 2)  | /* must be 0 for interleave U/V */
144                   (1 << 1)  | /* must be tiled */
145                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
146     OUT_BCS_BATCH(batch,
147                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
148                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
149     OUT_BCS_BATCH(batch, 0);
150
151     ADVANCE_BCS_BATCH(batch);
152 }
153
154 static void
155 gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
156                                 struct intel_encoder_context *encoder_context)
157 {
158     struct intel_batchbuffer *batch = encoder_context->base.batch;
159     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
160     struct gen6_vme_context *vme_context = encoder_context->vme_context;
161
162     BEGIN_BCS_BATCH(batch, 26);
163
164     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
165         /* the DW1-3 is for the MFX indirect bistream offset */
166     OUT_BCS_BATCH(batch, 0);
167     OUT_BCS_BATCH(batch, 0);
168     OUT_BCS_BATCH(batch, 0);
169         /* the DW4-5 is the MFX upper bound */
170     OUT_BCS_BATCH(batch, 0);
171     OUT_BCS_BATCH(batch, 0);
172
173     /* the DW6-10 is for MFX Indirect MV Object Base Address */
174     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
175     OUT_BCS_BATCH(batch, 0);
176     OUT_BCS_BATCH(batch, 0);
177     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
178     OUT_BCS_BATCH(batch, 0);
179
180      /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
181     OUT_BCS_BATCH(batch, 0);
182     OUT_BCS_BATCH(batch, 0);
183     OUT_BCS_BATCH(batch, 0);
184     OUT_BCS_BATCH(batch, 0);
185     OUT_BCS_BATCH(batch, 0);
186
187      /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */    
188     OUT_BCS_BATCH(batch, 0);
189     OUT_BCS_BATCH(batch, 0);
190     OUT_BCS_BATCH(batch, 0);
191     OUT_BCS_BATCH(batch, 0);
192     OUT_BCS_BATCH(batch, 0);
193
194     /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/        
195     OUT_BCS_RELOC(batch,
196                   mfc_context->mfc_indirect_pak_bse_object.bo,
197                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
198                   0);
199     OUT_BCS_BATCH(batch, 0);
200     OUT_BCS_BATCH(batch, 0);
201         
202     OUT_BCS_RELOC(batch,
203                   mfc_context->mfc_indirect_pak_bse_object.bo,
204                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
205                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
206     OUT_BCS_BATCH(batch, 0);
207
208     ADVANCE_BCS_BATCH(batch);
209 }
210
211 static void
212 gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
213 {
214     struct intel_batchbuffer *batch = encoder_context->base.batch;
215     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
216     struct gen6_vme_context *vme_context = encoder_context->vme_context;
217     struct i965_driver_data *i965 = i965_driver_data(ctx);
218
219     if (IS_STEPPING_BPLUS(i965)) {
220         gen75_mfc_ind_obj_base_addr_state_bplus(ctx, encoder_context);
221         return;
222     }
223     BEGIN_BCS_BATCH(batch, 11);
224
225     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
226     OUT_BCS_BATCH(batch, 0);
227     OUT_BCS_BATCH(batch, 0);
228     /* MFX Indirect MV Object Base Address */
229     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
230     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
231     OUT_BCS_BATCH(batch, 0);
232     OUT_BCS_BATCH(batch, 0);
233     OUT_BCS_BATCH(batch, 0);
234     OUT_BCS_BATCH(batch, 0);
235     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
236     OUT_BCS_RELOC(batch,
237                   mfc_context->mfc_indirect_pak_bse_object.bo,
238                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
239                   0);
240     OUT_BCS_RELOC(batch,
241                   mfc_context->mfc_indirect_pak_bse_object.bo,
242                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
243                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
244
245     ADVANCE_BCS_BATCH(batch);
246 }
247
248 static void
249 gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,  
250                        struct intel_encoder_context *encoder_context)
251 {
252     struct intel_batchbuffer *batch = encoder_context->base.batch;
253     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
254     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
255
256     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
257     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
258
259     BEGIN_BCS_BATCH(batch, 16);
260
261     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
262     OUT_BCS_BATCH(batch,
263                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
264     OUT_BCS_BATCH(batch, 
265                   ((height_in_mbs - 1) << 16) | 
266                   ((width_in_mbs - 1) << 0));
267     OUT_BCS_BATCH(batch, 
268                   (0 << 24) |   /* Second Chroma QP Offset */
269                   (0 << 16) |   /* Chroma QP Offset */
270                   (0 << 14) |   /* Max-bit conformance Intra flag */
271                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
272                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
273                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
274                   (0 << 8)  |   /* FIXME: Image Structure */
275                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
276     OUT_BCS_BATCH(batch,
277                   (0 << 16) |   /* Mininum Frame size */
278                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
279                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
280                   (0 << 13) |   /* CABAC 0 word insertion test enable */
281                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
282                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
283                   (0 << 8)  |   /* FIXME: MbMvFormatFlag */
284                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
285                   (0 << 6)  |   /* Only valid for VLD decoding mode */
286                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
287                   (0 << 4)  |   /* Direct 8x8 inference flag */
288                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
289                   (1 << 2)  |   /* Frame MB only flag */
290                   (0 << 1)  |   /* MBAFF mode is in active */
291                   (0 << 0));    /* Field picture flag */
292     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
293     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
294                   (0xBB8 << 16) |       /* InterMbMaxSz */
295                   (0xEE8) );            /* IntraMbMaxSz */
296     OUT_BCS_BATCH(batch, 0);            /* Reserved */
297     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
298     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */        
299     OUT_BCS_BATCH(batch, 0x8C000000);
300     OUT_BCS_BATCH(batch, 0x00010000);
301     OUT_BCS_BATCH(batch, 0);
302     OUT_BCS_BATCH(batch, 0);
303     OUT_BCS_BATCH(batch, 0);
304     OUT_BCS_BATCH(batch, 0);
305
306     ADVANCE_BCS_BATCH(batch);
307 }
308
309 static void
310 gen75_mfc_qm_state(VADriverContextP ctx,
311                   int qm_type,
312                   unsigned int *qm,
313                   int qm_length,
314                   struct intel_encoder_context *encoder_context)
315 {
316     struct intel_batchbuffer *batch = encoder_context->base.batch;
317     unsigned int qm_buffer[16];
318
319     assert(qm_length <= 16);
320     assert(sizeof(*qm) == 4);
321     memcpy(qm_buffer, qm, qm_length * 4);
322
323     BEGIN_BCS_BATCH(batch, 18);
324     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
325     OUT_BCS_BATCH(batch, qm_type << 0);
326     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
327     ADVANCE_BCS_BATCH(batch);
328 }
329
330 static void
331 gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
332 {
333     unsigned int qm[16] = {
334         0x10101010, 0x10101010, 0x10101010, 0x10101010,
335         0x10101010, 0x10101010, 0x10101010, 0x10101010,
336         0x10101010, 0x10101010, 0x10101010, 0x10101010,
337         0x10101010, 0x10101010, 0x10101010, 0x10101010
338     };
339
340     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
341     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
342     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
343     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
344 }
345
346 static void
347 gen75_mfc_fqm_state(VADriverContextP ctx,
348                    int fqm_type,
349                    unsigned int *fqm,
350                    int fqm_length,
351                    struct intel_encoder_context *encoder_context)
352 {
353     struct intel_batchbuffer *batch = encoder_context->base.batch;
354     unsigned int fqm_buffer[32];
355
356     assert(fqm_length <= 32);
357     assert(sizeof(*fqm) == 4);
358     memcpy(fqm_buffer, fqm, fqm_length * 4);
359
360     BEGIN_BCS_BATCH(batch, 34);
361     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
362     OUT_BCS_BATCH(batch, fqm_type << 0);
363     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
364     ADVANCE_BCS_BATCH(batch);
365 }
366
367 static void
368 gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
369 {
370     unsigned int qm[32] = {
371         0x10001000, 0x10001000, 0x10001000, 0x10001000,
372         0x10001000, 0x10001000, 0x10001000, 0x10001000,
373         0x10001000, 0x10001000, 0x10001000, 0x10001000,
374         0x10001000, 0x10001000, 0x10001000, 0x10001000,
375         0x10001000, 0x10001000, 0x10001000, 0x10001000,
376         0x10001000, 0x10001000, 0x10001000, 0x10001000,
377         0x10001000, 0x10001000, 0x10001000, 0x10001000,
378         0x10001000, 0x10001000, 0x10001000, 0x10001000
379     };
380
381     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
382     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
383     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
384     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
385 }
386
387 static void
388 gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
389                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
390                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
391                            struct intel_batchbuffer *batch)
392 {
393     if (batch == NULL)
394         batch = encoder_context->base.batch;
395
396     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
397
398     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
399     OUT_BCS_BATCH(batch,
400                   (0 << 16) |   /* always start at offset 0 */
401                   (data_bits_in_last_dw << 8) |
402                   (skip_emul_byte_count << 4) |
403                   (!!emulation_flag << 3) |
404                   ((!!is_last_header) << 2) |
405                   ((!!is_end_of_slice) << 1) |
406                   (0 << 0));    /* FIXME: ??? */
407     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
408
409     ADVANCE_BCS_BATCH(batch);
410 }
411
412
413 static void gen75_mfc_init(VADriverContextP ctx,
414                         struct encode_state *encode_state,
415                         struct intel_encoder_context *encoder_context)
416 {
417     struct i965_driver_data *i965 = i965_driver_data(ctx);
418     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
419     dri_bo *bo;
420     int i;
421     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
422     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
423     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
424
425     /*Encode common setup for MFC*/
426     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
427     mfc_context->post_deblocking_output.bo = NULL;
428
429     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
430     mfc_context->pre_deblocking_output.bo = NULL;
431
432     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
433     mfc_context->uncompressed_picture_source.bo = NULL;
434
435     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
436     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
437
438     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
439         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
440         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
441         mfc_context->direct_mv_buffers[i].bo = NULL;
442     }
443
444     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
445         if (mfc_context->reference_surfaces[i].bo != NULL)
446             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
447         mfc_context->reference_surfaces[i].bo = NULL;  
448     }
449
450     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
451     bo = dri_bo_alloc(i965->intel.bufmgr,
452                       "Buffer",
453                       width_in_mbs * 64,
454                       64);
455     assert(bo);
456     mfc_context->intra_row_store_scratch_buffer.bo = bo;
457
458     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
459     bo = dri_bo_alloc(i965->intel.bufmgr,
460                       "Buffer",
461                       width_in_mbs * height_in_mbs * 16,
462                       64);
463     assert(bo);
464     mfc_context->macroblock_status_buffer.bo = bo;
465
466     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
467     bo = dri_bo_alloc(i965->intel.bufmgr,
468                       "Buffer",
469                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
470                       64);
471     assert(bo);
472     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
473
474     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
475     bo = dri_bo_alloc(i965->intel.bufmgr,
476                       "Buffer",
477                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
478                       0x1000);
479     assert(bo);
480     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
481
482     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
483     mfc_context->mfc_batchbuffer_surface.bo = NULL;
484
485     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
486     mfc_context->aux_batchbuffer_surface.bo = NULL;
487
488     if (mfc_context->aux_batchbuffer)
489         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
490
491     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0);
492     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
493     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
494     mfc_context->aux_batchbuffer_surface.pitch = 16;
495     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
496     mfc_context->aux_batchbuffer_surface.size_block = 16;
497
498     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
499 }
500
501 static void
502 gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
503                                 struct intel_encoder_context *encoder_context)
504 {
505     struct intel_batchbuffer *batch = encoder_context->base.batch;
506     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
507     int i;
508
509     BEGIN_BCS_BATCH(batch, 61);
510
511     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
512
513     /* the DW1-3 is for pre_deblocking */
514     if (mfc_context->pre_deblocking_output.bo)
515         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
516                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
517                       0);
518     else
519         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
520
521         OUT_BCS_BATCH(batch, 0);
522         OUT_BCS_BATCH(batch, 0);
523      /* the DW4-6 is for the post_deblocking */
524
525     if (mfc_context->post_deblocking_output.bo)
526         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
527                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
528                       0);                                                                                       /* post output addr  */ 
529     else
530         OUT_BCS_BATCH(batch, 0);
531         OUT_BCS_BATCH(batch, 0);
532         OUT_BCS_BATCH(batch, 0);
533
534      /* the DW7-9 is for the uncompressed_picture */
535     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
536                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
537                   0); /* uncompressed data */
538
539         OUT_BCS_BATCH(batch, 0);
540         OUT_BCS_BATCH(batch, 0);
541
542      /* the DW10-12 is for the mb status */
543     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
544                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
545                   0); /* StreamOut data*/
546         OUT_BCS_BATCH(batch, 0);
547         OUT_BCS_BATCH(batch, 0);
548
549      /* the DW13-15 is for the intra_row_store_scratch */
550     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
551                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
552                   0);   
553         OUT_BCS_BATCH(batch, 0);
554         OUT_BCS_BATCH(batch, 0);
555
556      /* the DW16-18 is for the deblocking filter */
557     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
558                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
559                   0);
560         OUT_BCS_BATCH(batch, 0);
561         OUT_BCS_BATCH(batch, 0);
562
563     /* the DW 19-50 is for Reference pictures*/
564     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
565         if ( mfc_context->reference_surfaces[i].bo != NULL) {
566             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
567                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
568                           0);                   
569         } else {
570             OUT_BCS_BATCH(batch, 0);
571         }
572         OUT_BCS_BATCH(batch, 0);
573     }
574         OUT_BCS_BATCH(batch, 0);
575
576         /* The DW 52-54 is for the MB status buffer */
577     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
578                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
579                   0);                                                                                   /* Macroblock status buffer*/
580         
581         OUT_BCS_BATCH(batch, 0);
582         OUT_BCS_BATCH(batch, 0);
583
584         /* the DW 55-57 is the ILDB buffer */
585         OUT_BCS_BATCH(batch, 0);
586         OUT_BCS_BATCH(batch, 0);
587         OUT_BCS_BATCH(batch, 0);
588
589         /* the DW 58-60 is the second ILDB buffer */
590         OUT_BCS_BATCH(batch, 0);
591         OUT_BCS_BATCH(batch, 0);
592         OUT_BCS_BATCH(batch, 0);
593     ADVANCE_BCS_BATCH(batch);
594 }
595
596 static void
597 gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
598 {
599     struct intel_batchbuffer *batch = encoder_context->base.batch;
600     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
601     struct i965_driver_data *i965 = i965_driver_data(ctx);
602     int i;
603
604     if (IS_STEPPING_BPLUS(i965)) {
605         gen75_mfc_pipe_buf_addr_state_bplus(ctx, encoder_context);
606         return;
607     }
608
609     BEGIN_BCS_BATCH(batch, 25);
610
611     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
612
613     if (mfc_context->pre_deblocking_output.bo)
614         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
615                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
616                       0);
617     else
618         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
619
620     if (mfc_context->post_deblocking_output.bo)
621         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
622                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
623                       0);                                                                                       /* post output addr  */ 
624     else
625         OUT_BCS_BATCH(batch, 0);
626
627     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
628                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
629                   0);                                                                                   /* uncompressed data */
630     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
631                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
632                   0);                                                                                   /* StreamOut data*/
633     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
634                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
635                   0);   
636     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
637                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
638                   0);
639     /* 7..22 Reference pictures*/
640     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
641         if ( mfc_context->reference_surfaces[i].bo != NULL) {
642             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
643                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
644                           0);                   
645         } else {
646             OUT_BCS_BATCH(batch, 0);
647         }
648     }
649     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
650                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
651                   0);                                                                                   /* Macroblock status buffer*/
652
653         OUT_BCS_BATCH(batch, 0);
654
655     ADVANCE_BCS_BATCH(batch);
656 }
657
658 static void
659 gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
660                                 struct intel_encoder_context *encoder_context)
661 {
662     struct intel_batchbuffer *batch = encoder_context->base.batch;
663     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
664
665     int i;
666
667     BEGIN_BCS_BATCH(batch, 71);
668
669     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
670
671     /* Reference frames and Current frames */
672     /* the DW1-32 is for the direct MV for reference */
673     for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
674         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
675             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
676                           I915_GEM_DOMAIN_INSTRUCTION, 0,
677                           0);
678             OUT_BCS_BATCH(batch, 0);
679         } else {
680             OUT_BCS_BATCH(batch, 0);
681             OUT_BCS_BATCH(batch, 0);
682         }
683     }
684         OUT_BCS_BATCH(batch, 0);
685
686         /* the DW34-36 is the MV for the current reference */
687         OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
688                           I915_GEM_DOMAIN_INSTRUCTION, 0,
689                           0);
690
691         OUT_BCS_BATCH(batch, 0);
692         OUT_BCS_BATCH(batch, 0);
693
694     /* POL list */
695     for(i = 0; i < 32; i++) {
696         OUT_BCS_BATCH(batch, i/2);
697     }
698     OUT_BCS_BATCH(batch, 0);
699     OUT_BCS_BATCH(batch, 0);
700
701     ADVANCE_BCS_BATCH(batch);
702 }
703
704 static void
705 gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
706 {
707     struct intel_batchbuffer *batch = encoder_context->base.batch;
708     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
709     struct i965_driver_data *i965 = i965_driver_data(ctx);
710     int i;
711
712     if (IS_STEPPING_BPLUS(i965)) {
713         gen75_mfc_avc_directmode_state_bplus(ctx, encoder_context);
714         return;
715     }
716
717     BEGIN_BCS_BATCH(batch, 69);
718
719     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
720
721     /* Reference frames and Current frames */
722     for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
723         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
724             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
725                           I915_GEM_DOMAIN_INSTRUCTION, 0,
726                           0);
727         } else {
728             OUT_BCS_BATCH(batch, 0);
729         }
730     }
731
732     /* POL list */
733     for(i = 0; i < 32; i++) {
734         OUT_BCS_BATCH(batch, i/2);
735     }
736     OUT_BCS_BATCH(batch, 0);
737     OUT_BCS_BATCH(batch, 0);
738
739     ADVANCE_BCS_BATCH(batch);
740 }
741
742 static void
743 gen75_mfc_avc_ref_idx_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
744 {
745     struct intel_batchbuffer *batch = encoder_context->base.batch;
746     int i;
747
748     BEGIN_BCS_BATCH(batch, 10);
749     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
750     OUT_BCS_BATCH(batch, 0);                  //Select L0
751     OUT_BCS_BATCH(batch, 0x80808020);         //Only 1 reference
752     for(i = 0; i < 7; i++) {
753         OUT_BCS_BATCH(batch, 0x80808080);
754     }   
755     ADVANCE_BCS_BATCH(batch);
756
757     BEGIN_BCS_BATCH(batch, 10);
758     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
759     OUT_BCS_BATCH(batch, 1);                  //Select L1
760     OUT_BCS_BATCH(batch, 0x80808022);         //Only 1 reference
761     for(i = 0; i < 7; i++) {
762         OUT_BCS_BATCH(batch, 0x80808080);
763     }   
764     ADVANCE_BCS_BATCH(batch);
765 }
766
767
768 static void
769 gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
770                                 struct intel_encoder_context *encoder_context)
771 {
772     struct intel_batchbuffer *batch = encoder_context->base.batch;
773     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
774
775     BEGIN_BCS_BATCH(batch, 10);
776
777     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
778     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
779                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
780                   0);
781     OUT_BCS_BATCH(batch, 0);
782     OUT_BCS_BATCH(batch, 0);
783         
784         /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
785     OUT_BCS_BATCH(batch, 0);
786     OUT_BCS_BATCH(batch, 0);
787     OUT_BCS_BATCH(batch, 0);
788
789         /* the DW7-9 is for Bitplane Read Buffer Base Address */
790     OUT_BCS_BATCH(batch, 0);
791     OUT_BCS_BATCH(batch, 0);
792     OUT_BCS_BATCH(batch, 0);
793
794     ADVANCE_BCS_BATCH(batch);
795 }
796
797 static void
798 gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
799 {
800     struct intel_batchbuffer *batch = encoder_context->base.batch;
801     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
802     struct i965_driver_data *i965 = i965_driver_data(ctx);
803
804     if (IS_STEPPING_BPLUS(i965)) {
805         gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, encoder_context);
806         return;
807     }
808
809     BEGIN_BCS_BATCH(batch, 4);
810
811     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
812     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
813                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
814                   0);
815     OUT_BCS_BATCH(batch, 0);
816     OUT_BCS_BATCH(batch, 0);
817
818     ADVANCE_BCS_BATCH(batch);
819 }
820
821
822 static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
823                                       struct encode_state *encode_state,
824                                       struct intel_encoder_context *encoder_context)
825 {
826     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
827
828     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
829     mfc_context->set_surface_state(ctx, encoder_context);
830     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
831     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
832     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
833     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
834     mfc_context->avc_qm_state(ctx, encoder_context);
835     mfc_context->avc_fqm_state(ctx, encoder_context);
836     gen75_mfc_avc_directmode_state(ctx, encoder_context); 
837     gen75_mfc_avc_ref_idx_state(ctx, encoder_context);
838 }
839
840
841 static VAStatus gen75_mfc_run(VADriverContextP ctx, 
842                              struct encode_state *encode_state,
843                              struct intel_encoder_context *encoder_context)
844 {
845     struct intel_batchbuffer *batch = encoder_context->base.batch;
846
847     intel_batchbuffer_flush(batch);             //run the pipeline
848
849     return VA_STATUS_SUCCESS;
850 }
851
852
853 static VAStatus
854 gen75_mfc_stop(VADriverContextP ctx, 
855               struct encode_state *encode_state,
856               struct intel_encoder_context *encoder_context,
857               int *encoded_bits_size)
858 {
859     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
860     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
861     VACodedBufferSegment *coded_buffer_segment;
862     
863     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
864     assert(vaStatus == VA_STATUS_SUCCESS);
865     *encoded_bits_size = coded_buffer_segment->size * 8;
866     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
867
868     return VA_STATUS_SUCCESS;
869 }
870
871
872 static void
873 gen75_mfc_avc_slice_state(VADriverContextP ctx,
874                          VAEncPictureParameterBufferH264 *pic_param,
875                          VAEncSliceParameterBufferH264 *slice_param,
876                          struct encode_state *encode_state,
877                          struct intel_encoder_context *encoder_context,
878                          int rate_control_enable,
879                          int qp,
880                          struct intel_batchbuffer *batch)
881 {
882     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
883     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
884     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
885     int beginmb = slice_param->macroblock_address;
886     int endmb = beginmb + slice_param->num_macroblocks;
887     int beginx = beginmb % width_in_mbs;
888     int beginy = beginmb / width_in_mbs;
889     int nextx =  endmb % width_in_mbs;
890     int nexty = endmb / width_in_mbs;
891     int slice_type = slice_param->slice_type;
892     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
893     int bit_rate_control_target, maxQpN, maxQpP;
894     unsigned char correct[6], grow, shrink;
895     int i;
896     int weighted_pred_idc = 0;
897     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
898     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
899
900     if (batch == NULL)
901         batch = encoder_context->base.batch;
902
903     bit_rate_control_target = slice_type;
904     if (slice_type == SLICE_TYPE_SP)
905         bit_rate_control_target = SLICE_TYPE_P;
906     else if (slice_type == SLICE_TYPE_SI)
907         bit_rate_control_target = SLICE_TYPE_I;
908
909     if (slice_type == SLICE_TYPE_P) {
910         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
911     } else if (slice_type == SLICE_TYPE_B) {
912         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
913
914         if (weighted_pred_idc == 2) {
915             /* 8.4.3 - Derivation process for prediction weights (8-279) */
916             luma_log2_weight_denom = 5;
917             chroma_log2_weight_denom = 5;
918         }
919     }
920
921     maxQpN = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpNegModifier;
922     maxQpP = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpPosModifier;
923
924     for (i = 0; i < 6; i++)
925         correct[i] = mfc_context->bit_rate_control_context[bit_rate_control_target].Correct[i];
926
927     grow = mfc_context->bit_rate_control_context[bit_rate_control_target].GrowInit + 
928         (mfc_context->bit_rate_control_context[bit_rate_control_target].GrowResistance << 4);
929     shrink = mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkInit + 
930         (mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkResistance << 4);
931
932     BEGIN_BCS_BATCH(batch, 11);;
933
934     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
935     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
936
937     if (slice_type == SLICE_TYPE_I) {
938         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
939     } else {
940         OUT_BCS_BATCH(batch,
941                       (1 << 16) |                       /*1 reference frame*/
942                       (chroma_log2_weight_denom << 8) |
943                       (luma_log2_weight_denom << 0));
944     }
945
946     OUT_BCS_BATCH(batch, 
947                   (weighted_pred_idc << 30) |
948                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
949                   (slice_param->disable_deblocking_filter_idc << 27) |
950                   (slice_param->cabac_init_idc << 24) |
951                   (qp<<16) |                    /*Slice Quantization Parameter*/
952                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
953                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
954     OUT_BCS_BATCH(batch,
955                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
956                   (beginx << 16) |
957                   slice_param->macroblock_address );
958     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
959     OUT_BCS_BATCH(batch, 
960                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
961                   (1 << 30) |           /*ResetRateControlCounter*/
962                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
963                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
964                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
965                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
966                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
967                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
968                   (last_slice << 19) |     /*IsLastSlice*/
969                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
970                   (1 << 17) |       /*HeaderPresentFlag*/       
971                   (1 << 16) |       /*SliceData PresentFlag*/
972                   (1 << 15) |       /*TailPresentFlag*/
973                   (1 << 13) |       /*RBSP NAL TYPE*/   
974                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
975     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
976     OUT_BCS_BATCH(batch,
977                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
978                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
979                   (shrink << 8)  |
980                   (grow << 0));   
981     OUT_BCS_BATCH(batch,
982                   (correct[5] << 20) |
983                   (correct[4] << 16) |
984                   (correct[3] << 12) |
985                   (correct[2] << 8) |
986                   (correct[1] << 4) |
987                   (correct[0] << 0));
988     OUT_BCS_BATCH(batch, 0);
989
990     ADVANCE_BCS_BATCH(batch);
991 }
992
993
994 #ifdef MFC_SOFTWARE_HASWELL
995
996 static int
997 gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
998                                 int qp,unsigned int *msg,
999                               struct intel_encoder_context *encoder_context,
1000                               unsigned char target_mb_size, unsigned char max_mb_size,
1001                               struct intel_batchbuffer *batch)
1002 {
1003     int len_in_dwords = 12;
1004     unsigned int intra_msg;
1005 #define         INTRA_MSG_FLAG          (1 << 13)
1006 #define         INTRA_MBTYPE_MASK       (0x1F0000)
1007     if (batch == NULL)
1008         batch = encoder_context->base.batch;
1009
1010     BEGIN_BCS_BATCH(batch, len_in_dwords);
1011
1012     intra_msg = msg[0] & 0xC0FF;
1013     intra_msg |= INTRA_MSG_FLAG;
1014     intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
1015     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1016     OUT_BCS_BATCH(batch, 0);
1017     OUT_BCS_BATCH(batch, 0);
1018     OUT_BCS_BATCH(batch, 
1019                   (0 << 24) |           /* PackedMvNum, Debug*/
1020                   (0 << 20) |           /* No motion vector */
1021                   (1 << 19) |           /* CbpDcY */
1022                   (1 << 18) |           /* CbpDcU */
1023                   (1 << 17) |           /* CbpDcV */
1024                   intra_msg);
1025
1026     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
1027     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
1028     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
1029
1030     /*Stuff for Intra MB*/
1031     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
1032     OUT_BCS_BATCH(batch, msg[2]);       
1033     OUT_BCS_BATCH(batch, msg[3]&0xFF);  
1034     
1035     /*MaxSizeInWord and TargetSzieInWord*/
1036     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1037                   (target_mb_size << 16) );
1038
1039     OUT_BCS_BATCH(batch, 0);
1040
1041     ADVANCE_BCS_BATCH(batch);
1042
1043     return len_in_dwords;
1044 }
1045
1046 static int
1047 gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1048                               unsigned int *msg, unsigned int offset,
1049                               struct intel_encoder_context *encoder_context,
1050                               unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1051                               struct intel_batchbuffer *batch)
1052 {
1053     int len_in_dwords = 12;
1054         unsigned int inter_msg = 0;
1055     if (batch == NULL)
1056         batch = encoder_context->base.batch;
1057     {
1058 #define MSG_MV_OFFSET   4
1059         unsigned int *mv_ptr;
1060         mv_ptr = msg + MSG_MV_OFFSET;
1061         /* MV of VME output is based on 16 sub-blocks. So it is necessary
1062          * to convert them to be compatible with the format of AVC_PAK
1063          * command.
1064          */
1065         if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
1066                 /* MV[0] and MV[2] are replicated */
1067                 mv_ptr[4] = mv_ptr[0];
1068                 mv_ptr[5] = mv_ptr[1];
1069                 mv_ptr[2] = mv_ptr[8];
1070                 mv_ptr[3] = mv_ptr[9];
1071                 mv_ptr[6] = mv_ptr[8]; 
1072                 mv_ptr[7] = mv_ptr[9]; 
1073         } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
1074                 /* MV[0] and MV[1] are replicated */
1075                 mv_ptr[2] = mv_ptr[0];  
1076                 mv_ptr[3] = mv_ptr[1];
1077                 mv_ptr[4] = mv_ptr[16]; 
1078                 mv_ptr[5] = mv_ptr[17]; 
1079                 mv_ptr[6] = mv_ptr[24];
1080                 mv_ptr[7] = mv_ptr[25];
1081         } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1082                         !(msg[1] & SUBMB_SHAPE_MASK)) {
1083                 /* Don't touch MV[0] or MV[1] */
1084                 mv_ptr[2] = mv_ptr[8];
1085                 mv_ptr[3] = mv_ptr[9];
1086                 mv_ptr[4] = mv_ptr[16];
1087                 mv_ptr[5] = mv_ptr[17];
1088                 mv_ptr[6] = mv_ptr[24];
1089                 mv_ptr[7] = mv_ptr[25];
1090         }
1091     }
1092
1093     BEGIN_BCS_BATCH(batch, len_in_dwords);
1094
1095     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1096
1097         inter_msg = 32;
1098         /* MV quantity */
1099         if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
1100                 if (msg[1] & SUBMB_SHAPE_MASK)
1101                         inter_msg = 128;
1102         }
1103     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
1104     OUT_BCS_BATCH(batch, offset);
1105         inter_msg = msg[0] & (0x1F00FFFF);
1106         inter_msg |= INTER_MV8;
1107         inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
1108         if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1109                         (msg[1] & SUBMB_SHAPE_MASK)) {
1110                 inter_msg |= INTER_MV32;
1111         }
1112
1113     OUT_BCS_BATCH(batch, inter_msg);
1114
1115     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
1116     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
1117 #if 0 
1118     if ( slice_type == SLICE_TYPE_B) {
1119         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
1120     } else {
1121         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
1122     }
1123 #else
1124     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
1125 #endif
1126
1127         inter_msg = msg[1] >> 8;
1128     /*Stuff for Inter MB*/
1129     OUT_BCS_BATCH(batch, inter_msg);        
1130     OUT_BCS_BATCH(batch, 0x0);    
1131     OUT_BCS_BATCH(batch, 0x0);        
1132
1133     /*MaxSizeInWord and TargetSzieInWord*/
1134     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1135                   (target_mb_size << 16) );
1136
1137     OUT_BCS_BATCH(batch, 0x0);    
1138
1139     ADVANCE_BCS_BATCH(batch);
1140
1141     return len_in_dwords;
1142 }
1143
1144 #define         INTRA_RDO_OFFSET        4
1145 #define         INTER_RDO_OFFSET        54
1146 #define         INTER_MSG_OFFSET        52
1147 #define         INTER_MV_OFFSET         224
1148 #define         RDO_MASK                0xFFFF
1149
1150 static void 
1151 gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1152                                        struct encode_state *encode_state,
1153                                        struct intel_encoder_context *encoder_context,
1154                                        int slice_index,
1155                                        struct intel_batchbuffer *slice_batch)
1156 {
1157     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1158     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1159     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1160     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1161     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1162     unsigned int *msg = NULL, offset = 0;
1163     unsigned char *msg_ptr = NULL;
1164     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
1165     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1166     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1167     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1168     int i,x,y;
1169     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1170     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1171     unsigned char *slice_header = NULL;
1172     int slice_header_length_in_bits = 0;
1173     unsigned int tail_data[] = { 0x0, 0x0 };
1174     int slice_type = pSliceParameter->slice_type;
1175
1176
1177     if (rate_control_mode == VA_RC_CBR) {
1178         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1179         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1180     }
1181
1182     /* only support for 8-bit pixel bit-depth */
1183     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1184     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1185     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1186     assert(qp >= 0 && qp < 52);
1187
1188     gen75_mfc_avc_slice_state(ctx, 
1189                              pPicParameter,
1190                              pSliceParameter,
1191                              encode_state, encoder_context,
1192                              (rate_control_mode == VA_RC_CBR), qp, slice_batch);
1193
1194     if ( slice_index == 0) 
1195         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1196
1197     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1198
1199     // slice hander
1200     mfc_context->insert_object(ctx, encoder_context,
1201                                (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1202                                5,  /* first 5 bytes are start code + nal unit type */
1203                                1, 0, 1, slice_batch);
1204
1205     dri_bo_map(vme_context->vme_output.bo , 1);
1206     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1207
1208     if (is_intra) {
1209         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1210     } else {
1211         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1212     }
1213    
1214     for (i = pSliceParameter->macroblock_address; 
1215          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1216         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1217         x = i % width_in_mbs;
1218         y = i / width_in_mbs;
1219         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1220
1221         if (is_intra) {
1222             assert(msg);
1223             gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1224         } else {
1225             int inter_rdo, intra_rdo;
1226             inter_rdo = msg[INTER_RDO_OFFSET] & RDO_MASK;
1227             intra_rdo = msg[INTRA_RDO_OFFSET] & RDO_MASK;
1228             offset = i * vme_context->vme_output.size_block + INTER_MV_OFFSET;
1229             if (intra_rdo < inter_rdo) { 
1230                 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1231             } else {
1232                 msg += INTER_MSG_OFFSET;
1233                 gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1234             }
1235         }
1236     }
1237    
1238     dri_bo_unmap(vme_context->vme_output.bo);
1239
1240     if ( last_slice ) {    
1241         mfc_context->insert_object(ctx, encoder_context,
1242                                    tail_data, 2, 8,
1243                                    2, 1, 1, 0, slice_batch);
1244     } else {
1245         mfc_context->insert_object(ctx, encoder_context,
1246                                    tail_data, 1, 8,
1247                                    1, 1, 1, 0, slice_batch);
1248     }
1249
1250     free(slice_header);
1251
1252 }
1253
1254 static dri_bo *
1255 gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1256                                   struct encode_state *encode_state,
1257                                   struct intel_encoder_context *encoder_context)
1258 {
1259     struct i965_driver_data *i965 = i965_driver_data(ctx);
1260     struct intel_batchbuffer *batch;
1261     dri_bo *batch_bo;
1262     int i;
1263     int buffer_size;
1264     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1265     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1266     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1267
1268     buffer_size = width_in_mbs * height_in_mbs * 64;
1269     batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
1270     batch_bo = batch->buffer;
1271     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1272         gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1273     }
1274
1275     intel_batchbuffer_align(batch, 8);
1276     
1277     BEGIN_BCS_BATCH(batch, 2);
1278     OUT_BCS_BATCH(batch, 0);
1279     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1280     ADVANCE_BCS_BATCH(batch);
1281
1282     dri_bo_reference(batch_bo);
1283     intel_batchbuffer_free(batch);
1284
1285     return batch_bo;
1286 }
1287
1288 #else
1289
1290 static void
1291 gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1292                                     struct encode_state *encode_state,
1293                                     struct intel_encoder_context *encoder_context)
1294
1295 {
1296     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1297     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1298
1299     assert(vme_context->vme_output.bo);
1300     mfc_context->buffer_suface_setup(ctx,
1301                                      &mfc_context->gpe_context,
1302                                      &vme_context->vme_output,
1303                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1304                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1305     assert(mfc_context->aux_batchbuffer_surface.bo);
1306     mfc_context->buffer_suface_setup(ctx,
1307                                      &mfc_context->gpe_context,
1308                                      &mfc_context->aux_batchbuffer_surface,
1309                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1310                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
1311 }
1312
1313 static void
1314 gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1315                                      struct encode_state *encode_state,
1316                                      struct intel_encoder_context *encoder_context)
1317
1318 {
1319     struct i965_driver_data *i965 = i965_driver_data(ctx);
1320     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1321     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1322     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1323     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1324     mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1325     mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1326     mfc_context->mfc_batchbuffer_surface.pitch = 16;
1327     mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, 
1328                                                            "MFC batchbuffer",
1329                                                            mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1330                                                            0x1000);
1331     mfc_context->buffer_suface_setup(ctx,
1332                                      &mfc_context->gpe_context,
1333                                      &mfc_context->mfc_batchbuffer_surface,
1334                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1335                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1336 }
1337
1338 static void
1339 gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
1340                                     struct encode_state *encode_state,
1341                                     struct intel_encoder_context *encoder_context)
1342 {
1343     gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1344     gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1345 }
1346
1347 static void
1348 gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
1349                                 struct encode_state *encode_state,
1350                                 struct intel_encoder_context *encoder_context)
1351 {
1352     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1353     struct gen6_interface_descriptor_data *desc;   
1354     int i;
1355     dri_bo *bo;
1356
1357     bo = mfc_context->gpe_context.idrt.bo;
1358     dri_bo_map(bo, 1);
1359     assert(bo->virtual);
1360     desc = bo->virtual;
1361
1362     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1363         struct i965_kernel *kernel;
1364
1365         kernel = &mfc_context->gpe_context.kernels[i];
1366         assert(sizeof(*desc) == 32);
1367
1368         /*Setup the descritor table*/
1369         memset(desc, 0, sizeof(*desc));
1370         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1371         desc->desc2.sampler_count = 0;
1372         desc->desc2.sampler_state_pointer = 0;
1373         desc->desc3.binding_table_entry_count = 2;
1374         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1375         desc->desc4.constant_urb_entry_read_offset = 0;
1376         desc->desc4.constant_urb_entry_read_length = 4;
1377                 
1378         /*kernel start*/
1379         dri_bo_emit_reloc(bo,   
1380                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1381                           0,
1382                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1383                           kernel->bo);
1384         desc++;
1385     }
1386
1387     dri_bo_unmap(bo);
1388 }
1389
1390 static void
1391 gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1392                                     struct encode_state *encode_state,
1393                                     struct intel_encoder_context *encoder_context)
1394 {
1395     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1396     
1397     (void)mfc_context;
1398 }
1399
1400 static void
1401 gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1402                                          int index,
1403                                          int head_offset,
1404                                          int batchbuffer_offset,
1405                                          int head_size,
1406                                          int tail_size,
1407                                          int number_mb_cmds,
1408                                          int first_object,
1409                                          int last_object,
1410                                          int last_slice,
1411                                          int mb_x,
1412                                          int mb_y,
1413                                          int width_in_mbs,
1414                                          int qp)
1415 {
1416     BEGIN_BATCH(batch, 12);
1417     
1418     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1419     OUT_BATCH(batch, index);
1420     OUT_BATCH(batch, 0);
1421     OUT_BATCH(batch, 0);
1422     OUT_BATCH(batch, 0);
1423     OUT_BATCH(batch, 0);
1424    
1425     /*inline data */
1426     OUT_BATCH(batch, head_offset);
1427     OUT_BATCH(batch, batchbuffer_offset);
1428     OUT_BATCH(batch, 
1429               head_size << 16 |
1430               tail_size);
1431     OUT_BATCH(batch,
1432               number_mb_cmds << 16 |
1433               first_object << 2 |
1434               last_object << 1 |
1435               last_slice);
1436     OUT_BATCH(batch,
1437               mb_y << 8 |
1438               mb_x);
1439     OUT_BATCH(batch,
1440               qp << 16 |
1441               width_in_mbs);
1442
1443     ADVANCE_BATCH(batch);
1444 }
1445
1446 static void
1447 gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1448                                        struct intel_encoder_context *encoder_context,
1449                                        VAEncSliceParameterBufferH264 *slice_param,
1450                                        int head_offset,
1451                                        unsigned short head_size,
1452                                        unsigned short tail_size,
1453                                        int batchbuffer_offset,
1454                                        int qp,
1455                                        int last_slice)
1456 {
1457     struct intel_batchbuffer *batch = encoder_context->base.batch;
1458     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1459     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1460     int total_mbs = slice_param->num_macroblocks;
1461     int number_mb_cmds = 128;
1462     int starting_mb = 0;
1463     int last_object = 0;
1464     int first_object = 1;
1465     int i;
1466     int mb_x, mb_y;
1467     int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1468
1469     for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1470         last_object = (total_mbs - starting_mb) == number_mb_cmds;
1471         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1472         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1473         assert(mb_x <= 255 && mb_y <= 255);
1474
1475         starting_mb += number_mb_cmds;
1476
1477         gen75_mfc_batchbuffer_emit_object_command(batch,
1478                                                  index,
1479                                                  head_offset,
1480                                                  batchbuffer_offset,
1481                                                  head_size,
1482                                                  tail_size,
1483                                                  number_mb_cmds,
1484                                                  first_object,
1485                                                  last_object,
1486                                                  last_slice,
1487                                                  mb_x,
1488                                                  mb_y,
1489                                                  width_in_mbs,
1490                                                  qp);
1491
1492         if (first_object) {
1493             head_offset += head_size;
1494             batchbuffer_offset += head_size;
1495         }
1496
1497         if (last_object) {
1498             head_offset += tail_size;
1499             batchbuffer_offset += tail_size;
1500         }
1501
1502         batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
1503
1504         first_object = 0;
1505     }
1506
1507     if (!last_object) {
1508         last_object = 1;
1509         number_mb_cmds = total_mbs % number_mb_cmds;
1510         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1511         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1512         assert(mb_x <= 255 && mb_y <= 255);
1513         starting_mb += number_mb_cmds;
1514
1515         gen75_mfc_batchbuffer_emit_object_command(batch,
1516                                                  index,
1517                                                  head_offset,
1518                                                  batchbuffer_offset,
1519                                                  head_size,
1520                                                  tail_size,
1521                                                  number_mb_cmds,
1522                                                  first_object,
1523                                                  last_object,
1524                                                  last_slice,
1525                                                  mb_x,
1526                                                  mb_y,
1527                                                  width_in_mbs,
1528                                                  qp);
1529     }
1530 }
1531                           
1532 /*
1533  * return size in Owords (16bytes)
1534  */         
1535 static int
1536 gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1537                                struct encode_state *encode_state,
1538                                struct intel_encoder_context *encoder_context,
1539                                int slice_index,
1540                                int batchbuffer_offset)
1541 {
1542     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1543     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1544     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1545     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1546     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1547     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1548     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1549     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1550     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1551     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1552     unsigned char *slice_header = NULL;
1553     int slice_header_length_in_bits = 0;
1554     unsigned int tail_data[] = { 0x0, 0x0 };
1555     long head_offset;
1556     int old_used = intel_batchbuffer_used_size(slice_batch), used;
1557     unsigned short head_size, tail_size;
1558     int slice_type = pSliceParameter->slice_type;
1559
1560     if (rate_control_mode == VA_RC_CBR) {
1561         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1562         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1563     }
1564
1565     /* only support for 8-bit pixel bit-depth */
1566     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1567     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1568     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1569     assert(qp >= 0 && qp < 52);
1570
1571     head_offset = old_used / 16;
1572     gen75_mfc_avc_slice_state(ctx,
1573                              pPicParameter,
1574                              pSliceParameter,
1575                              encode_state,
1576                              encoder_context,
1577                              (rate_control_mode == VA_RC_CBR),
1578                              qp,
1579                              slice_batch);
1580
1581     if (slice_index == 0)
1582         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1583
1584     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1585
1586     // slice hander
1587     mfc_context->insert_object(ctx,
1588                                encoder_context,
1589                                (unsigned int *)slice_header,
1590                                ALIGN(slice_header_length_in_bits, 32) >> 5,
1591                                slice_header_length_in_bits & 0x1f,
1592                                5,  /* first 5 bytes are start code + nal unit type */
1593                                1,
1594                                0,
1595                                1,
1596                                slice_batch);
1597     free(slice_header);
1598
1599     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1600     used = intel_batchbuffer_used_size(slice_batch);
1601     head_size = (used - old_used) / 16;
1602     old_used = used;
1603
1604     /* tail */
1605     if (last_slice) {    
1606         mfc_context->insert_object(ctx,
1607                                    encoder_context,
1608                                    tail_data,
1609                                    2,
1610                                    8,
1611                                    2,
1612                                    1,
1613                                    1,
1614                                    0,
1615                                    slice_batch);
1616     } else {
1617         mfc_context->insert_object(ctx,
1618                                    encoder_context,
1619                                    tail_data,
1620                                    1,
1621                                    8,
1622                                    1,
1623                                    1,
1624                                    1,
1625                                    0,
1626                                    slice_batch);
1627     }
1628
1629     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1630     used = intel_batchbuffer_used_size(slice_batch);
1631     tail_size = (used - old_used) / 16;
1632
1633    
1634     gen75_mfc_avc_batchbuffer_slice_command(ctx,
1635                                            encoder_context,
1636                                            pSliceParameter,
1637                                            head_offset,
1638                                            head_size,
1639                                            tail_size,
1640                                            batchbuffer_offset,
1641                                            qp,
1642                                            last_slice);
1643
1644     return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
1645 }
1646
1647 static void
1648 gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1649                                   struct encode_state *encode_state,
1650                                   struct intel_encoder_context *encoder_context)
1651 {
1652     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1653     struct intel_batchbuffer *batch = encoder_context->base.batch;
1654     int i, size, offset = 0;
1655     intel_batchbuffer_start_atomic(batch, 0x4000); 
1656     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1657
1658     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1659         size = gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1660         offset += size;
1661     }
1662
1663     intel_batchbuffer_end_atomic(batch);
1664     intel_batchbuffer_flush(batch);
1665 }
1666
1667 static void
1668 gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
1669                                struct encode_state *encode_state,
1670                                struct intel_encoder_context *encoder_context)
1671 {
1672     gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1673     gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1674     gen75_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1675     gen75_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1676 }
1677
1678 static dri_bo *
1679 gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1680                                   struct encode_state *encode_state,
1681                                   struct intel_encoder_context *encoder_context)
1682 {
1683     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1684
1685     gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1686     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1687
1688     return mfc_context->mfc_batchbuffer_surface.bo;
1689 }
1690
1691 #endif
1692
1693 static void
1694 gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
1695                                  struct encode_state *encode_state,
1696                                  struct intel_encoder_context *encoder_context)
1697 {
1698     struct intel_batchbuffer *batch = encoder_context->base.batch;
1699     dri_bo *slice_batch_bo;
1700
1701     if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1702         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1703         assert(0);
1704         return; 
1705     }
1706
1707 #ifdef MFC_SOFTWARE_HASWELL
1708     slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1709 #else
1710     slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1711 #endif
1712
1713     // begin programing
1714     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1715     intel_batchbuffer_emit_mi_flush(batch);
1716     
1717     // picture level programing
1718     gen75_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1719
1720     BEGIN_BCS_BATCH(batch, 2);
1721     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1722     OUT_BCS_RELOC(batch,
1723                   slice_batch_bo,
1724                   I915_GEM_DOMAIN_COMMAND, 0, 
1725                   0);
1726     ADVANCE_BCS_BATCH(batch);
1727
1728     // end programing
1729     intel_batchbuffer_end_atomic(batch);
1730
1731     dri_bo_unreference(slice_batch_bo);
1732 }
1733
1734
1735 static VAStatus
1736 gen75_mfc_avc_encode_picture(VADriverContextP ctx, 
1737                             struct encode_state *encode_state,
1738                             struct intel_encoder_context *encoder_context)
1739 {
1740     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1741     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1742     int current_frame_bits_size;
1743     int sts;
1744  
1745     for (;;) {
1746         gen75_mfc_init(ctx, encode_state, encoder_context);
1747         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1748         /*Programing bcs pipeline*/
1749         gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);  //filling the pipeline
1750         gen75_mfc_run(ctx, encode_state, encoder_context);
1751         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1752             gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1753             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1754             if (sts == BRC_NO_HRD_VIOLATION) {
1755                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1756                 break;
1757             }
1758             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1759                 if (!mfc_context->hrd.violation_noted) {
1760                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1761                     mfc_context->hrd.violation_noted = 1;
1762                 }
1763                 return VA_STATUS_SUCCESS;
1764             }
1765         } else {
1766             break;
1767         }
1768     }
1769
1770     return VA_STATUS_SUCCESS;
1771 }
1772
1773 /*
1774  * MPEG-2
1775  */
1776
/*
 * Picture-type code written into the MPEG-2 picture state, indexed by the
 * picture_type value of the picture parameter buffer.
 */
static const int
va_to_gen75_mpeg2_picture_type[3] = {
    [0] = 1,    /* I */
    [1] = 2,    /* P */
    [2] = 3     /* B */
};
1783
1784 static void
1785 gen75_mfc_mpeg2_pic_state(VADriverContextP ctx,
1786                           struct intel_encoder_context *encoder_context,
1787                           struct encode_state *encode_state)
1788 {
1789     struct intel_batchbuffer *batch = encoder_context->base.batch;
1790     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1791     VAEncPictureParameterBufferMPEG2 *pic_param;
1792     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1793     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1794
1795     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1796     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1797
1798     BEGIN_BCS_BATCH(batch, 13);
1799     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1800     OUT_BCS_BATCH(batch,
1801                   (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1802                   (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1803                   (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1804                   (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1805                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1806                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1807                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1808                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1809                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1810                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1811                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1812                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1813     OUT_BCS_BATCH(batch,
1814                   0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1815                   va_to_gen75_mpeg2_picture_type[pic_param->picture_type] << 9 |
1816                   0);
1817     OUT_BCS_BATCH(batch,
1818                   1 << 31 |     /* slice concealment */
1819                   (height_in_mbs - 1) << 16 |
1820                   (width_in_mbs - 1));
1821     OUT_BCS_BATCH(batch, 0);
1822     OUT_BCS_BATCH(batch, 0);
1823     OUT_BCS_BATCH(batch,
1824                   0xFFF << 16 | /* InterMBMaxSize */
1825                   0xFFF << 0 |  /* IntraMBMaxSize */
1826                   0);
1827     OUT_BCS_BATCH(batch, 0);
1828     OUT_BCS_BATCH(batch, 0);
1829     OUT_BCS_BATCH(batch, 0);
1830     OUT_BCS_BATCH(batch, 0);
1831     OUT_BCS_BATCH(batch, 0);
1832     OUT_BCS_BATCH(batch, 0);
1833     ADVANCE_BCS_BATCH(batch);
1834 }
1835
1836 static void
1837 gen75_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1838 {
1839     unsigned char intra_qm[64] = {
1840          8, 16, 19, 22, 26, 27, 29, 34,
1841         16, 16, 22, 24, 27, 29, 34, 37,
1842         19, 22, 26, 27, 29, 34, 34, 38,
1843         22, 22, 26, 27, 29, 34, 37, 40,
1844         22, 26, 27, 29, 32, 35, 40, 48,
1845         26, 27, 29, 32, 35, 40, 48, 58,
1846         26, 27, 29, 34, 38, 46, 56, 69,
1847         27, 29, 35, 38, 46, 56, 69, 83
1848     };
1849
1850     unsigned char non_intra_qm[64] = {
1851         16, 16, 16, 16, 16, 16, 16, 16,
1852         16, 16, 16, 16, 16, 16, 16, 16,
1853         16, 16, 16, 16, 16, 16, 16, 16,
1854         16, 16, 16, 16, 16, 16, 16, 16,
1855         16, 16, 16, 16, 16, 16, 16, 16,
1856         16, 16, 16, 16, 16, 16, 16, 16,
1857         16, 16, 16, 16, 16, 16, 16, 16,
1858         16, 16, 16, 16, 16, 16, 16, 16
1859     };
1860
1861     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1862     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
1863 }
1864
1865 static void
1866 gen75_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1867 {
1868     unsigned short intra_fqm[64] = {
1869          65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1870          65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1871          65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1872          65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1873          65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1874          65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1875          65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1876          65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1877     };
1878
1879     unsigned short non_intra_fqm[64] = {
1880         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1881         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1882         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1883         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1884         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1885         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1886         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1887         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1888     };
1889
1890     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1891     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
1892 }
1893
1894 static void
1895 gen75_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1896                                  struct intel_encoder_context *encoder_context,
1897                                  int x, int y,
1898                                  int next_x, int next_y,
1899                                  int is_fisrt_slice_group,
1900                                  int is_last_slice_group,
1901                                  int intra_slice,
1902                                  int qp,
1903                                  struct intel_batchbuffer *batch)
1904 {
1905     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1906
1907     if (batch == NULL)
1908         batch = encoder_context->base.batch;
1909
1910     BEGIN_BCS_BATCH(batch, 8);
1911
1912     OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1913     OUT_BCS_BATCH(batch,
1914                   0 << 31 |                             /* MbRateCtrlFlag */
1915                   !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
1916                   1 << 17 |                             /* Insert Header before the first slice group data */
1917                   1 << 16 |                             /* SliceData PresentFlag: always 1 */
1918                   1 << 15 |                             /* TailPresentFlag: always 1 */
1919                   0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
1920                   !!intra_slice << 13 |                 /* IntraSlice */
1921                   !!intra_slice << 12 |                 /* IntraSliceFlag */
1922                   0);
1923     OUT_BCS_BATCH(batch,
1924                   next_y << 24 |
1925                   next_x << 16 |
1926                   y << 8 |
1927                   x << 0 |
1928                   0);
1929     OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
1930     /* bitstream pointer is only loaded once for the first slice of a frame when 
1931      * LoadSlicePointerFlag is 0
1932      */
1933     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1934     OUT_BCS_BATCH(batch, 0);    /* FIXME: */
1935     OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
1936     OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
1937
1938     ADVANCE_BCS_BATCH(batch);
1939 }
1940
1941 static int
1942 gen75_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1943                                  struct intel_encoder_context *encoder_context,
1944                                  int x, int y,
1945                                  int first_mb_in_slice,
1946                                  int last_mb_in_slice,
1947                                  int first_mb_in_slice_group,
1948                                  int last_mb_in_slice_group,
1949                                  int mb_type,
1950                                  int qp_scale_code,
1951                                  int coded_block_pattern,
1952                                  unsigned char target_size_in_word,
1953                                  unsigned char max_size_in_word,
1954                                  struct intel_batchbuffer *batch)
1955 {
1956     int len_in_dwords = 9;
1957
1958     if (batch == NULL)
1959         batch = encoder_context->base.batch;
1960
1961     BEGIN_BCS_BATCH(batch, len_in_dwords);
1962
1963     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1964     OUT_BCS_BATCH(batch,
1965                   0 << 24 |     /* PackedMvNum */
1966                   0 << 20 |     /* MvFormat */
1967                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1968                   0 << 15 |     /* TransformFlag: frame DCT */
1969                   0 << 14 |     /* FieldMbFlag */
1970                   1 << 13 |     /* IntraMbFlag */
1971                   mb_type << 8 |   /* MbType: Intra */
1972                   0 << 2 |      /* SkipMbFlag */
1973                   0 << 0 |      /* InterMbMode */
1974                   0);
1975     OUT_BCS_BATCH(batch, y << 16 | x);
1976     OUT_BCS_BATCH(batch,
1977                   max_size_in_word << 24 |
1978                   target_size_in_word << 16 |
1979                   coded_block_pattern << 6 |      /* CBP */
1980                   0);
1981     OUT_BCS_BATCH(batch,
1982                   last_mb_in_slice << 31 |
1983                   first_mb_in_slice << 30 |
1984                   0 << 27 |     /* EnableCoeffClamp */
1985                   last_mb_in_slice_group << 26 |
1986                   0 << 25 |     /* MbSkipConvDisable */
1987                   first_mb_in_slice_group << 24 |
1988                   0 << 16 |     /* MvFieldSelect */
1989                   qp_scale_code << 0 |
1990                   0);
1991     OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
1992     OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
1993     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1994     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1995
1996     ADVANCE_BCS_BATCH(batch);
1997
1998     return len_in_dwords;
1999 }
2000
2001 #define MV_OFFSET_IN_WORD       112
2002
/*
 * Motion-vector limits indexed by f_code, in half-pixel units.
 * Entry f (1..9) spans [-(16 << (f - 1)), (16 << (f - 1)) - 1];
 * entry 0 is a {0, 0} placeholder.
 */
#define MPEG2_MV_RANGE(f)   { -(16 << ((f) - 1)), (16 << ((f) - 1)) - 1 }

static struct _mv_ranges
{
    int low;    /* in the unit of 1/2 pixel */
    int high;   /* in the unit of 1/2 pixel */
} mv_ranges[] = {
    { 0, 0 },
    MPEG2_MV_RANGE(1),
    MPEG2_MV_RANGE(2),
    MPEG2_MV_RANGE(3),
    MPEG2_MV_RANGE(4),
    MPEG2_MV_RANGE(5),
    MPEG2_MV_RANGE(6),
    MPEG2_MV_RANGE(7),
    MPEG2_MV_RANGE(8),
    MPEG2_MV_RANGE(9)
};

#undef MPEG2_MV_RANGE

/*
 * Sanitise one motion-vector component.
 *
 * mv          - component in half-pixel units
 * pos         - macroblock index along the same axis
 * display_max - picture size along that axis, in pixels
 * f_code      - selects the entry of mv_ranges to clamp to
 *
 * If the displaced 16-pixel macroblock would start before 0 or end beyond
 * display_max, the vector is replaced by 0.  The result is then clamped to
 * mv_ranges[f_code] when f_code is in 1..9; for any other f_code no
 * clamping is done.
 */
static int
mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
{
    const int mb_begin = pos * 16 * 2;          /* half-pixel units */
    const int mb_end = (pos + 1) * 16 * 2;
    const int picture_end = display_max * 2;
    int result = mv;

    if (mv + mb_begin < 0 || mv + mb_end > picture_end)
        result = 0;

    /* no range is known for this f_code */
    if (f_code <= 0 || f_code >= 10)
        return result;

    if (result < mv_ranges[f_code].low)
        return mv_ranges[f_code].low;

    if (result > mv_ranges[f_code].high)
        return mv_ranges[f_code].high;

    return result;
}
2037
2038 static int
2039 gen75_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
2040                                  struct encode_state *encode_state,
2041                                  struct intel_encoder_context *encoder_context,
2042                                  unsigned int *msg,
2043                                  int width_in_mbs, int height_in_mbs,
2044                                  int x, int y,
2045                                  int first_mb_in_slice,
2046                                  int last_mb_in_slice,
2047                                  int first_mb_in_slice_group,
2048                                  int last_mb_in_slice_group,
2049                                  int qp_scale_code,
2050                                  unsigned char target_size_in_word,
2051                                  unsigned char max_size_in_word,
2052                                  struct intel_batchbuffer *batch)
2053 {
2054     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
2055     int len_in_dwords = 9;
2056     short *mvptr, mvx0, mvy0, mvx1, mvy1;
2057     
2058     if (batch == NULL)
2059         batch = encoder_context->base.batch;
2060
2061     mvptr = (short *)msg;
2062     mvx0 = mpeg2_motion_vector(mvptr[MV_OFFSET_IN_WORD + 0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
2063     mvy0 = mpeg2_motion_vector(mvptr[MV_OFFSET_IN_WORD + 1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
2064     mvx1 = mpeg2_motion_vector(mvptr[MV_OFFSET_IN_WORD + 2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
2065     mvy1 = mpeg2_motion_vector(mvptr[MV_OFFSET_IN_WORD + 3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
2066
2067     BEGIN_BCS_BATCH(batch, len_in_dwords);
2068
2069     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
2070     OUT_BCS_BATCH(batch,
2071                   2 << 24 |     /* PackedMvNum */
2072                   7 << 20 |     /* MvFormat */
2073                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
2074                   0 << 15 |     /* TransformFlag: frame DCT */
2075                   0 << 14 |     /* FieldMbFlag */
2076                   0 << 13 |     /* IntraMbFlag */
2077                   1 << 8 |      /* MbType: Frame-based */
2078                   0 << 2 |      /* SkipMbFlag */
2079                   0 << 0 |      /* InterMbMode */
2080                   0);
2081     OUT_BCS_BATCH(batch, y << 16 | x);
2082     OUT_BCS_BATCH(batch,
2083                   max_size_in_word << 24 |
2084                   target_size_in_word << 16 |
2085                   0x3f << 6 |   /* CBP */
2086                   0);
2087     OUT_BCS_BATCH(batch,
2088                   last_mb_in_slice << 31 |
2089                   first_mb_in_slice << 30 |
2090                   0 << 27 |     /* EnableCoeffClamp */
2091                   last_mb_in_slice_group << 26 |
2092                   0 << 25 |     /* MbSkipConvDisable */
2093                   first_mb_in_slice_group << 24 |
2094                   0 << 16 |     /* MvFieldSelect */
2095                   qp_scale_code << 0 |
2096                   0);
2097
2098     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
2099     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
2100     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
2101     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
2102
2103     ADVANCE_BCS_BATCH(batch);
2104
2105     return len_in_dwords;
2106 }
2107
/*
 * NOTE(review): none of these macros is referenced in the MPEG-2 code that
 * follows.  Judging by the names they look like offsets into (and a mask
 * for) a VME output message -- presumably RDO cost, inter message and
 * motion-vector fields.  The unit (bytes, words or dwords) is not visible
 * from here; TODO confirm against the users of these macros.
 */
#define INTRA_RDO_OFFSET        4
#define INTER_RDO_OFFSET        54
#define INTER_MSG_OFFSET        52
#define INTER_MV_OFFSET         224
#define RDO_MASK                0xFFFF
2113
2114 static void
2115 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
2116                                            struct encode_state *encode_state,
2117                                            struct intel_encoder_context *encoder_context,
2118                                            struct intel_batchbuffer *slice_batch)
2119 {
2120     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2121     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
2122
2123     if (encode_state->packed_header_data[idx]) {
2124         VAEncPackedHeaderParameterBuffer *param = NULL;
2125         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2126         unsigned int length_in_bits;
2127
2128         assert(encode_state->packed_header_param[idx]);
2129         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2130         length_in_bits = param->bit_length;
2131
2132         mfc_context->insert_object(ctx,
2133                                    encoder_context,
2134                                    header_data,
2135                                    ALIGN(length_in_bits, 32) >> 5,
2136                                    length_in_bits & 0x1f,
2137                                    5,   /* FIXME: check it */
2138                                    0,
2139                                    0,
2140                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2141                                    slice_batch);
2142     }
2143
2144     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2145
2146     if (encode_state->packed_header_data[idx]) {
2147         VAEncPackedHeaderParameterBuffer *param = NULL;
2148         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2149         unsigned int length_in_bits;
2150
2151         assert(encode_state->packed_header_param[idx]);
2152         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2153         length_in_bits = param->bit_length;
2154
2155         mfc_context->insert_object(ctx,
2156                                    encoder_context,
2157                                    header_data,
2158                                    ALIGN(length_in_bits, 32) >> 5,
2159                                    length_in_bits & 0x1f,
2160                                    5,   /* FIXME: check it */
2161                                    0,
2162                                    0,
2163                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2164                                    slice_batch);
2165     }
2166 }
2167
2168 static void 
2169 gen75_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2170                                      struct encode_state *encode_state,
2171                                      struct intel_encoder_context *encoder_context,
2172                                      int slice_index,
2173                                      VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2174                                      struct intel_batchbuffer *slice_batch)
2175 {
2176     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2177     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2178     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2179     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2180     unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2181     unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2182     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2183     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2184     int i, j;
2185     int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2186     unsigned int *msg = NULL, offset = 0;
2187     unsigned char *msg_ptr = NULL;
2188
2189     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
2190     h_start_pos = slice_param->macroblock_address % width_in_mbs;
2191     v_start_pos = slice_param->macroblock_address / width_in_mbs;
2192     assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
2193
2194     dri_bo_map(vme_context->vme_output.bo , 0);
2195     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
2196
2197     if (next_slice_group_param) {
2198         h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2199         v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
2200     } else {
2201         h_next_start_pos = 0;
2202         v_next_start_pos = height_in_mbs;
2203     }
2204
2205     gen75_mfc_mpeg2_slicegroup_state(ctx,
2206                                      encoder_context,
2207                                      h_start_pos,
2208                                      v_start_pos,
2209                                      h_next_start_pos,
2210                                      v_next_start_pos,
2211                                      slice_index == 0,
2212                                      next_slice_group_param == NULL,
2213                                      slice_param->is_intra_slice,
2214                                      slice_param->quantiser_scale_code,
2215                                      slice_batch);
2216
2217     if (slice_index == 0) 
2218         intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2219
2220     /* Insert '00' to make sure the header is valid */
2221     mfc_context->insert_object(ctx,
2222                                encoder_context,
2223                                (unsigned int*)section_delimiter,
2224                                1,
2225                                8,   /* 8bits in the last DWORD */
2226                                1,   /* 1 byte */
2227                                1,
2228                                0,
2229                                0,
2230                                slice_batch);
2231
2232     for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2233         /* PAK for each macroblocks */
2234         for (j = 0; j < slice_param->num_macroblocks; j++) {
2235             int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2236             int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2237             int first_mb_in_slice = (j == 0);
2238             int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2239             int first_mb_in_slice_group = (i == 0 && j == 0);
2240             int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2241                                           j == slice_param->num_macroblocks - 1);
2242
2243             msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
2244
2245             if (slice_param->is_intra_slice) {
2246                 gen75_mfc_mpeg2_pak_object_intra(ctx,
2247                                                  encoder_context,
2248                                                  h_pos, v_pos,
2249                                                  first_mb_in_slice,
2250                                                  last_mb_in_slice,
2251                                                  first_mb_in_slice_group,
2252                                                  last_mb_in_slice_group,
2253                                                  0x1a,
2254                                                  slice_param->quantiser_scale_code,
2255                                                  0x3f,
2256                                                  0,
2257                                                  0xff,
2258                                                  slice_batch);
2259             } else {
2260                 gen75_mfc_mpeg2_pak_object_inter(ctx,
2261                                                  encode_state,
2262                                                  encoder_context,
2263                                                  msg,
2264                                                  width_in_mbs, height_in_mbs,
2265                                                  h_pos, v_pos,
2266                                                  first_mb_in_slice,
2267                                                  last_mb_in_slice,
2268                                                  first_mb_in_slice_group,
2269                                                  last_mb_in_slice_group,
2270                                                  slice_param->quantiser_scale_code,
2271                                                  0,
2272                                                  0xff,
2273                                                  slice_batch);
2274             }
2275         }
2276
2277         slice_param++;
2278     }
2279
2280     dri_bo_unmap(vme_context->vme_output.bo);
2281
2282     /* tail data */
2283     if (next_slice_group_param == NULL) { /* end of a picture */
2284         mfc_context->insert_object(ctx,
2285                                    encoder_context,
2286                                    (unsigned int *)tail_delimiter,
2287                                    2,
2288                                    8,   /* 8bits in the last DWORD */
2289                                    5,   /* 5 bytes */
2290                                    1,
2291                                    1,
2292                                    0,
2293                                    slice_batch);
2294     } else {        /* end of a lsice group */
2295         mfc_context->insert_object(ctx,
2296                                    encoder_context,
2297                                    (unsigned int *)section_delimiter,
2298                                    1,
2299                                    8,   /* 8bits in the last DWORD */
2300                                    1,   /* 1 byte */
2301                                    1,
2302                                    1,
2303                                    0,
2304                                    slice_batch);
2305     }
2306 }
2307
2308 /* 
2309  * A batch buffer for all slices, including slice state, 
2310  * slice insert object and slice pak object commands
2311  *
2312  */
2313 static dri_bo *
2314 gen75_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2315                                            struct encode_state *encode_state,
2316                                            struct intel_encoder_context *encoder_context)
2317 {
2318     struct i965_driver_data *i965 = i965_driver_data(ctx);
2319     struct intel_batchbuffer *batch;
2320     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2321     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2322     dri_bo *batch_bo;
2323     int i;
2324     int buffer_size;
2325     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2326     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2327
2328     buffer_size = width_in_mbs * height_in_mbs * 64;
2329     batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
2330     batch_bo = batch->buffer;
2331
2332     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2333         if (i == encode_state->num_slice_params_ext - 1)
2334             next_slice_group_param = NULL;
2335         else
2336             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2337
2338         gen75_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2339     }
2340
2341     intel_batchbuffer_align(batch, 8);
2342     
2343     BEGIN_BCS_BATCH(batch, 2);
2344     OUT_BCS_BATCH(batch, 0);
2345     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2346     ADVANCE_BCS_BATCH(batch);
2347
2348     dri_bo_reference(batch_bo);
2349     intel_batchbuffer_free(batch);
2350
2351     return batch_bo;
2352 }
2353
2354 static void
2355 gen75_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2356                                             struct encode_state *encode_state,
2357                                             struct intel_encoder_context *encoder_context)
2358 {
2359     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2360
2361     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2362     mfc_context->set_surface_state(ctx, encoder_context);
2363     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2364     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
2365     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2366     gen75_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2367     gen75_mfc_mpeg2_qm_state(ctx, encoder_context);
2368     gen75_mfc_mpeg2_fqm_state(ctx, encoder_context);
2369 }
2370
2371 static void
2372 gen75_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2373                                     struct encode_state *encode_state,
2374                                     struct intel_encoder_context *encoder_context)
2375 {
2376     struct intel_batchbuffer *batch = encoder_context->base.batch;
2377     dri_bo *slice_batch_bo;
2378
2379     slice_batch_bo = gen75_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2380
2381     // begin programing
2382     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
2383     intel_batchbuffer_emit_mi_flush(batch);
2384     
2385     // picture level programing
2386     gen75_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
2387
2388     BEGIN_BCS_BATCH(batch, 2);
2389     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
2390     OUT_BCS_RELOC(batch,
2391                   slice_batch_bo,
2392                   I915_GEM_DOMAIN_COMMAND, 0, 
2393                   0);
2394     ADVANCE_BCS_BATCH(batch);
2395
2396     // end programing
2397     intel_batchbuffer_end_atomic(batch);
2398
2399     dri_bo_unreference(slice_batch_bo);
2400 }
2401
2402 static VAStatus
2403 intel_mfc_mpeg2_prepare(VADriverContextP ctx, 
2404                         struct encode_state *encode_state,
2405                         struct intel_encoder_context *encoder_context)
2406 {
2407     struct i965_driver_data *i965 = i965_driver_data(ctx);
2408     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2409     struct object_surface *obj_surface; 
2410     struct object_buffer *obj_buffer;
2411     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
2412     struct i965_coded_buffer_segment *coded_buffer_segment;
2413     VAStatus vaStatus = VA_STATUS_SUCCESS;
2414     dri_bo *bo;
2415     int i;
2416
2417     /* reconstructed surface */
2418     obj_surface = SURFACE(pic_param->reconstructed_picture);
2419     assert(obj_surface);
2420     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2421     mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2422     dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2423     mfc_context->surface_state.width = obj_surface->orig_width;
2424     mfc_context->surface_state.height = obj_surface->orig_height;
2425     mfc_context->surface_state.w_pitch = obj_surface->width;
2426     mfc_context->surface_state.h_pitch = obj_surface->height;
2427
2428     /* forward reference */
2429     obj_surface = SURFACE(pic_param->forward_reference_picture);
2430
2431     if (obj_surface && obj_surface->bo) {
2432         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2433         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2434     } else
2435         mfc_context->reference_surfaces[0].bo = NULL;
2436
2437     /* backward reference */
2438     obj_surface = SURFACE(pic_param->backward_reference_picture);
2439
2440     if (obj_surface && obj_surface->bo) {
2441         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2442         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2443     } else {
2444         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2445
2446         if (mfc_context->reference_surfaces[1].bo)
2447             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2448     }
2449
2450     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2451         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2452
2453         if (mfc_context->reference_surfaces[i].bo)
2454             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2455     }
2456     
2457     /* input YUV surface */
2458     obj_surface = SURFACE(encoder_context->input_yuv_surface);
2459     assert(obj_surface && obj_surface->bo);
2460     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2461     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2462
2463     /* coded buffer */
2464     obj_buffer = BUFFER(pic_param->coded_buf);
2465     bo = obj_buffer->buffer_store->bo;
2466     assert(bo);
2467     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2468     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2469     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2470     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2471
2472     /* set the internal flag to 0 to indicate the coded size is unknown */
2473     dri_bo_map(bo, 1);
2474     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2475     coded_buffer_segment->mapped = 0;
2476     coded_buffer_segment->codec = CODED_MPEG2;
2477     dri_bo_unmap(bo);
2478
2479     return vaStatus;
2480 }
2481
2482 static VAStatus
2483 gen75_mfc_mpeg2_encode_picture(VADriverContextP ctx, 
2484                                struct encode_state *encode_state,
2485                                struct intel_encoder_context *encoder_context)
2486 {
2487     gen75_mfc_init(ctx, encode_state, encoder_context);
2488     intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2489     /*Programing bcs pipeline*/
2490     gen75_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2491     gen75_mfc_run(ctx, encode_state, encoder_context);
2492
2493     return VA_STATUS_SUCCESS;
2494 }
2495
2496 static void
2497 gen75_mfc_context_destroy(void *context)
2498 {
2499     struct gen6_mfc_context *mfc_context = context;
2500     int i;
2501
2502     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2503     mfc_context->post_deblocking_output.bo = NULL;
2504
2505     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2506     mfc_context->pre_deblocking_output.bo = NULL;
2507
2508     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2509     mfc_context->uncompressed_picture_source.bo = NULL;
2510
2511     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
2512     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2513
2514     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2515         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2516         mfc_context->direct_mv_buffers[i].bo = NULL;
2517     }
2518
2519     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2520     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
2521
2522     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2523     mfc_context->macroblock_status_buffer.bo = NULL;
2524
2525     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2526     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2527
2528     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2529     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2530
2531
2532     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2533         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2534         mfc_context->reference_surfaces[i].bo = NULL;  
2535     }
2536
2537     i965_gpe_context_destroy(&mfc_context->gpe_context);
2538
2539     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2540     mfc_context->mfc_batchbuffer_surface.bo = NULL;
2541
2542     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2543     mfc_context->aux_batchbuffer_surface.bo = NULL;
2544
2545     if (mfc_context->aux_batchbuffer)
2546         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2547
2548     mfc_context->aux_batchbuffer = NULL;
2549
2550     free(mfc_context);
2551 }
2552
2553 static VAStatus gen75_mfc_pipeline(VADriverContextP ctx,
2554                   VAProfile profile,
2555                   struct encode_state *encode_state,
2556                   struct intel_encoder_context *encoder_context)
2557 {
2558     VAStatus vaStatus;
2559
2560     switch (profile) {
2561     case VAProfileH264Baseline:
2562     case VAProfileH264Main:
2563     case VAProfileH264High:
2564         vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
2565         break;
2566
2567         /* FIXME: add for other profile */
2568     case VAProfileMPEG2Simple:
2569     case VAProfileMPEG2Main:
2570         vaStatus = gen75_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
2571         break;
2572
2573     default:
2574         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2575         break;
2576     }
2577
2578     return vaStatus;
2579 }
2580
2581 Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2582 {
2583     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
2584
2585     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2586
2587     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
2588     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
2589
2590     mfc_context->gpe_context.curbe.length = 32 * 4;
2591
2592     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2593     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
2594     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
2595     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
2596     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
2597
2598     i965_gpe_load_kernels(ctx,
2599                           &mfc_context->gpe_context,
2600                           gen75_mfc_kernels,
2601                           NUM_MFC_KERNEL);
2602
2603     mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
2604     mfc_context->set_surface_state = gen75_mfc_surface_state;
2605     mfc_context->ind_obj_base_addr_state = gen75_mfc_ind_obj_base_addr_state;
2606     mfc_context->avc_img_state = gen75_mfc_avc_img_state;
2607     mfc_context->avc_qm_state = gen75_mfc_avc_qm_state;
2608     mfc_context->avc_fqm_state = gen75_mfc_avc_fqm_state;
2609     mfc_context->insert_object = gen75_mfc_avc_insert_object;
2610     mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;
2611
2612     encoder_context->mfc_context = mfc_context;
2613     encoder_context->mfc_context_destroy = gen75_mfc_context_destroy;
2614     encoder_context->mfc_pipeline = gen75_mfc_pipeline;
2615     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
2616
2617     return True;
2618 }