Enable the Intra-prediction for MPEG2 P-B frame
[platform/upstream/libva-intel-driver.git] / src / gen75_mfc.c
1 /*
2  * Copyright © 2010-2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45
/* NOTE(review): not referenced in the visible part of this file; presumably
 * selects the software-built MFC batch path on Haswell -- confirm. */
#define MFC_SOFTWARE_HASWELL    1

/* Lowest device revision value that IS_STEPPING_BPLUS() accepts. */
#define B0_STEP_REV             2
/*
 * Non-zero when the driver data's intel.revision is >= B0_STEP_REV.
 * The argument is parenthesized so that any pointer expression (for
 * example "&data") can be passed, not only a plain identifier.
 */
#define IS_STEPPING_BPLUS(i965) (((i965)->intel.revision) >= B0_STEP_REV)
50
51 static const uint32_t gen75_mfc_batchbuffer_avc_intra[][4] = {
52 #include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
53 };
54
55 static const uint32_t gen75_mfc_batchbuffer_avc_inter[][4] = {
56 #include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
57 };
58
59 static struct i965_kernel gen75_mfc_kernels[] = {
60     {
61         "MFC AVC INTRA BATCHBUFFER ",
62         MFC_BATCHBUFFER_AVC_INTRA,
63         gen75_mfc_batchbuffer_avc_intra,
64         sizeof(gen75_mfc_batchbuffer_avc_intra),
65         NULL
66     },
67
68     {
69         "MFC AVC INTER BATCHBUFFER ",
70         MFC_BATCHBUFFER_AVC_INTER,
71         gen75_mfc_batchbuffer_avc_inter,
72         sizeof(gen75_mfc_batchbuffer_avc_inter),
73         NULL
74     },
75 };
76
/*
 * Inter-prediction mode constants.
 * NOTE(review): none of these are used in the visible part of the file;
 * presumably they decode VME output messages -- confirm against the users.
 */
#define         INTER_MODE_MASK         0x3             /* low two bits */
#define         INTER_16X8              0x1
#define         INTER_8X16              0x2
#define         INTER_8X8               0x3
#define         SUBMB_SHAPE_MASK        (0xFF << 8)     /* bits 8..15 */

#define         INTER_MV8               0x00400000      /* 4 << 20 */
#define         INTER_MV32              0x00600000      /* 6 << 20 */

85
86
87 static void
88 gen75_mfc_pipe_mode_select(VADriverContextP ctx,
89                           int standard_select,
90                           struct intel_encoder_context *encoder_context)
91 {
92     struct intel_batchbuffer *batch = encoder_context->base.batch;
93     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
94     assert(standard_select == MFX_FORMAT_MPEG2 ||
95            standard_select == MFX_FORMAT_AVC);
96
97     BEGIN_BCS_BATCH(batch, 5);
98
99     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
100     OUT_BCS_BATCH(batch,
101                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
102                   (MFD_MODE_VLD << 15) | /* VLD mode */
103                   (0 << 10) | /* Stream-Out Enable */
104                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
105                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
106                   (0 << 5)  | /* not in stitch mode */
107                   (1 << 4)  | /* encoding mode */
108                   (standard_select << 0));  /* standard select: avc or mpeg2 */
109     OUT_BCS_BATCH(batch,
110                   (0 << 7)  | /* expand NOA bus flag */
111                   (0 << 6)  | /* disable slice-level clock gating */
112                   (0 << 5)  | /* disable clock gating for NOA */
113                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
114                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
115                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
116                   (0 << 1)  |
117                   (0 << 0));
118     OUT_BCS_BATCH(batch, 0);
119     OUT_BCS_BATCH(batch, 0);
120
121     ADVANCE_BCS_BATCH(batch);
122 }
123
124 static void
125 gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
126 {
127     struct intel_batchbuffer *batch = encoder_context->base.batch;
128     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
129
130     BEGIN_BCS_BATCH(batch, 6);
131
132     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
133     OUT_BCS_BATCH(batch, 0);
134     OUT_BCS_BATCH(batch,
135                   ((mfc_context->surface_state.height - 1) << 18) |
136                   ((mfc_context->surface_state.width - 1) << 4));
137     OUT_BCS_BATCH(batch,
138                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
139                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
140                   (0 << 22) | /* surface object control state, FIXME??? */
141                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
142                   (0 << 2)  | /* must be 0 for interleave U/V */
143                   (1 << 1)  | /* must be tiled */
144                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
145     OUT_BCS_BATCH(batch,
146                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
147                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
148     OUT_BCS_BATCH(batch, 0);
149
150     ADVANCE_BCS_BATCH(batch);
151 }
152
153 static void
154 gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
155                                 struct intel_encoder_context *encoder_context)
156 {
157     struct intel_batchbuffer *batch = encoder_context->base.batch;
158     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
159     struct gen6_vme_context *vme_context = encoder_context->vme_context;
160
161     BEGIN_BCS_BATCH(batch, 26);
162
163     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
164         /* the DW1-3 is for the MFX indirect bistream offset */
165     OUT_BCS_BATCH(batch, 0);
166     OUT_BCS_BATCH(batch, 0);
167     OUT_BCS_BATCH(batch, 0);
168         /* the DW4-5 is the MFX upper bound */
169     OUT_BCS_BATCH(batch, 0);
170     OUT_BCS_BATCH(batch, 0);
171
172     /* the DW6-10 is for MFX Indirect MV Object Base Address */
173     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
174     OUT_BCS_BATCH(batch, 0);
175     OUT_BCS_BATCH(batch, 0);
176     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
177     OUT_BCS_BATCH(batch, 0);
178
179      /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
180     OUT_BCS_BATCH(batch, 0);
181     OUT_BCS_BATCH(batch, 0);
182     OUT_BCS_BATCH(batch, 0);
183     OUT_BCS_BATCH(batch, 0);
184     OUT_BCS_BATCH(batch, 0);
185
186      /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */    
187     OUT_BCS_BATCH(batch, 0);
188     OUT_BCS_BATCH(batch, 0);
189     OUT_BCS_BATCH(batch, 0);
190     OUT_BCS_BATCH(batch, 0);
191     OUT_BCS_BATCH(batch, 0);
192
193     /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/        
194     OUT_BCS_RELOC(batch,
195                   mfc_context->mfc_indirect_pak_bse_object.bo,
196                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
197                   0);
198     OUT_BCS_BATCH(batch, 0);
199     OUT_BCS_BATCH(batch, 0);
200         
201     OUT_BCS_RELOC(batch,
202                   mfc_context->mfc_indirect_pak_bse_object.bo,
203                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
204                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
205     OUT_BCS_BATCH(batch, 0);
206
207     ADVANCE_BCS_BATCH(batch);
208 }
209
210 static void
211 gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
212 {
213     struct intel_batchbuffer *batch = encoder_context->base.batch;
214     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
215     struct gen6_vme_context *vme_context = encoder_context->vme_context;
216     struct i965_driver_data *i965 = i965_driver_data(ctx);
217
218     if (IS_STEPPING_BPLUS(i965)) {
219         gen75_mfc_ind_obj_base_addr_state_bplus(ctx, encoder_context);
220         return;
221     }
222
223     BEGIN_BCS_BATCH(batch, 11);
224
225     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
226     OUT_BCS_BATCH(batch, 0);
227     OUT_BCS_BATCH(batch, 0);
228     /* MFX Indirect MV Object Base Address */
229     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
230     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
231     OUT_BCS_BATCH(batch, 0);
232     OUT_BCS_BATCH(batch, 0);
233     OUT_BCS_BATCH(batch, 0);
234     OUT_BCS_BATCH(batch, 0);
235     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
236     OUT_BCS_RELOC(batch,
237                   mfc_context->mfc_indirect_pak_bse_object.bo,
238                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
239                   0);
240     OUT_BCS_RELOC(batch,
241                   mfc_context->mfc_indirect_pak_bse_object.bo,
242                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
243                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
244
245     ADVANCE_BCS_BATCH(batch);
246 }
247
248 static void
249 gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,  
250                        struct intel_encoder_context *encoder_context)
251 {
252     struct intel_batchbuffer *batch = encoder_context->base.batch;
253     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
254     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
255
256     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
257     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
258
259     BEGIN_BCS_BATCH(batch, 16);
260
261     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
262     /*DW1. MB setting of frame */
263     OUT_BCS_BATCH(batch,
264                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
265     OUT_BCS_BATCH(batch, 
266                   ((height_in_mbs - 1) << 16) | 
267                   ((width_in_mbs - 1) << 0));
268     /* DW3 QP setting */
269     OUT_BCS_BATCH(batch, 
270                   (0 << 24) |   /* Second Chroma QP Offset */
271                   (0 << 16) |   /* Chroma QP Offset */
272                   (0 << 14) |   /* Max-bit conformance Intra flag */
273                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
274                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
275                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
276                   (0 << 8)  |   /* FIXME: Image Structure */
277                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
278     OUT_BCS_BATCH(batch,
279                   (0 << 16) |   /* Mininum Frame size */
280                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
281                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
282                   (0 << 13) |   /* CABAC 0 word insertion test enable */
283                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
284                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
285                   (0 << 8)  |   /* FIXME: MbMvFormatFlag */
286                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
287                   (0 << 6)  |   /* Only valid for VLD decoding mode */
288                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
289                   (0 << 4)  |   /* Direct 8x8 inference flag */
290                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
291                   (1 << 2)  |   /* Frame MB only flag */
292                   (0 << 1)  |   /* MBAFF mode is in active */
293                   (0 << 0));    /* Field picture flag */
294         /* DW5 Trellis quantization */
295     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
296     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
297                   (0xBB8 << 16) |       /* InterMbMaxSz */
298                   (0xEE8) );            /* IntraMbMaxSz */
299     OUT_BCS_BATCH(batch, 0);            /* Reserved */
300         /* DW8. QP delta */
301     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
302     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
303         /* DW10. Bit setting for MB */  
304     OUT_BCS_BATCH(batch, 0x8C000000);
305     OUT_BCS_BATCH(batch, 0x00010000);
306         /* DW12. */
307     OUT_BCS_BATCH(batch, 0);
308     OUT_BCS_BATCH(batch, 0x02010100);
309         /* DW14. For short format */
310     OUT_BCS_BATCH(batch, 0);
311     OUT_BCS_BATCH(batch, 0);
312
313     ADVANCE_BCS_BATCH(batch);
314 }
315
316 static void
317 gen75_mfc_qm_state(VADriverContextP ctx,
318                   int qm_type,
319                   unsigned int *qm,
320                   int qm_length,
321                   struct intel_encoder_context *encoder_context)
322 {
323     struct intel_batchbuffer *batch = encoder_context->base.batch;
324     unsigned int qm_buffer[16];
325
326     assert(qm_length <= 16);
327     assert(sizeof(*qm) == 4);
328     memcpy(qm_buffer, qm, qm_length * 4);
329
330     BEGIN_BCS_BATCH(batch, 18);
331     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
332     OUT_BCS_BATCH(batch, qm_type << 0);
333     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
334     ADVANCE_BCS_BATCH(batch);
335 }
336
337 static void
338 gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
339 {
340     unsigned int qm[16] = {
341         0x10101010, 0x10101010, 0x10101010, 0x10101010,
342         0x10101010, 0x10101010, 0x10101010, 0x10101010,
343         0x10101010, 0x10101010, 0x10101010, 0x10101010,
344         0x10101010, 0x10101010, 0x10101010, 0x10101010
345     };
346
347     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
348     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
349     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
350     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
351 }
352
353 static void
354 gen75_mfc_fqm_state(VADriverContextP ctx,
355                    int fqm_type,
356                    unsigned int *fqm,
357                    int fqm_length,
358                    struct intel_encoder_context *encoder_context)
359 {
360     struct intel_batchbuffer *batch = encoder_context->base.batch;
361     unsigned int fqm_buffer[32];
362
363     assert(fqm_length <= 32);
364     assert(sizeof(*fqm) == 4);
365     memcpy(fqm_buffer, fqm, fqm_length * 4);
366
367     BEGIN_BCS_BATCH(batch, 34);
368     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
369     OUT_BCS_BATCH(batch, fqm_type << 0);
370     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
371     ADVANCE_BCS_BATCH(batch);
372 }
373
374 static void
375 gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
376 {
377     unsigned int qm[32] = {
378         0x10001000, 0x10001000, 0x10001000, 0x10001000,
379         0x10001000, 0x10001000, 0x10001000, 0x10001000,
380         0x10001000, 0x10001000, 0x10001000, 0x10001000,
381         0x10001000, 0x10001000, 0x10001000, 0x10001000,
382         0x10001000, 0x10001000, 0x10001000, 0x10001000,
383         0x10001000, 0x10001000, 0x10001000, 0x10001000,
384         0x10001000, 0x10001000, 0x10001000, 0x10001000,
385         0x10001000, 0x10001000, 0x10001000, 0x10001000
386     };
387
388     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
389     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
390     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
391     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
392 }
393
394 static void
395 gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
396                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
397                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
398                            struct intel_batchbuffer *batch)
399 {
400     if (batch == NULL)
401         batch = encoder_context->base.batch;
402
403     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
404
405     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
406     OUT_BCS_BATCH(batch,
407                   (0 << 16) |   /* always start at offset 0 */
408                   (data_bits_in_last_dw << 8) |
409                   (skip_emul_byte_count << 4) |
410                   (!!emulation_flag << 3) |
411                   ((!!is_last_header) << 2) |
412                   ((!!is_end_of_slice) << 1) |
413                   (0 << 0));    /* FIXME: ??? */
414     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
415
416     ADVANCE_BCS_BATCH(batch);
417 }
418
419
420 static void gen75_mfc_init(VADriverContextP ctx,
421                         struct encode_state *encode_state,
422                         struct intel_encoder_context *encoder_context)
423 {
424     struct i965_driver_data *i965 = i965_driver_data(ctx);
425     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
426     dri_bo *bo;
427     int i;
428     int width_in_mbs = 0;
429     int height_in_mbs = 0;
430
431     if (encoder_context->codec == CODEC_H264) {
432         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
433         width_in_mbs = pSequenceParameter->picture_width_in_mbs;
434         height_in_mbs = pSequenceParameter->picture_height_in_mbs;
435     } else {
436         VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
437
438         assert(encoder_context->codec == CODEC_MPEG2);
439
440         width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
441         height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
442     }
443
444     /*Encode common setup for MFC*/
445     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
446     mfc_context->post_deblocking_output.bo = NULL;
447
448     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
449     mfc_context->pre_deblocking_output.bo = NULL;
450
451     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
452     mfc_context->uncompressed_picture_source.bo = NULL;
453
454     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
455     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
456
457     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
458         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
459         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
460         mfc_context->direct_mv_buffers[i].bo = NULL;
461     }
462
463     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
464         if (mfc_context->reference_surfaces[i].bo != NULL)
465             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
466         mfc_context->reference_surfaces[i].bo = NULL;  
467     }
468
469     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
470     bo = dri_bo_alloc(i965->intel.bufmgr,
471                       "Buffer",
472                       width_in_mbs * 64,
473                       64);
474     assert(bo);
475     mfc_context->intra_row_store_scratch_buffer.bo = bo;
476
477     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
478     bo = dri_bo_alloc(i965->intel.bufmgr,
479                       "Buffer",
480                       width_in_mbs * height_in_mbs * 16,
481                       64);
482     assert(bo);
483     mfc_context->macroblock_status_buffer.bo = bo;
484
485     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
486     bo = dri_bo_alloc(i965->intel.bufmgr,
487                       "Buffer",
488                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
489                       64);
490     assert(bo);
491     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
492
493     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
494     bo = dri_bo_alloc(i965->intel.bufmgr,
495                       "Buffer",
496                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
497                       0x1000);
498     assert(bo);
499     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
500
501     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
502     mfc_context->mfc_batchbuffer_surface.bo = NULL;
503
504     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
505     mfc_context->aux_batchbuffer_surface.bo = NULL;
506
507     if (mfc_context->aux_batchbuffer)
508         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
509
510     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0);
511     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
512     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
513     mfc_context->aux_batchbuffer_surface.pitch = 16;
514     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
515     mfc_context->aux_batchbuffer_surface.size_block = 16;
516
517     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
518 }
519
520 static void
521 gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
522                                 struct intel_encoder_context *encoder_context)
523 {
524     struct intel_batchbuffer *batch = encoder_context->base.batch;
525     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
526     int i;
527
528     BEGIN_BCS_BATCH(batch, 61);
529
530     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
531
532     /* the DW1-3 is for pre_deblocking */
533     if (mfc_context->pre_deblocking_output.bo)
534         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
535                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
536                       0);
537     else
538         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
539
540         OUT_BCS_BATCH(batch, 0);
541         OUT_BCS_BATCH(batch, 0);
542      /* the DW4-6 is for the post_deblocking */
543
544     if (mfc_context->post_deblocking_output.bo)
545         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
546                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
547                       0);                                                                                       /* post output addr  */ 
548     else
549         OUT_BCS_BATCH(batch, 0);
550         OUT_BCS_BATCH(batch, 0);
551         OUT_BCS_BATCH(batch, 0);
552
553      /* the DW7-9 is for the uncompressed_picture */
554     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
555                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
556                   0); /* uncompressed data */
557
558         OUT_BCS_BATCH(batch, 0);
559         OUT_BCS_BATCH(batch, 0);
560
561      /* the DW10-12 is for the mb status */
562     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
563                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
564                   0); /* StreamOut data*/
565         OUT_BCS_BATCH(batch, 0);
566         OUT_BCS_BATCH(batch, 0);
567
568      /* the DW13-15 is for the intra_row_store_scratch */
569     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
570                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
571                   0);   
572         OUT_BCS_BATCH(batch, 0);
573         OUT_BCS_BATCH(batch, 0);
574
575      /* the DW16-18 is for the deblocking filter */
576     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
577                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
578                   0);
579         OUT_BCS_BATCH(batch, 0);
580         OUT_BCS_BATCH(batch, 0);
581
582     /* the DW 19-50 is for Reference pictures*/
583     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
584         if ( mfc_context->reference_surfaces[i].bo != NULL) {
585             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
586                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
587                           0);                   
588         } else {
589             OUT_BCS_BATCH(batch, 0);
590         }
591         OUT_BCS_BATCH(batch, 0);
592     }
593         OUT_BCS_BATCH(batch, 0);
594
595         /* The DW 52-54 is for the MB status buffer */
596     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
597                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
598                   0);                                                                                   /* Macroblock status buffer*/
599         
600         OUT_BCS_BATCH(batch, 0);
601         OUT_BCS_BATCH(batch, 0);
602
603         /* the DW 55-57 is the ILDB buffer */
604         OUT_BCS_BATCH(batch, 0);
605         OUT_BCS_BATCH(batch, 0);
606         OUT_BCS_BATCH(batch, 0);
607
608         /* the DW 58-60 is the second ILDB buffer */
609         OUT_BCS_BATCH(batch, 0);
610         OUT_BCS_BATCH(batch, 0);
611         OUT_BCS_BATCH(batch, 0);
612     ADVANCE_BCS_BATCH(batch);
613 }
614
615 static void
616 gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
617 {
618     struct intel_batchbuffer *batch = encoder_context->base.batch;
619     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
620     struct i965_driver_data *i965 = i965_driver_data(ctx);
621     int i;
622
623     if (IS_STEPPING_BPLUS(i965)) {
624         gen75_mfc_pipe_buf_addr_state_bplus(ctx, encoder_context);
625         return;
626     }
627
628     BEGIN_BCS_BATCH(batch, 25);
629
630     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
631
632     if (mfc_context->pre_deblocking_output.bo)
633         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
634                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
635                       0);
636     else
637         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
638
639     if (mfc_context->post_deblocking_output.bo)
640         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
641                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
642                       0);                                                                                       /* post output addr  */ 
643     else
644         OUT_BCS_BATCH(batch, 0);
645
646     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
647                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
648                   0);                                                                                   /* uncompressed data */
649     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
650                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
651                   0);                                                                                   /* StreamOut data*/
652     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
653                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
654                   0);   
655     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
656                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
657                   0);
658     /* 7..22 Reference pictures*/
659     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
660         if ( mfc_context->reference_surfaces[i].bo != NULL) {
661             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
662                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
663                           0);                   
664         } else {
665             OUT_BCS_BATCH(batch, 0);
666         }
667     }
668     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
669                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
670                   0);                                                                                   /* Macroblock status buffer*/
671
672         OUT_BCS_BATCH(batch, 0);
673
674     ADVANCE_BCS_BATCH(batch);
675 }
676
677 static void
678 gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
679                                 struct intel_encoder_context *encoder_context)
680 {
681     struct intel_batchbuffer *batch = encoder_context->base.batch;
682     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
683
684     int i;
685
686     BEGIN_BCS_BATCH(batch, 71);
687
688     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
689
690     /* Reference frames and Current frames */
691     /* the DW1-32 is for the direct MV for reference */
692     for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
693         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
694             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
695                           I915_GEM_DOMAIN_INSTRUCTION, 0,
696                           0);
697             OUT_BCS_BATCH(batch, 0);
698         } else {
699             OUT_BCS_BATCH(batch, 0);
700             OUT_BCS_BATCH(batch, 0);
701         }
702     }
703         OUT_BCS_BATCH(batch, 0);
704
705         /* the DW34-36 is the MV for the current reference */
706         OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
707                           I915_GEM_DOMAIN_INSTRUCTION, 0,
708                           0);
709
710         OUT_BCS_BATCH(batch, 0);
711         OUT_BCS_BATCH(batch, 0);
712
713     /* POL list */
714     for(i = 0; i < 32; i++) {
715         OUT_BCS_BATCH(batch, i/2);
716     }
717     OUT_BCS_BATCH(batch, 0);
718     OUT_BCS_BATCH(batch, 0);
719
720     ADVANCE_BCS_BATCH(batch);
721 }
722
723 static void
724 gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
725 {
726     struct intel_batchbuffer *batch = encoder_context->base.batch;
727     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
728     struct i965_driver_data *i965 = i965_driver_data(ctx);
729     int i;
730
731     if (IS_STEPPING_BPLUS(i965)) {
732         gen75_mfc_avc_directmode_state_bplus(ctx, encoder_context);
733         return;
734     }
735
736     BEGIN_BCS_BATCH(batch, 69);
737
738     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
739
740     /* Reference frames and Current frames */
741     for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
742         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
743             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
744                           I915_GEM_DOMAIN_INSTRUCTION, 0,
745                           0);
746         } else {
747             OUT_BCS_BATCH(batch, 0);
748         }
749     }
750
751     /* POL list */
752     for(i = 0; i < 32; i++) {
753         OUT_BCS_BATCH(batch, i/2);
754     }
755     OUT_BCS_BATCH(batch, 0);
756     OUT_BCS_BATCH(batch, 0);
757
758     ADVANCE_BCS_BATCH(batch);
759 }
760
761
762 static void
763 gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
764                                 struct intel_encoder_context *encoder_context)
765 {
766     struct intel_batchbuffer *batch = encoder_context->base.batch;
767     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
768
769     BEGIN_BCS_BATCH(batch, 10);
770
771     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
772     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
773                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
774                   0);
775     OUT_BCS_BATCH(batch, 0);
776     OUT_BCS_BATCH(batch, 0);
777         
778         /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
779     OUT_BCS_BATCH(batch, 0);
780     OUT_BCS_BATCH(batch, 0);
781     OUT_BCS_BATCH(batch, 0);
782
783         /* the DW7-9 is for Bitplane Read Buffer Base Address */
784     OUT_BCS_BATCH(batch, 0);
785     OUT_BCS_BATCH(batch, 0);
786     OUT_BCS_BATCH(batch, 0);
787
788     ADVANCE_BCS_BATCH(batch);
789 }
790
791 static void
792 gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
793 {
794     struct intel_batchbuffer *batch = encoder_context->base.batch;
795     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
796     struct i965_driver_data *i965 = i965_driver_data(ctx);
797
798     if (IS_STEPPING_BPLUS(i965)) {
799         gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, encoder_context);
800         return;
801     }
802
803     BEGIN_BCS_BATCH(batch, 4);
804
805     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
806     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
807                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
808                   0);
809     OUT_BCS_BATCH(batch, 0);
810     OUT_BCS_BATCH(batch, 0);
811
812     ADVANCE_BCS_BATCH(batch);
813 }
814
815
816 static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
817                                       struct encode_state *encode_state,
818                                       struct intel_encoder_context *encoder_context)
819 {
820     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
821
822     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
823     mfc_context->set_surface_state(ctx, encoder_context);
824     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
825     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
826     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
827     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
828     mfc_context->avc_qm_state(ctx, encoder_context);
829     mfc_context->avc_fqm_state(ctx, encoder_context);
830     gen75_mfc_avc_directmode_state(ctx, encoder_context); 
831     intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
832 }
833
834
835 static VAStatus gen75_mfc_run(VADriverContextP ctx, 
836                              struct encode_state *encode_state,
837                              struct intel_encoder_context *encoder_context)
838 {
839     struct intel_batchbuffer *batch = encoder_context->base.batch;
840
841     intel_batchbuffer_flush(batch);             //run the pipeline
842
843     return VA_STATUS_SUCCESS;
844 }
845
846
847 static VAStatus
848 gen75_mfc_stop(VADriverContextP ctx, 
849               struct encode_state *encode_state,
850               struct intel_encoder_context *encoder_context,
851               int *encoded_bits_size)
852 {
853     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
854     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
855     VACodedBufferSegment *coded_buffer_segment;
856     
857     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
858     assert(vaStatus == VA_STATUS_SUCCESS);
859     *encoded_bits_size = coded_buffer_segment->size * 8;
860     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
861
862     return VA_STATUS_SUCCESS;
863 }
864
865
866 static void
867 gen75_mfc_avc_slice_state(VADriverContextP ctx,
868                          VAEncPictureParameterBufferH264 *pic_param,
869                          VAEncSliceParameterBufferH264 *slice_param,
870                          struct encode_state *encode_state,
871                          struct intel_encoder_context *encoder_context,
872                          int rate_control_enable,
873                          int qp,
874                          struct intel_batchbuffer *batch)
875 {
876     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
877     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
878     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
879     int beginmb = slice_param->macroblock_address;
880     int endmb = beginmb + slice_param->num_macroblocks;
881     int beginx = beginmb % width_in_mbs;
882     int beginy = beginmb / width_in_mbs;
883     int nextx =  endmb % width_in_mbs;
884     int nexty = endmb / width_in_mbs;
885     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
886     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
887     int maxQpN, maxQpP;
888     unsigned char correct[6], grow, shrink;
889     int i;
890     int bslice = 0;
891     int weighted_pred_idc = 0;
892     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
893     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
894
895     if (batch == NULL)
896         batch = encoder_context->base.batch;
897
898     if (slice_type == SLICE_TYPE_P) {
899         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
900     } else if (slice_type == SLICE_TYPE_B) {
901         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
902         bslice = 1;
903
904         if (weighted_pred_idc == 2) {
905             /* 8.4.3 - Derivation process for prediction weights (8-279) */
906             luma_log2_weight_denom = 5;
907             chroma_log2_weight_denom = 5;
908         }
909     }
910
911     maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
912     maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
913
914     for (i = 0; i < 6; i++)
915         correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
916
917     grow = mfc_context->bit_rate_control_context[slice_type].GrowInit + 
918         (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
919     shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit + 
920         (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
921
922     BEGIN_BCS_BATCH(batch, 11);;
923
924     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
925     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
926
927     if (slice_type == SLICE_TYPE_I) {
928         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
929     } else {
930         OUT_BCS_BATCH(batch,
931                       (1 << 16) | (bslice << 24) |                      /*1 reference frame*/
932                       (chroma_log2_weight_denom << 8) |
933                       (luma_log2_weight_denom << 0));
934     }
935
936     OUT_BCS_BATCH(batch, 
937                   (weighted_pred_idc << 30) |
938                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
939                   (slice_param->disable_deblocking_filter_idc << 27) |
940                   (slice_param->cabac_init_idc << 24) |
941                   (qp<<16) |                    /*Slice Quantization Parameter*/
942                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
943                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
944     OUT_BCS_BATCH(batch,
945                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
946                   (beginx << 16) |
947                   slice_param->macroblock_address );
948     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
949     OUT_BCS_BATCH(batch, 
950                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
951                   (1 << 30) |           /*ResetRateControlCounter*/
952                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
953                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
954                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
955                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
956                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
957                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
958                   (last_slice << 19) |     /*IsLastSlice*/
959                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
960                   (1 << 17) |       /*HeaderPresentFlag*/       
961                   (1 << 16) |       /*SliceData PresentFlag*/
962                   (1 << 15) |       /*TailPresentFlag*/
963                   (1 << 13) |       /*RBSP NAL TYPE*/   
964                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
965     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
966     OUT_BCS_BATCH(batch,
967                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
968                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
969                   (shrink << 8)  |
970                   (grow << 0));   
971     OUT_BCS_BATCH(batch,
972                   (correct[5] << 20) |
973                   (correct[4] << 16) |
974                   (correct[3] << 12) |
975                   (correct[2] << 8) |
976                   (correct[1] << 4) |
977                   (correct[0] << 0));
978     OUT_BCS_BATCH(batch, 0);
979
980     ADVANCE_BCS_BATCH(batch);
981 }
982
983
984 #ifdef MFC_SOFTWARE_HASWELL
985
986 static int
987 gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
988                                 int qp,unsigned int *msg,
989                               struct intel_encoder_context *encoder_context,
990                               unsigned char target_mb_size, unsigned char max_mb_size,
991                               struct intel_batchbuffer *batch)
992 {
993     int len_in_dwords = 12;
994     unsigned int intra_msg;
995 #define         INTRA_MSG_FLAG          (1 << 13)
996 #define         INTRA_MBTYPE_MASK       (0x1F0000)
997     if (batch == NULL)
998         batch = encoder_context->base.batch;
999
1000     BEGIN_BCS_BATCH(batch, len_in_dwords);
1001
1002     intra_msg = msg[0] & 0xC0FF;
1003     intra_msg |= INTRA_MSG_FLAG;
1004     intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
1005     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1006     OUT_BCS_BATCH(batch, 0);
1007     OUT_BCS_BATCH(batch, 0);
1008     OUT_BCS_BATCH(batch, 
1009                   (0 << 24) |           /* PackedMvNum, Debug*/
1010                   (0 << 20) |           /* No motion vector */
1011                   (1 << 19) |           /* CbpDcY */
1012                   (1 << 18) |           /* CbpDcU */
1013                   (1 << 17) |           /* CbpDcV */
1014                   intra_msg);
1015
1016     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
1017     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
1018     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
1019
1020     /*Stuff for Intra MB*/
1021     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
1022     OUT_BCS_BATCH(batch, msg[2]);       
1023     OUT_BCS_BATCH(batch, msg[3]&0xFF);  
1024     
1025     /*MaxSizeInWord and TargetSzieInWord*/
1026     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1027                   (target_mb_size << 16) );
1028
1029     OUT_BCS_BATCH(batch, 0);
1030
1031     ADVANCE_BCS_BATCH(batch);
1032
1033     return len_in_dwords;
1034 }
1035
1036 static int
1037 gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1038                               unsigned int *msg, unsigned int offset,
1039                               struct intel_encoder_context *encoder_context,
1040                               unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1041                               struct intel_batchbuffer *batch)
1042 {
1043     int len_in_dwords = 12;
1044         unsigned int inter_msg = 0;
1045     if (batch == NULL)
1046         batch = encoder_context->base.batch;
1047     {
1048 #define MSG_MV_OFFSET   4
1049         unsigned int *mv_ptr;
1050         mv_ptr = msg + MSG_MV_OFFSET;
1051         /* MV of VME output is based on 16 sub-blocks. So it is necessary
1052          * to convert them to be compatible with the format of AVC_PAK
1053          * command.
1054          */
1055         if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
1056                 /* MV[0] and MV[2] are replicated */
1057                 mv_ptr[4] = mv_ptr[0];
1058                 mv_ptr[5] = mv_ptr[1];
1059                 mv_ptr[2] = mv_ptr[8];
1060                 mv_ptr[3] = mv_ptr[9];
1061                 mv_ptr[6] = mv_ptr[8]; 
1062                 mv_ptr[7] = mv_ptr[9]; 
1063         } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
1064                 /* MV[0] and MV[1] are replicated */
1065                 mv_ptr[2] = mv_ptr[0];  
1066                 mv_ptr[3] = mv_ptr[1];
1067                 mv_ptr[4] = mv_ptr[16]; 
1068                 mv_ptr[5] = mv_ptr[17]; 
1069                 mv_ptr[6] = mv_ptr[24];
1070                 mv_ptr[7] = mv_ptr[25];
1071         } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1072                         !(msg[1] & SUBMB_SHAPE_MASK)) {
1073                 /* Don't touch MV[0] or MV[1] */
1074                 mv_ptr[2] = mv_ptr[8];
1075                 mv_ptr[3] = mv_ptr[9];
1076                 mv_ptr[4] = mv_ptr[16];
1077                 mv_ptr[5] = mv_ptr[17];
1078                 mv_ptr[6] = mv_ptr[24];
1079                 mv_ptr[7] = mv_ptr[25];
1080         }
1081     }
1082
1083     BEGIN_BCS_BATCH(batch, len_in_dwords);
1084
1085     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1086
1087         inter_msg = 32;
1088         /* MV quantity */
1089         if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
1090                 if (msg[1] & SUBMB_SHAPE_MASK)
1091                         inter_msg = 128;
1092         }
1093     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
1094     OUT_BCS_BATCH(batch, offset);
1095         inter_msg = msg[0] & (0x1F00FFFF);
1096         inter_msg |= INTER_MV8;
1097         inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
1098         if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1099                         (msg[1] & SUBMB_SHAPE_MASK)) {
1100                 inter_msg |= INTER_MV32;
1101         }
1102
1103     OUT_BCS_BATCH(batch, inter_msg);
1104
1105     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
1106     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
1107 #if 0 
1108     if ( slice_type == SLICE_TYPE_B) {
1109         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
1110     } else {
1111         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
1112     }
1113 #else
1114     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
1115 #endif
1116
1117         inter_msg = msg[1] >> 8;
1118     /*Stuff for Inter MB*/
1119     OUT_BCS_BATCH(batch, inter_msg);        
1120     OUT_BCS_BATCH(batch, 0x0);    
1121     OUT_BCS_BATCH(batch, 0x0);        
1122
1123     /*MaxSizeInWord and TargetSzieInWord*/
1124     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1125                   (target_mb_size << 16) );
1126
1127     OUT_BCS_BATCH(batch, 0x0);    
1128
1129     ADVANCE_BCS_BATCH(batch);
1130
1131     return len_in_dwords;
1132 }
1133
1134 #define         AVC_INTRA_RDO_OFFSET    4
1135 #define         AVC_INTER_RDO_OFFSET    10
1136 #define         AVC_INTER_MSG_OFFSET    8       
1137 #define         AVC_INTER_MV_OFFSET             48
1138 #define         AVC_RDO_MASK            0xFFFF
1139
1140 static void 
1141 gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1142                                        struct encode_state *encode_state,
1143                                        struct intel_encoder_context *encoder_context,
1144                                        int slice_index,
1145                                        struct intel_batchbuffer *slice_batch)
1146 {
1147     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1148     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1149     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1150     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1151     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1152     unsigned int *msg = NULL, offset = 0;
1153     unsigned char *msg_ptr = NULL;
1154     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1155     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1156     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1157     int i,x,y;
1158     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1159     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1160     unsigned char *slice_header = NULL;
1161     int slice_header_length_in_bits = 0;
1162     unsigned int tail_data[] = { 0x0, 0x0 };
1163     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1164     int is_intra = slice_type == SLICE_TYPE_I;
1165
1166     if (rate_control_mode == VA_RC_CBR) {
1167         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1168         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1169     }
1170
1171     /* only support for 8-bit pixel bit-depth */
1172     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1173     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1174     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1175     assert(qp >= 0 && qp < 52);
1176
1177     gen75_mfc_avc_slice_state(ctx, 
1178                              pPicParameter,
1179                              pSliceParameter,
1180                              encode_state, encoder_context,
1181                              (rate_control_mode == VA_RC_CBR), qp, slice_batch);
1182
1183     if ( slice_index == 0) 
1184         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1185
1186     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1187
1188     // slice hander
1189     mfc_context->insert_object(ctx, encoder_context,
1190                                (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1191                                5,  /* first 5 bytes are start code + nal unit type */
1192                                1, 0, 1, slice_batch);
1193
1194     dri_bo_map(vme_context->vme_output.bo , 1);
1195     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1196
1197     if (is_intra) {
1198         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1199     } else {
1200         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1201     }
1202    
1203     for (i = pSliceParameter->macroblock_address; 
1204          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1205         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1206         x = i % width_in_mbs;
1207         y = i / width_in_mbs;
1208         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1209
1210         if (is_intra) {
1211             assert(msg);
1212             gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1213         } else {
1214             int inter_rdo, intra_rdo;
1215             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1216             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1217             offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1218             if (intra_rdo < inter_rdo) { 
1219                 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1220             } else {
1221                 msg += AVC_INTER_MSG_OFFSET;
1222                 gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, slice_type, slice_batch);
1223             }
1224         }
1225     }
1226    
1227     dri_bo_unmap(vme_context->vme_output.bo);
1228
1229     if ( last_slice ) {    
1230         mfc_context->insert_object(ctx, encoder_context,
1231                                    tail_data, 2, 8,
1232                                    2, 1, 1, 0, slice_batch);
1233     } else {
1234         mfc_context->insert_object(ctx, encoder_context,
1235                                    tail_data, 1, 8,
1236                                    1, 1, 1, 0, slice_batch);
1237     }
1238
1239     free(slice_header);
1240
1241 }
1242
1243 static dri_bo *
1244 gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1245                                   struct encode_state *encode_state,
1246                                   struct intel_encoder_context *encoder_context)
1247 {
1248     struct i965_driver_data *i965 = i965_driver_data(ctx);
1249     struct intel_batchbuffer *batch;
1250     dri_bo *batch_bo;
1251     int i;
1252     int buffer_size;
1253     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1254     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1255     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1256
1257     buffer_size = width_in_mbs * height_in_mbs * 64;
1258     batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
1259     batch_bo = batch->buffer;
1260     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1261         gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1262     }
1263
1264     intel_batchbuffer_align(batch, 8);
1265     
1266     BEGIN_BCS_BATCH(batch, 2);
1267     OUT_BCS_BATCH(batch, 0);
1268     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1269     ADVANCE_BCS_BATCH(batch);
1270
1271     dri_bo_reference(batch_bo);
1272     intel_batchbuffer_free(batch);
1273
1274     return batch_bo;
1275 }
1276
1277 #else
1278
1279 static void
1280 gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1281                                     struct encode_state *encode_state,
1282                                     struct intel_encoder_context *encoder_context)
1283
1284 {
1285     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1286     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1287
1288     assert(vme_context->vme_output.bo);
1289     mfc_context->buffer_suface_setup(ctx,
1290                                      &mfc_context->gpe_context,
1291                                      &vme_context->vme_output,
1292                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1293                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1294     assert(mfc_context->aux_batchbuffer_surface.bo);
1295     mfc_context->buffer_suface_setup(ctx,
1296                                      &mfc_context->gpe_context,
1297                                      &mfc_context->aux_batchbuffer_surface,
1298                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1299                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
1300 }
1301
1302 static void
1303 gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1304                                      struct encode_state *encode_state,
1305                                      struct intel_encoder_context *encoder_context)
1306
1307 {
1308     struct i965_driver_data *i965 = i965_driver_data(ctx);
1309     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1310     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1311     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1312     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1313     mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1314     mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1315     mfc_context->mfc_batchbuffer_surface.pitch = 16;
1316     mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, 
1317                                                            "MFC batchbuffer",
1318                                                            mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1319                                                            0x1000);
1320     mfc_context->buffer_suface_setup(ctx,
1321                                      &mfc_context->gpe_context,
1322                                      &mfc_context->mfc_batchbuffer_surface,
1323                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1324                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1325 }
1326
1327 static void
1328 gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
1329                                     struct encode_state *encode_state,
1330                                     struct intel_encoder_context *encoder_context)
1331 {
1332     gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1333     gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1334 }
1335
1336 static void
1337 gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
1338                                 struct encode_state *encode_state,
1339                                 struct intel_encoder_context *encoder_context)
1340 {
1341     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1342     struct gen6_interface_descriptor_data *desc;   
1343     int i;
1344     dri_bo *bo;
1345
1346     bo = mfc_context->gpe_context.idrt.bo;
1347     dri_bo_map(bo, 1);
1348     assert(bo->virtual);
1349     desc = bo->virtual;
1350
1351     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1352         struct i965_kernel *kernel;
1353
1354         kernel = &mfc_context->gpe_context.kernels[i];
1355         assert(sizeof(*desc) == 32);
1356
1357         /*Setup the descritor table*/
1358         memset(desc, 0, sizeof(*desc));
1359         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1360         desc->desc2.sampler_count = 0;
1361         desc->desc2.sampler_state_pointer = 0;
1362         desc->desc3.binding_table_entry_count = 2;
1363         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1364         desc->desc4.constant_urb_entry_read_offset = 0;
1365         desc->desc4.constant_urb_entry_read_length = 4;
1366                 
1367         /*kernel start*/
1368         dri_bo_emit_reloc(bo,   
1369                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1370                           0,
1371                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1372                           kernel->bo);
1373         desc++;
1374     }
1375
1376     dri_bo_unmap(bo);
1377 }
1378
1379 static void
1380 gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1381                                     struct encode_state *encode_state,
1382                                     struct intel_encoder_context *encoder_context)
1383 {
1384     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1385     
1386     (void)mfc_context;
1387 }
1388
1389 static void
1390 gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1391                                          int index,
1392                                          int head_offset,
1393                                          int batchbuffer_offset,
1394                                          int head_size,
1395                                          int tail_size,
1396                                          int number_mb_cmds,
1397                                          int first_object,
1398                                          int last_object,
1399                                          int last_slice,
1400                                          int mb_x,
1401                                          int mb_y,
1402                                          int width_in_mbs,
1403                                          int qp)
1404 {
1405     BEGIN_BATCH(batch, 12);
1406     
1407     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1408     OUT_BATCH(batch, index);
1409     OUT_BATCH(batch, 0);
1410     OUT_BATCH(batch, 0);
1411     OUT_BATCH(batch, 0);
1412     OUT_BATCH(batch, 0);
1413    
1414     /*inline data */
1415     OUT_BATCH(batch, head_offset);
1416     OUT_BATCH(batch, batchbuffer_offset);
1417     OUT_BATCH(batch, 
1418               head_size << 16 |
1419               tail_size);
1420     OUT_BATCH(batch,
1421               number_mb_cmds << 16 |
1422               first_object << 2 |
1423               last_object << 1 |
1424               last_slice);
1425     OUT_BATCH(batch,
1426               mb_y << 8 |
1427               mb_x);
1428     OUT_BATCH(batch,
1429               qp << 16 |
1430               width_in_mbs);
1431
1432     ADVANCE_BATCH(batch);
1433 }
1434
1435 static void
1436 gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1437                                        struct intel_encoder_context *encoder_context,
1438                                        VAEncSliceParameterBufferH264 *slice_param,
1439                                        int head_offset,
1440                                        unsigned short head_size,
1441                                        unsigned short tail_size,
1442                                        int batchbuffer_offset,
1443                                        int qp,
1444                                        int last_slice)
1445 {
1446     struct intel_batchbuffer *batch = encoder_context->base.batch;
1447     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1448     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1449     int total_mbs = slice_param->num_macroblocks;
1450     int number_mb_cmds = 128;
1451     int starting_mb = 0;
1452     int last_object = 0;
1453     int first_object = 1;
1454     int i;
1455     int mb_x, mb_y;
1456     int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1457
1458     for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1459         last_object = (total_mbs - starting_mb) == number_mb_cmds;
1460         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1461         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1462         assert(mb_x <= 255 && mb_y <= 255);
1463
1464         starting_mb += number_mb_cmds;
1465
1466         gen75_mfc_batchbuffer_emit_object_command(batch,
1467                                                  index,
1468                                                  head_offset,
1469                                                  batchbuffer_offset,
1470                                                  head_size,
1471                                                  tail_size,
1472                                                  number_mb_cmds,
1473                                                  first_object,
1474                                                  last_object,
1475                                                  last_slice,
1476                                                  mb_x,
1477                                                  mb_y,
1478                                                  width_in_mbs,
1479                                                  qp);
1480
1481         if (first_object) {
1482             head_offset += head_size;
1483             batchbuffer_offset += head_size;
1484         }
1485
1486         if (last_object) {
1487             head_offset += tail_size;
1488             batchbuffer_offset += tail_size;
1489         }
1490
1491         batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
1492
1493         first_object = 0;
1494     }
1495
1496     if (!last_object) {
1497         last_object = 1;
1498         number_mb_cmds = total_mbs % number_mb_cmds;
1499         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1500         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1501         assert(mb_x <= 255 && mb_y <= 255);
1502         starting_mb += number_mb_cmds;
1503
1504         gen75_mfc_batchbuffer_emit_object_command(batch,
1505                                                  index,
1506                                                  head_offset,
1507                                                  batchbuffer_offset,
1508                                                  head_size,
1509                                                  tail_size,
1510                                                  number_mb_cmds,
1511                                                  first_object,
1512                                                  last_object,
1513                                                  last_slice,
1514                                                  mb_x,
1515                                                  mb_y,
1516                                                  width_in_mbs,
1517                                                  qp);
1518     }
1519 }
1520                           
1521 /*
1522  * return size in Owords (16bytes)
1523  */         
1524 static int
1525 gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1526                                struct encode_state *encode_state,
1527                                struct intel_encoder_context *encoder_context,
1528                                int slice_index,
1529                                int batchbuffer_offset)
1530 {
1531     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1532     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1533     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1534     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1535     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1536     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1537     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1538     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1539     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1540     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1541     unsigned char *slice_header = NULL;
1542     int slice_header_length_in_bits = 0;
1543     unsigned int tail_data[] = { 0x0, 0x0 };
1544     long head_offset;
1545     int old_used = intel_batchbuffer_used_size(slice_batch), used;
1546     unsigned short head_size, tail_size;
1547     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1548
1549     if (rate_control_mode == VA_RC_CBR) {
1550         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1551         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1552     }
1553
1554     /* only support for 8-bit pixel bit-depth */
1555     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1556     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1557     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1558     assert(qp >= 0 && qp < 52);
1559
1560     head_offset = old_used / 16;
1561     gen75_mfc_avc_slice_state(ctx,
1562                              pPicParameter,
1563                              pSliceParameter,
1564                              encode_state,
1565                              encoder_context,
1566                              (rate_control_mode == VA_RC_CBR),
1567                              qp,
1568                              slice_batch);
1569
1570     if (slice_index == 0)
1571         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1572
1573     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1574
1575     // slice hander
1576     mfc_context->insert_object(ctx,
1577                                encoder_context,
1578                                (unsigned int *)slice_header,
1579                                ALIGN(slice_header_length_in_bits, 32) >> 5,
1580                                slice_header_length_in_bits & 0x1f,
1581                                5,  /* first 5 bytes are start code + nal unit type */
1582                                1,
1583                                0,
1584                                1,
1585                                slice_batch);
1586     free(slice_header);
1587
1588     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1589     used = intel_batchbuffer_used_size(slice_batch);
1590     head_size = (used - old_used) / 16;
1591     old_used = used;
1592
1593     /* tail */
1594     if (last_slice) {    
1595         mfc_context->insert_object(ctx,
1596                                    encoder_context,
1597                                    tail_data,
1598                                    2,
1599                                    8,
1600                                    2,
1601                                    1,
1602                                    1,
1603                                    0,
1604                                    slice_batch);
1605     } else {
1606         mfc_context->insert_object(ctx,
1607                                    encoder_context,
1608                                    tail_data,
1609                                    1,
1610                                    8,
1611                                    1,
1612                                    1,
1613                                    1,
1614                                    0,
1615                                    slice_batch);
1616     }
1617
1618     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1619     used = intel_batchbuffer_used_size(slice_batch);
1620     tail_size = (used - old_used) / 16;
1621
1622    
1623     gen75_mfc_avc_batchbuffer_slice_command(ctx,
1624                                            encoder_context,
1625                                            pSliceParameter,
1626                                            head_offset,
1627                                            head_size,
1628                                            tail_size,
1629                                            batchbuffer_offset,
1630                                            qp,
1631                                            last_slice);
1632
1633     return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
1634 }
1635
1636 static void
1637 gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1638                                   struct encode_state *encode_state,
1639                                   struct intel_encoder_context *encoder_context)
1640 {
1641     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1642     struct intel_batchbuffer *batch = encoder_context->base.batch;
1643     int i, size, offset = 0;
1644     intel_batchbuffer_start_atomic(batch, 0x4000); 
1645     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1646
1647     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1648         size = gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1649         offset += size;
1650     }
1651
1652     intel_batchbuffer_end_atomic(batch);
1653     intel_batchbuffer_flush(batch);
1654 }
1655
1656 static void
1657 gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
1658                                struct encode_state *encode_state,
1659                                struct intel_encoder_context *encoder_context)
1660 {
1661     gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1662     gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1663     gen75_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1664     gen75_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1665 }
1666
1667 static dri_bo *
1668 gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1669                                   struct encode_state *encode_state,
1670                                   struct intel_encoder_context *encoder_context)
1671 {
1672     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1673
1674     gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1675     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1676
1677     return mfc_context->mfc_batchbuffer_surface.bo;
1678 }
1679
1680 #endif
1681
1682 static void
1683 gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
1684                                  struct encode_state *encode_state,
1685                                  struct intel_encoder_context *encoder_context)
1686 {
1687     struct intel_batchbuffer *batch = encoder_context->base.batch;
1688     dri_bo *slice_batch_bo;
1689
1690     if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1691         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1692         assert(0);
1693         return; 
1694     }
1695
1696 #ifdef MFC_SOFTWARE_HASWELL
1697     slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1698 #else
1699     slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1700 #endif
1701
1702     // begin programing
1703     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1704     intel_batchbuffer_emit_mi_flush(batch);
1705     
1706     // picture level programing
1707     gen75_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1708
1709     BEGIN_BCS_BATCH(batch, 2);
1710     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1711     OUT_BCS_RELOC(batch,
1712                   slice_batch_bo,
1713                   I915_GEM_DOMAIN_COMMAND, 0, 
1714                   0);
1715     ADVANCE_BCS_BATCH(batch);
1716
1717     // end programing
1718     intel_batchbuffer_end_atomic(batch);
1719
1720     dri_bo_unreference(slice_batch_bo);
1721 }
1722
1723
1724 static VAStatus
1725 gen75_mfc_avc_encode_picture(VADriverContextP ctx, 
1726                             struct encode_state *encode_state,
1727                             struct intel_encoder_context *encoder_context)
1728 {
1729     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1730     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1731     int current_frame_bits_size;
1732     int sts;
1733  
1734     for (;;) {
1735         gen75_mfc_init(ctx, encode_state, encoder_context);
1736         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1737         /*Programing bcs pipeline*/
1738         gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);  //filling the pipeline
1739         gen75_mfc_run(ctx, encode_state, encoder_context);
1740         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1741             gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1742             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1743             if (sts == BRC_NO_HRD_VIOLATION) {
1744                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1745                 break;
1746             }
1747             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1748                 if (!mfc_context->hrd.violation_noted) {
1749                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1750                     mfc_context->hrd.violation_noted = 1;
1751                 }
1752                 return VA_STATUS_SUCCESS;
1753             }
1754         } else {
1755             break;
1756         }
1757     }
1758
1759     return VA_STATUS_SUCCESS;
1760 }
1761
1762 /*
1763  * MPEG-2
1764  */
1765
/*
 * Picture-type code written into MFX_MPEG2_PIC_STATE, indexed by the
 * picture type of the picture parameter buffer (entries in I, P, B order).
 */
static const int
va_to_gen75_mpeg2_picture_type[3] = {
    /* I */ 1,
    /* P */ 2,
    /* B */ 3
};
1772
1773 static void
1774 gen75_mfc_mpeg2_pic_state(VADriverContextP ctx,
1775                           struct intel_encoder_context *encoder_context,
1776                           struct encode_state *encode_state)
1777 {
1778     struct intel_batchbuffer *batch = encoder_context->base.batch;
1779     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1780     VAEncPictureParameterBufferMPEG2 *pic_param;
1781     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1782     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1783
1784     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1785     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1786
1787     BEGIN_BCS_BATCH(batch, 13);
1788     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1789     OUT_BCS_BATCH(batch,
1790                   (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1791                   (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1792                   (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1793                   (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1794                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1795                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1796                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1797                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1798                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1799                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1800                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1801                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1802     OUT_BCS_BATCH(batch,
1803                   0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1804                   va_to_gen75_mpeg2_picture_type[pic_param->picture_type] << 9 |
1805                   0);
1806     OUT_BCS_BATCH(batch,
1807                   1 << 31 |     /* slice concealment */
1808                   (height_in_mbs - 1) << 16 |
1809                   (width_in_mbs - 1));
1810     OUT_BCS_BATCH(batch, 0);
1811     OUT_BCS_BATCH(batch, 0);
1812     OUT_BCS_BATCH(batch,
1813                   0xFFF << 16 | /* InterMBMaxSize */
1814                   0xFFF << 0 |  /* IntraMBMaxSize */
1815                   0);
1816     OUT_BCS_BATCH(batch, 0);
1817     OUT_BCS_BATCH(batch, 0);
1818     OUT_BCS_BATCH(batch, 0);
1819     OUT_BCS_BATCH(batch, 0);
1820     OUT_BCS_BATCH(batch, 0);
1821     OUT_BCS_BATCH(batch, 0);
1822     ADVANCE_BCS_BATCH(batch);
1823 }
1824
1825 static void
1826 gen75_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1827 {
1828     unsigned char intra_qm[64] = {
1829          8, 16, 19, 22, 26, 27, 29, 34,
1830         16, 16, 22, 24, 27, 29, 34, 37,
1831         19, 22, 26, 27, 29, 34, 34, 38,
1832         22, 22, 26, 27, 29, 34, 37, 40,
1833         22, 26, 27, 29, 32, 35, 40, 48,
1834         26, 27, 29, 32, 35, 40, 48, 58,
1835         26, 27, 29, 34, 38, 46, 56, 69,
1836         27, 29, 35, 38, 46, 56, 69, 83
1837     };
1838
1839     unsigned char non_intra_qm[64] = {
1840         16, 16, 16, 16, 16, 16, 16, 16,
1841         16, 16, 16, 16, 16, 16, 16, 16,
1842         16, 16, 16, 16, 16, 16, 16, 16,
1843         16, 16, 16, 16, 16, 16, 16, 16,
1844         16, 16, 16, 16, 16, 16, 16, 16,
1845         16, 16, 16, 16, 16, 16, 16, 16,
1846         16, 16, 16, 16, 16, 16, 16, 16,
1847         16, 16, 16, 16, 16, 16, 16, 16
1848     };
1849
1850     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1851     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
1852 }
1853
1854 static void
1855 gen75_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1856 {
1857     unsigned short intra_fqm[64] = {
1858          65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1859          65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1860          65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1861          65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1862          65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1863          65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1864          65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1865          65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1866     };
1867
1868     unsigned short non_intra_fqm[64] = {
1869         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1870         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1871         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1872         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1873         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1874         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1875         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1876         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1877     };
1878
1879     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1880     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
1881 }
1882
1883 static void
1884 gen75_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1885                                  struct intel_encoder_context *encoder_context,
1886                                  int x, int y,
1887                                  int next_x, int next_y,
1888                                  int is_fisrt_slice_group,
1889                                  int is_last_slice_group,
1890                                  int intra_slice,
1891                                  int qp,
1892                                  struct intel_batchbuffer *batch)
1893 {
1894     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1895
1896     if (batch == NULL)
1897         batch = encoder_context->base.batch;
1898
1899     BEGIN_BCS_BATCH(batch, 8);
1900
1901     OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1902     OUT_BCS_BATCH(batch,
1903                   0 << 31 |                             /* MbRateCtrlFlag */
1904                   !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
1905                   1 << 17 |                             /* Insert Header before the first slice group data */
1906                   1 << 16 |                             /* SliceData PresentFlag: always 1 */
1907                   1 << 15 |                             /* TailPresentFlag: always 1 */
1908                   0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
1909                   !!intra_slice << 13 |                 /* IntraSlice */
1910                   !!intra_slice << 12 |                 /* IntraSliceFlag */
1911                   0);
1912     OUT_BCS_BATCH(batch,
1913                   next_y << 24 |
1914                   next_x << 16 |
1915                   y << 8 |
1916                   x << 0 |
1917                   0);
1918     OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
1919     /* bitstream pointer is only loaded once for the first slice of a frame when 
1920      * LoadSlicePointerFlag is 0
1921      */
1922     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1923     OUT_BCS_BATCH(batch, 0);    /* FIXME: */
1924     OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
1925     OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
1926
1927     ADVANCE_BCS_BATCH(batch);
1928 }
1929
1930 static int
1931 gen75_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1932                                  struct intel_encoder_context *encoder_context,
1933                                  int x, int y,
1934                                  int first_mb_in_slice,
1935                                  int last_mb_in_slice,
1936                                  int first_mb_in_slice_group,
1937                                  int last_mb_in_slice_group,
1938                                  int mb_type,
1939                                  int qp_scale_code,
1940                                  int coded_block_pattern,
1941                                  unsigned char target_size_in_word,
1942                                  unsigned char max_size_in_word,
1943                                  struct intel_batchbuffer *batch)
1944 {
1945     int len_in_dwords = 9;
1946
1947     if (batch == NULL)
1948         batch = encoder_context->base.batch;
1949
1950     BEGIN_BCS_BATCH(batch, len_in_dwords);
1951
1952     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1953     OUT_BCS_BATCH(batch,
1954                   0 << 24 |     /* PackedMvNum */
1955                   0 << 20 |     /* MvFormat */
1956                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1957                   0 << 15 |     /* TransformFlag: frame DCT */
1958                   0 << 14 |     /* FieldMbFlag */
1959                   1 << 13 |     /* IntraMbFlag */
1960                   mb_type << 8 |   /* MbType: Intra */
1961                   0 << 2 |      /* SkipMbFlag */
1962                   0 << 0 |      /* InterMbMode */
1963                   0);
1964     OUT_BCS_BATCH(batch, y << 16 | x);
1965     OUT_BCS_BATCH(batch,
1966                   max_size_in_word << 24 |
1967                   target_size_in_word << 16 |
1968                   coded_block_pattern << 6 |      /* CBP */
1969                   0);
1970     OUT_BCS_BATCH(batch,
1971                   last_mb_in_slice << 31 |
1972                   first_mb_in_slice << 30 |
1973                   0 << 27 |     /* EnableCoeffClamp */
1974                   last_mb_in_slice_group << 26 |
1975                   0 << 25 |     /* MbSkipConvDisable */
1976                   first_mb_in_slice_group << 24 |
1977                   0 << 16 |     /* MvFieldSelect */
1978                   qp_scale_code << 0 |
1979                   0);
1980     OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
1981     OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
1982     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1983     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1984
1985     ADVANCE_BCS_BATCH(batch);
1986
1987     return len_in_dwords;
1988 }
1989
#define MPEG2_INTER_MV_OFFSET   12

/*
 * Motion-vector range allowed for each f_code, indexed by f_code (0..9).
 * Index 0 is a placeholder; mpeg2_motion_vector() never looks it up.
 */
static struct _mv_ranges
{
    int low;    /* in the unit of 1/2 pixel */
    int high;   /* in the unit of 1/2 pixel */
} mv_ranges[] = {
    {     0,    0 },
    {   -16,   15 },
    {   -32,   31 },
    {   -64,   63 },
    {  -128,  127 },
    {  -256,  255 },
    {  -512,  511 },
    { -1024, 1023 },
    { -2048, 2047 },
    { -4096, 4095 }
};

/*
 * Sanitise one motion-vector component.
 *
 * mv          - vector component, in half-pixel units
 * pos         - macroblock coordinate along the same axis
 * display_max - picture size along that axis, in pixels
 * f_code      - f_code that limits the vector range
 *
 * The vector is zeroed when the 16-pixel block it points to would start
 * before 0 or end beyond display_max.  The result is then clamped to
 * mv_ranges[f_code] when f_code is in 1..9; other f_code values apply no
 * clamp.
 */
static int
mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
{
    const int block_begin = pos * 16 * 2;          /* half-pixel units */
    const int block_end = (pos + 1) * 16 * 2;
    const int picture_end = display_max * 2;

    if (mv + block_begin < 0 || mv + block_end > picture_end)
        mv = 0;

    /* f_code outside the table: leave the vector alone */
    if (f_code <= 0 || f_code >= 10)
        return mv;

    if (mv < mv_ranges[f_code].low)
        mv = mv_ranges[f_code].low;

    if (mv > mv_ranges[f_code].high)
        mv = mv_ranges[f_code].high;

    return mv;
}
2026
2027 static int
2028 gen75_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
2029                                  struct encode_state *encode_state,
2030                                  struct intel_encoder_context *encoder_context,
2031                                  unsigned int *msg,
2032                                  int width_in_mbs, int height_in_mbs,
2033                                  int x, int y,
2034                                  int first_mb_in_slice,
2035                                  int last_mb_in_slice,
2036                                  int first_mb_in_slice_group,
2037                                  int last_mb_in_slice_group,
2038                                  int qp_scale_code,
2039                                  unsigned char target_size_in_word,
2040                                  unsigned char max_size_in_word,
2041                                  struct intel_batchbuffer *batch)
2042 {
2043     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
2044     int len_in_dwords = 9;
2045     short *mvptr, mvx0, mvy0, mvx1, mvy1;
2046     
2047     if (batch == NULL)
2048         batch = encoder_context->base.batch;
2049
2050     mvptr = (short *)(msg + MPEG2_INTER_MV_OFFSET);
2051     mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
2052     mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
2053     mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
2054     mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
2055
2056     BEGIN_BCS_BATCH(batch, len_in_dwords);
2057
2058     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
2059     OUT_BCS_BATCH(batch,
2060                   2 << 24 |     /* PackedMvNum */
2061                   7 << 20 |     /* MvFormat */
2062                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
2063                   0 << 15 |     /* TransformFlag: frame DCT */
2064                   0 << 14 |     /* FieldMbFlag */
2065                   0 << 13 |     /* IntraMbFlag */
2066                   1 << 8 |      /* MbType: Frame-based */
2067                   0 << 2 |      /* SkipMbFlag */
2068                   0 << 0 |      /* InterMbMode */
2069                   0);
2070     OUT_BCS_BATCH(batch, y << 16 | x);
2071     OUT_BCS_BATCH(batch,
2072                   max_size_in_word << 24 |
2073                   target_size_in_word << 16 |
2074                   0x3f << 6 |   /* CBP */
2075                   0);
2076     OUT_BCS_BATCH(batch,
2077                   last_mb_in_slice << 31 |
2078                   first_mb_in_slice << 30 |
2079                   0 << 27 |     /* EnableCoeffClamp */
2080                   last_mb_in_slice_group << 26 |
2081                   0 << 25 |     /* MbSkipConvDisable */
2082                   first_mb_in_slice_group << 24 |
2083                   0 << 16 |     /* MvFieldSelect */
2084                   qp_scale_code << 0 |
2085                   0);
2086
2087     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
2088     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
2089     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
2090     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
2091
2092     ADVANCE_BCS_BATCH(batch);
2093
2094     return len_in_dwords;
2095 }
2096
2097 static void
2098 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
2099                                            struct encode_state *encode_state,
2100                                            struct intel_encoder_context *encoder_context,
2101                                            struct intel_batchbuffer *slice_batch)
2102 {
2103     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2104     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
2105
2106     if (encode_state->packed_header_data[idx]) {
2107         VAEncPackedHeaderParameterBuffer *param = NULL;
2108         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2109         unsigned int length_in_bits;
2110
2111         assert(encode_state->packed_header_param[idx]);
2112         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2113         length_in_bits = param->bit_length;
2114
2115         mfc_context->insert_object(ctx,
2116                                    encoder_context,
2117                                    header_data,
2118                                    ALIGN(length_in_bits, 32) >> 5,
2119                                    length_in_bits & 0x1f,
2120                                    5,   /* FIXME: check it */
2121                                    0,
2122                                    0,
2123                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2124                                    slice_batch);
2125     }
2126
2127     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2128
2129     if (encode_state->packed_header_data[idx]) {
2130         VAEncPackedHeaderParameterBuffer *param = NULL;
2131         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2132         unsigned int length_in_bits;
2133
2134         assert(encode_state->packed_header_param[idx]);
2135         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2136         length_in_bits = param->bit_length;
2137
2138         mfc_context->insert_object(ctx,
2139                                    encoder_context,
2140                                    header_data,
2141                                    ALIGN(length_in_bits, 32) >> 5,
2142                                    length_in_bits & 0x1f,
2143                                    5,   /* FIXME: check it */
2144                                    0,
2145                                    0,
2146                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2147                                    slice_batch);
2148     }
2149 }
2150
2151 static void 
2152 gen75_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2153                                      struct encode_state *encode_state,
2154                                      struct intel_encoder_context *encoder_context,
2155                                      int slice_index,
2156                                      VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2157                                      struct intel_batchbuffer *slice_batch)
2158 {
2159     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2160     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2161     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2162     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2163     unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2164     unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2165     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2166     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2167     int i, j;
2168     int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2169     unsigned int *msg = NULL;
2170     unsigned char *msg_ptr = NULL;
2171
2172     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
2173     h_start_pos = slice_param->macroblock_address % width_in_mbs;
2174     v_start_pos = slice_param->macroblock_address / width_in_mbs;
2175     assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
2176
2177     dri_bo_map(vme_context->vme_output.bo , 0);
2178     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
2179
2180     if (next_slice_group_param) {
2181         h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2182         v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
2183     } else {
2184         h_next_start_pos = 0;
2185         v_next_start_pos = height_in_mbs;
2186     }
2187
2188     gen75_mfc_mpeg2_slicegroup_state(ctx,
2189                                      encoder_context,
2190                                      h_start_pos,
2191                                      v_start_pos,
2192                                      h_next_start_pos,
2193                                      v_next_start_pos,
2194                                      slice_index == 0,
2195                                      next_slice_group_param == NULL,
2196                                      slice_param->is_intra_slice,
2197                                      slice_param->quantiser_scale_code,
2198                                      slice_batch);
2199
2200     if (slice_index == 0) 
2201         intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2202
2203     /* Insert '00' to make sure the header is valid */
2204     mfc_context->insert_object(ctx,
2205                                encoder_context,
2206                                (unsigned int*)section_delimiter,
2207                                1,
2208                                8,   /* 8bits in the last DWORD */
2209                                1,   /* 1 byte */
2210                                1,
2211                                0,
2212                                0,
2213                                slice_batch);
2214
2215     for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2216         /* PAK for each macroblocks */
2217         for (j = 0; j < slice_param->num_macroblocks; j++) {
2218             int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2219             int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2220             int first_mb_in_slice = (j == 0);
2221             int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2222             int first_mb_in_slice_group = (i == 0 && j == 0);
2223             int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2224                                           j == slice_param->num_macroblocks - 1);
2225
2226             msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
2227
2228             if (slice_param->is_intra_slice) {
2229                 gen75_mfc_mpeg2_pak_object_intra(ctx,
2230                                                  encoder_context,
2231                                                  h_pos, v_pos,
2232                                                  first_mb_in_slice,
2233                                                  last_mb_in_slice,
2234                                                  first_mb_in_slice_group,
2235                                                  last_mb_in_slice_group,
2236                                                  0x1a,
2237                                                  slice_param->quantiser_scale_code,
2238                                                  0x3f,
2239                                                  0,
2240                                                  0xff,
2241                                                  slice_batch);
2242             } else {
2243                 int inter_rdo, intra_rdo;
2244                 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
2245                 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
2246
2247                 if (intra_rdo < inter_rdo) 
2248                         gen75_mfc_mpeg2_pak_object_intra(ctx,
2249                                                  encoder_context,
2250                                                  h_pos, v_pos,
2251                                                  first_mb_in_slice,
2252                                                  last_mb_in_slice,
2253                                                  first_mb_in_slice_group,
2254                                                  last_mb_in_slice_group,
2255                                                  0x1a,
2256                                                  slice_param->quantiser_scale_code,
2257                                                  0x3f,
2258                                                  0,
2259                                                  0xff,
2260                                                  slice_batch);
2261                 else
2262                         gen75_mfc_mpeg2_pak_object_inter(ctx,
2263                                                  encode_state,
2264                                                  encoder_context,
2265                                                  msg,
2266                                                  width_in_mbs, height_in_mbs,
2267                                                  h_pos, v_pos,
2268                                                  first_mb_in_slice,
2269                                                  last_mb_in_slice,
2270                                                  first_mb_in_slice_group,
2271                                                  last_mb_in_slice_group,
2272                                                  slice_param->quantiser_scale_code,
2273                                                  0,
2274                                                  0xff,
2275                                                  slice_batch);
2276             }
2277         }
2278
2279         slice_param++;
2280     }
2281
2282     dri_bo_unmap(vme_context->vme_output.bo);
2283
2284     /* tail data */
2285     if (next_slice_group_param == NULL) { /* end of a picture */
2286         mfc_context->insert_object(ctx,
2287                                    encoder_context,
2288                                    (unsigned int *)tail_delimiter,
2289                                    2,
2290                                    8,   /* 8bits in the last DWORD */
2291                                    5,   /* 5 bytes */
2292                                    1,
2293                                    1,
2294                                    0,
2295                                    slice_batch);
2296     } else {        /* end of a lsice group */
2297         mfc_context->insert_object(ctx,
2298                                    encoder_context,
2299                                    (unsigned int *)section_delimiter,
2300                                    1,
2301                                    8,   /* 8bits in the last DWORD */
2302                                    1,   /* 1 byte */
2303                                    1,
2304                                    1,
2305                                    0,
2306                                    slice_batch);
2307     }
2308 }
2309
2310 /* 
2311  * A batch buffer for all slices, including slice state, 
2312  * slice insert object and slice pak object commands
2313  *
2314  */
2315 static dri_bo *
2316 gen75_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2317                                            struct encode_state *encode_state,
2318                                            struct intel_encoder_context *encoder_context)
2319 {
2320     struct i965_driver_data *i965 = i965_driver_data(ctx);
2321     struct intel_batchbuffer *batch;
2322     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2323     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2324     dri_bo *batch_bo;
2325     int i;
2326     int buffer_size;
2327     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2328     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2329
2330     buffer_size = width_in_mbs * height_in_mbs * 64;
2331     batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
2332     batch_bo = batch->buffer;
2333
2334     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2335         if (i == encode_state->num_slice_params_ext - 1)
2336             next_slice_group_param = NULL;
2337         else
2338             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2339
2340         gen75_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2341     }
2342
2343     intel_batchbuffer_align(batch, 8);
2344     
2345     BEGIN_BCS_BATCH(batch, 2);
2346     OUT_BCS_BATCH(batch, 0);
2347     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2348     ADVANCE_BCS_BATCH(batch);
2349
2350     dri_bo_reference(batch_bo);
2351     intel_batchbuffer_free(batch);
2352
2353     return batch_bo;
2354 }
2355
2356 static void
2357 gen75_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2358                                             struct encode_state *encode_state,
2359                                             struct intel_encoder_context *encoder_context)
2360 {
2361     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2362
2363     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2364     mfc_context->set_surface_state(ctx, encoder_context);
2365     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2366     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
2367     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2368     gen75_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2369     gen75_mfc_mpeg2_qm_state(ctx, encoder_context);
2370     gen75_mfc_mpeg2_fqm_state(ctx, encoder_context);
2371 }
2372
2373 static void
2374 gen75_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2375                                     struct encode_state *encode_state,
2376                                     struct intel_encoder_context *encoder_context)
2377 {
2378     struct intel_batchbuffer *batch = encoder_context->base.batch;
2379     dri_bo *slice_batch_bo;
2380
2381     slice_batch_bo = gen75_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2382
2383     // begin programing
2384     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
2385     intel_batchbuffer_emit_mi_flush(batch);
2386     
2387     // picture level programing
2388     gen75_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
2389
2390     BEGIN_BCS_BATCH(batch, 2);
2391     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
2392     OUT_BCS_RELOC(batch,
2393                   slice_batch_bo,
2394                   I915_GEM_DOMAIN_COMMAND, 0, 
2395                   0);
2396     ADVANCE_BCS_BATCH(batch);
2397
2398     // end programing
2399     intel_batchbuffer_end_atomic(batch);
2400
2401     dri_bo_unreference(slice_batch_bo);
2402 }
2403
2404 static VAStatus
2405 intel_mfc_mpeg2_prepare(VADriverContextP ctx, 
2406                         struct encode_state *encode_state,
2407                         struct intel_encoder_context *encoder_context)
2408 {
2409     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2410     struct object_surface *obj_surface; 
2411     struct object_buffer *obj_buffer;
2412     struct i965_coded_buffer_segment *coded_buffer_segment;
2413     VAStatus vaStatus = VA_STATUS_SUCCESS;
2414     dri_bo *bo;
2415     int i;
2416
2417     /* reconstructed surface */
2418     obj_surface = encode_state->reconstructed_object;
2419     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2420     mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2421     dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2422     mfc_context->surface_state.width = obj_surface->orig_width;
2423     mfc_context->surface_state.height = obj_surface->orig_height;
2424     mfc_context->surface_state.w_pitch = obj_surface->width;
2425     mfc_context->surface_state.h_pitch = obj_surface->height;
2426
2427     /* forward reference */
2428     obj_surface = encode_state->reference_objects[0];
2429
2430     if (obj_surface && obj_surface->bo) {
2431         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2432         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2433     } else
2434         mfc_context->reference_surfaces[0].bo = NULL;
2435
2436     /* backward reference */
2437     obj_surface = encode_state->reference_objects[1];
2438
2439     if (obj_surface && obj_surface->bo) {
2440         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2441         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2442     } else {
2443         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2444
2445         if (mfc_context->reference_surfaces[1].bo)
2446             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2447     }
2448
2449     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2450         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2451
2452         if (mfc_context->reference_surfaces[i].bo)
2453             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2454     }
2455     
2456     /* input YUV surface */
2457     obj_surface = encode_state->input_yuv_object;
2458     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2459     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2460
2461     /* coded buffer */
2462     obj_buffer = encode_state->coded_buf_object;
2463     bo = obj_buffer->buffer_store->bo;
2464     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2465     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2466     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2467     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2468
2469     /* set the internal flag to 0 to indicate the coded size is unknown */
2470     dri_bo_map(bo, 1);
2471     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2472     coded_buffer_segment->mapped = 0;
2473     coded_buffer_segment->codec = encoder_context->codec;
2474     dri_bo_unmap(bo);
2475
2476     return vaStatus;
2477 }
2478
2479 static VAStatus
2480 gen75_mfc_mpeg2_encode_picture(VADriverContextP ctx, 
2481                                struct encode_state *encode_state,
2482                                struct intel_encoder_context *encoder_context)
2483 {
2484     gen75_mfc_init(ctx, encode_state, encoder_context);
2485     intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2486     /*Programing bcs pipeline*/
2487     gen75_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2488     gen75_mfc_run(ctx, encode_state, encoder_context);
2489
2490     return VA_STATUS_SUCCESS;
2491 }
2492
2493 static void
2494 gen75_mfc_context_destroy(void *context)
2495 {
2496     struct gen6_mfc_context *mfc_context = context;
2497     int i;
2498
2499     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2500     mfc_context->post_deblocking_output.bo = NULL;
2501
2502     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2503     mfc_context->pre_deblocking_output.bo = NULL;
2504
2505     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2506     mfc_context->uncompressed_picture_source.bo = NULL;
2507
2508     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
2509     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2510
2511     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2512         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2513         mfc_context->direct_mv_buffers[i].bo = NULL;
2514     }
2515
2516     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2517     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
2518
2519     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2520     mfc_context->macroblock_status_buffer.bo = NULL;
2521
2522     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2523     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2524
2525     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2526     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2527
2528     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2529         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2530         mfc_context->reference_surfaces[i].bo = NULL;  
2531     }
2532
2533     i965_gpe_context_destroy(&mfc_context->gpe_context);
2534
2535     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2536     mfc_context->mfc_batchbuffer_surface.bo = NULL;
2537
2538     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2539     mfc_context->aux_batchbuffer_surface.bo = NULL;
2540
2541     if (mfc_context->aux_batchbuffer)
2542         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2543
2544     mfc_context->aux_batchbuffer = NULL;
2545
2546     free(mfc_context);
2547 }
2548
2549 static VAStatus gen75_mfc_pipeline(VADriverContextP ctx,
2550                   VAProfile profile,
2551                   struct encode_state *encode_state,
2552                   struct intel_encoder_context *encoder_context)
2553 {
2554     VAStatus vaStatus;
2555
2556     switch (profile) {
2557     case VAProfileH264Baseline:
2558     case VAProfileH264Main:
2559     case VAProfileH264High:
2560         vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
2561         break;
2562
2563         /* FIXME: add for other profile */
2564     case VAProfileMPEG2Simple:
2565     case VAProfileMPEG2Main:
2566         vaStatus = gen75_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
2567         break;
2568
2569     default:
2570         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2571         break;
2572     }
2573
2574     return vaStatus;
2575 }
2576
2577 Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2578 {
2579     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
2580
2581     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2582
2583     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
2584     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
2585
2586     mfc_context->gpe_context.curbe.length = 32 * 4;
2587
2588     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2589     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
2590     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
2591     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
2592     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
2593
2594     i965_gpe_load_kernels(ctx,
2595                           &mfc_context->gpe_context,
2596                           gen75_mfc_kernels,
2597                           NUM_MFC_KERNEL);
2598
2599     mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
2600     mfc_context->set_surface_state = gen75_mfc_surface_state;
2601     mfc_context->ind_obj_base_addr_state = gen75_mfc_ind_obj_base_addr_state;
2602     mfc_context->avc_img_state = gen75_mfc_avc_img_state;
2603     mfc_context->avc_qm_state = gen75_mfc_avc_qm_state;
2604     mfc_context->avc_fqm_state = gen75_mfc_avc_fqm_state;
2605     mfc_context->insert_object = gen75_mfc_avc_insert_object;
2606     mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;
2607
2608     encoder_context->mfc_context = mfc_context;
2609     encoder_context->mfc_context_destroy = gen75_mfc_context_destroy;
2610     encoder_context->mfc_pipeline = gen75_mfc_pipeline;
2611     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
2612
2613     return True;
2614 }