Optimize quantization rounding precision for MPEG2 encoding on haswell
[platform/upstream/libva-intel-driver.git] / src / gen75_mfc.c
1 /*
2  * Copyright © 2010-2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45
/* Build-time switch; its consumers are outside this part of the file. */
#define MFC_SOFTWARE_HASWELL    1

/* Lowest device revision number treated as "B0 stepping or later". */
#define B0_STEP_REV             2

/*
 * Non-zero when the driver data's device revision is >= B0_STEP_REV.
 * The argument is parenthesized so that any pointer-valued expression
 * (not just a plain identifier) expands correctly.
 */
#define IS_STEPPING_BPLUS(i965) (((i965)->intel.revision) >= B0_STEP_REV)
50
51 static const uint32_t gen75_mfc_batchbuffer_avc_intra[][4] = {
52 #include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
53 };
54
55 static const uint32_t gen75_mfc_batchbuffer_avc_inter[][4] = {
56 #include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
57 };
58
59 static struct i965_kernel gen75_mfc_kernels[] = {
60     {
61         "MFC AVC INTRA BATCHBUFFER ",
62         MFC_BATCHBUFFER_AVC_INTRA,
63         gen75_mfc_batchbuffer_avc_intra,
64         sizeof(gen75_mfc_batchbuffer_avc_intra),
65         NULL
66     },
67
68     {
69         "MFC AVC INTER BATCHBUFFER ",
70         MFC_BATCHBUFFER_AVC_INTER,
71         gen75_mfc_batchbuffer_avc_inter,
72         sizeof(gen75_mfc_batchbuffer_avc_inter),
73         NULL
74     },
75 };
76
/* Inter partition mode: two-bit field mask and the values defined under it. */
#define INTER_MODE_MASK     0x03
#define INTER_16X8          0x01
#define INTER_8X16          0x02
#define INTER_8X8           0x03

/* Mask covering bits 8..15 (sub-macroblock shape field). */
#define SUBMB_SHAPE_MASK    0x00FF00

/* Values positioned at bit 20; presumably motion-vector count encodings -- confirm against users. */
#define INTER_MV8           (4 << 20)
#define INTER_MV32          (6 << 20)
85
86
87 static void
88 gen75_mfc_pipe_mode_select(VADriverContextP ctx,
89                           int standard_select,
90                           struct intel_encoder_context *encoder_context)
91 {
92     struct intel_batchbuffer *batch = encoder_context->base.batch;
93     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
94     assert(standard_select == MFX_FORMAT_MPEG2 ||
95            standard_select == MFX_FORMAT_AVC);
96
97     BEGIN_BCS_BATCH(batch, 5);
98
99     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
100     OUT_BCS_BATCH(batch,
101                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
102                   (MFD_MODE_VLD << 15) | /* VLD mode */
103                   (0 << 10) | /* Stream-Out Enable */
104                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
105                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
106                   (0 << 5)  | /* not in stitch mode */
107                   (1 << 4)  | /* encoding mode */
108                   (standard_select << 0));  /* standard select: avc or mpeg2 */
109     OUT_BCS_BATCH(batch,
110                   (0 << 7)  | /* expand NOA bus flag */
111                   (0 << 6)  | /* disable slice-level clock gating */
112                   (0 << 5)  | /* disable clock gating for NOA */
113                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
114                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
115                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
116                   (0 << 1)  |
117                   (0 << 0));
118     OUT_BCS_BATCH(batch, 0);
119     OUT_BCS_BATCH(batch, 0);
120
121     ADVANCE_BCS_BATCH(batch);
122 }
123
124 static void
125 gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
126 {
127     struct intel_batchbuffer *batch = encoder_context->base.batch;
128     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
129
130     BEGIN_BCS_BATCH(batch, 6);
131
132     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
133     OUT_BCS_BATCH(batch, 0);
134     OUT_BCS_BATCH(batch,
135                   ((mfc_context->surface_state.height - 1) << 18) |
136                   ((mfc_context->surface_state.width - 1) << 4));
137     OUT_BCS_BATCH(batch,
138                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
139                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
140                   (0 << 22) | /* surface object control state, FIXME??? */
141                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
142                   (0 << 2)  | /* must be 0 for interleave U/V */
143                   (1 << 1)  | /* must be tiled */
144                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
145     OUT_BCS_BATCH(batch,
146                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
147                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
148     OUT_BCS_BATCH(batch, 0);
149
150     ADVANCE_BCS_BATCH(batch);
151 }
152
153 static void
154 gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
155                                 struct intel_encoder_context *encoder_context)
156 {
157     struct intel_batchbuffer *batch = encoder_context->base.batch;
158     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
159     struct gen6_vme_context *vme_context = encoder_context->vme_context;
160
161     BEGIN_BCS_BATCH(batch, 26);
162
163     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
164         /* the DW1-3 is for the MFX indirect bistream offset */
165     OUT_BCS_BATCH(batch, 0);
166     OUT_BCS_BATCH(batch, 0);
167     OUT_BCS_BATCH(batch, 0);
168         /* the DW4-5 is the MFX upper bound */
169     OUT_BCS_BATCH(batch, 0);
170     OUT_BCS_BATCH(batch, 0);
171
172     /* the DW6-10 is for MFX Indirect MV Object Base Address */
173     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
174     OUT_BCS_BATCH(batch, 0);
175     OUT_BCS_BATCH(batch, 0);
176     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
177     OUT_BCS_BATCH(batch, 0);
178
179      /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
180     OUT_BCS_BATCH(batch, 0);
181     OUT_BCS_BATCH(batch, 0);
182     OUT_BCS_BATCH(batch, 0);
183     OUT_BCS_BATCH(batch, 0);
184     OUT_BCS_BATCH(batch, 0);
185
186      /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */    
187     OUT_BCS_BATCH(batch, 0);
188     OUT_BCS_BATCH(batch, 0);
189     OUT_BCS_BATCH(batch, 0);
190     OUT_BCS_BATCH(batch, 0);
191     OUT_BCS_BATCH(batch, 0);
192
193     /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/        
194     OUT_BCS_RELOC(batch,
195                   mfc_context->mfc_indirect_pak_bse_object.bo,
196                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
197                   0);
198     OUT_BCS_BATCH(batch, 0);
199     OUT_BCS_BATCH(batch, 0);
200         
201     OUT_BCS_RELOC(batch,
202                   mfc_context->mfc_indirect_pak_bse_object.bo,
203                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
204                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
205     OUT_BCS_BATCH(batch, 0);
206
207     ADVANCE_BCS_BATCH(batch);
208 }
209
210 static void
211 gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
212 {
213     struct intel_batchbuffer *batch = encoder_context->base.batch;
214     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
215     struct gen6_vme_context *vme_context = encoder_context->vme_context;
216     struct i965_driver_data *i965 = i965_driver_data(ctx);
217
218     if (IS_STEPPING_BPLUS(i965)) {
219         gen75_mfc_ind_obj_base_addr_state_bplus(ctx, encoder_context);
220         return;
221     }
222
223     BEGIN_BCS_BATCH(batch, 11);
224
225     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
226     OUT_BCS_BATCH(batch, 0);
227     OUT_BCS_BATCH(batch, 0);
228     /* MFX Indirect MV Object Base Address */
229     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
230     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
231     OUT_BCS_BATCH(batch, 0);
232     OUT_BCS_BATCH(batch, 0);
233     OUT_BCS_BATCH(batch, 0);
234     OUT_BCS_BATCH(batch, 0);
235     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
236     OUT_BCS_RELOC(batch,
237                   mfc_context->mfc_indirect_pak_bse_object.bo,
238                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
239                   0);
240     OUT_BCS_RELOC(batch,
241                   mfc_context->mfc_indirect_pak_bse_object.bo,
242                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
243                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
244
245     ADVANCE_BCS_BATCH(batch);
246 }
247
248 static void
249 gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,  
250                        struct intel_encoder_context *encoder_context)
251 {
252     struct intel_batchbuffer *batch = encoder_context->base.batch;
253     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
254     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
255
256     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
257     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
258
259     BEGIN_BCS_BATCH(batch, 16);
260
261     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
262     /*DW1. MB setting of frame */
263     OUT_BCS_BATCH(batch,
264                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
265     OUT_BCS_BATCH(batch, 
266                   ((height_in_mbs - 1) << 16) | 
267                   ((width_in_mbs - 1) << 0));
268     /* DW3 QP setting */
269     OUT_BCS_BATCH(batch, 
270                   (0 << 24) |   /* Second Chroma QP Offset */
271                   (0 << 16) |   /* Chroma QP Offset */
272                   (0 << 14) |   /* Max-bit conformance Intra flag */
273                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
274                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
275                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
276                   (0 << 8)  |   /* FIXME: Image Structure */
277                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
278     OUT_BCS_BATCH(batch,
279                   (0 << 16) |   /* Mininum Frame size */
280                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
281                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
282                   (0 << 13) |   /* CABAC 0 word insertion test enable */
283                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
284                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
285                   (0 << 8)  |   /* FIXME: MbMvFormatFlag */
286                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
287                   (0 << 6)  |   /* Only valid for VLD decoding mode */
288                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
289                   (0 << 4)  |   /* Direct 8x8 inference flag */
290                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
291                   (1 << 2)  |   /* Frame MB only flag */
292                   (0 << 1)  |   /* MBAFF mode is in active */
293                   (0 << 0));    /* Field picture flag */
294         /* DW5 Trellis quantization */
295     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
296     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
297                   (0xBB8 << 16) |       /* InterMbMaxSz */
298                   (0xEE8) );            /* IntraMbMaxSz */
299     OUT_BCS_BATCH(batch, 0);            /* Reserved */
300         /* DW8. QP delta */
301     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
302     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
303         /* DW10. Bit setting for MB */  
304     OUT_BCS_BATCH(batch, 0x8C000000);
305     OUT_BCS_BATCH(batch, 0x00010000);
306         /* DW12. */
307     OUT_BCS_BATCH(batch, 0);
308     OUT_BCS_BATCH(batch, 0x02010100);
309         /* DW14. For short format */
310     OUT_BCS_BATCH(batch, 0);
311     OUT_BCS_BATCH(batch, 0);
312
313     ADVANCE_BCS_BATCH(batch);
314 }
315
316 static void
317 gen75_mfc_qm_state(VADriverContextP ctx,
318                   int qm_type,
319                   unsigned int *qm,
320                   int qm_length,
321                   struct intel_encoder_context *encoder_context)
322 {
323     struct intel_batchbuffer *batch = encoder_context->base.batch;
324     unsigned int qm_buffer[16];
325
326     assert(qm_length <= 16);
327     assert(sizeof(*qm) == 4);
328     memcpy(qm_buffer, qm, qm_length * 4);
329
330     BEGIN_BCS_BATCH(batch, 18);
331     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
332     OUT_BCS_BATCH(batch, qm_type << 0);
333     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
334     ADVANCE_BCS_BATCH(batch);
335 }
336
337 static void
338 gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
339 {
340     unsigned int qm[16] = {
341         0x10101010, 0x10101010, 0x10101010, 0x10101010,
342         0x10101010, 0x10101010, 0x10101010, 0x10101010,
343         0x10101010, 0x10101010, 0x10101010, 0x10101010,
344         0x10101010, 0x10101010, 0x10101010, 0x10101010
345     };
346
347     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
348     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
349     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
350     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
351 }
352
353 static void
354 gen75_mfc_fqm_state(VADriverContextP ctx,
355                    int fqm_type,
356                    unsigned int *fqm,
357                    int fqm_length,
358                    struct intel_encoder_context *encoder_context)
359 {
360     struct intel_batchbuffer *batch = encoder_context->base.batch;
361     unsigned int fqm_buffer[32];
362
363     assert(fqm_length <= 32);
364     assert(sizeof(*fqm) == 4);
365     memcpy(fqm_buffer, fqm, fqm_length * 4);
366
367     BEGIN_BCS_BATCH(batch, 34);
368     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
369     OUT_BCS_BATCH(batch, fqm_type << 0);
370     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
371     ADVANCE_BCS_BATCH(batch);
372 }
373
374 static void
375 gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
376 {
377     unsigned int qm[32] = {
378         0x10001000, 0x10001000, 0x10001000, 0x10001000,
379         0x10001000, 0x10001000, 0x10001000, 0x10001000,
380         0x10001000, 0x10001000, 0x10001000, 0x10001000,
381         0x10001000, 0x10001000, 0x10001000, 0x10001000,
382         0x10001000, 0x10001000, 0x10001000, 0x10001000,
383         0x10001000, 0x10001000, 0x10001000, 0x10001000,
384         0x10001000, 0x10001000, 0x10001000, 0x10001000,
385         0x10001000, 0x10001000, 0x10001000, 0x10001000
386     };
387
388     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
389     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
390     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
391     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
392 }
393
394 static void
395 gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
396                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
397                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
398                            struct intel_batchbuffer *batch)
399 {
400     if (batch == NULL)
401         batch = encoder_context->base.batch;
402
403     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
404
405     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
406     OUT_BCS_BATCH(batch,
407                   (0 << 16) |   /* always start at offset 0 */
408                   (data_bits_in_last_dw << 8) |
409                   (skip_emul_byte_count << 4) |
410                   (!!emulation_flag << 3) |
411                   ((!!is_last_header) << 2) |
412                   ((!!is_end_of_slice) << 1) |
413                   (0 << 0));    /* FIXME: ??? */
414     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
415
416     ADVANCE_BCS_BATCH(batch);
417 }
418
419
420 static void gen75_mfc_init(VADriverContextP ctx,
421                         struct encode_state *encode_state,
422                         struct intel_encoder_context *encoder_context)
423 {
424     struct i965_driver_data *i965 = i965_driver_data(ctx);
425     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
426     dri_bo *bo;
427     int i;
428     int width_in_mbs = 0;
429     int height_in_mbs = 0;
430
431     if (encoder_context->codec == CODEC_H264) {
432         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
433         width_in_mbs = pSequenceParameter->picture_width_in_mbs;
434         height_in_mbs = pSequenceParameter->picture_height_in_mbs;
435     } else {
436         VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
437
438         assert(encoder_context->codec == CODEC_MPEG2);
439
440         width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
441         height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
442     }
443
444     /*Encode common setup for MFC*/
445     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
446     mfc_context->post_deblocking_output.bo = NULL;
447
448     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
449     mfc_context->pre_deblocking_output.bo = NULL;
450
451     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
452     mfc_context->uncompressed_picture_source.bo = NULL;
453
454     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
455     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
456
457     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
458         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
459         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
460         mfc_context->direct_mv_buffers[i].bo = NULL;
461     }
462
463     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
464         if (mfc_context->reference_surfaces[i].bo != NULL)
465             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
466         mfc_context->reference_surfaces[i].bo = NULL;  
467     }
468
469     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
470     bo = dri_bo_alloc(i965->intel.bufmgr,
471                       "Buffer",
472                       width_in_mbs * 64,
473                       64);
474     assert(bo);
475     mfc_context->intra_row_store_scratch_buffer.bo = bo;
476
477     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
478     bo = dri_bo_alloc(i965->intel.bufmgr,
479                       "Buffer",
480                       width_in_mbs * height_in_mbs * 16,
481                       64);
482     assert(bo);
483     mfc_context->macroblock_status_buffer.bo = bo;
484
485     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
486     bo = dri_bo_alloc(i965->intel.bufmgr,
487                       "Buffer",
488                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
489                       64);
490     assert(bo);
491     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
492
493     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
494     bo = dri_bo_alloc(i965->intel.bufmgr,
495                       "Buffer",
496                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
497                       0x1000);
498     assert(bo);
499     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
500
501     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
502     mfc_context->mfc_batchbuffer_surface.bo = NULL;
503
504     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
505     mfc_context->aux_batchbuffer_surface.bo = NULL;
506
507     if (mfc_context->aux_batchbuffer)
508         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
509
510     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0);
511     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
512     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
513     mfc_context->aux_batchbuffer_surface.pitch = 16;
514     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
515     mfc_context->aux_batchbuffer_surface.size_block = 16;
516
517     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
518 }
519
520 static void
521 gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
522                                 struct intel_encoder_context *encoder_context)
523 {
524     struct intel_batchbuffer *batch = encoder_context->base.batch;
525     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
526     int i;
527
528     BEGIN_BCS_BATCH(batch, 61);
529
530     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
531
532     /* the DW1-3 is for pre_deblocking */
533     if (mfc_context->pre_deblocking_output.bo)
534         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
535                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
536                       0);
537     else
538         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
539
540         OUT_BCS_BATCH(batch, 0);
541         OUT_BCS_BATCH(batch, 0);
542      /* the DW4-6 is for the post_deblocking */
543
544     if (mfc_context->post_deblocking_output.bo)
545         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
546                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
547                       0);                                                                                       /* post output addr  */ 
548     else
549         OUT_BCS_BATCH(batch, 0);
550         OUT_BCS_BATCH(batch, 0);
551         OUT_BCS_BATCH(batch, 0);
552
553      /* the DW7-9 is for the uncompressed_picture */
554     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
555                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
556                   0); /* uncompressed data */
557
558         OUT_BCS_BATCH(batch, 0);
559         OUT_BCS_BATCH(batch, 0);
560
561      /* the DW10-12 is for the mb status */
562     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
563                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
564                   0); /* StreamOut data*/
565         OUT_BCS_BATCH(batch, 0);
566         OUT_BCS_BATCH(batch, 0);
567
568      /* the DW13-15 is for the intra_row_store_scratch */
569     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
570                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
571                   0);   
572         OUT_BCS_BATCH(batch, 0);
573         OUT_BCS_BATCH(batch, 0);
574
575      /* the DW16-18 is for the deblocking filter */
576     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
577                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
578                   0);
579         OUT_BCS_BATCH(batch, 0);
580         OUT_BCS_BATCH(batch, 0);
581
582     /* the DW 19-50 is for Reference pictures*/
583     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
584         if ( mfc_context->reference_surfaces[i].bo != NULL) {
585             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
586                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
587                           0);                   
588         } else {
589             OUT_BCS_BATCH(batch, 0);
590         }
591         OUT_BCS_BATCH(batch, 0);
592     }
593         OUT_BCS_BATCH(batch, 0);
594
595         /* The DW 52-54 is for the MB status buffer */
596     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
597                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
598                   0);                                                                                   /* Macroblock status buffer*/
599         
600         OUT_BCS_BATCH(batch, 0);
601         OUT_BCS_BATCH(batch, 0);
602
603         /* the DW 55-57 is the ILDB buffer */
604         OUT_BCS_BATCH(batch, 0);
605         OUT_BCS_BATCH(batch, 0);
606         OUT_BCS_BATCH(batch, 0);
607
608         /* the DW 58-60 is the second ILDB buffer */
609         OUT_BCS_BATCH(batch, 0);
610         OUT_BCS_BATCH(batch, 0);
611         OUT_BCS_BATCH(batch, 0);
612     ADVANCE_BCS_BATCH(batch);
613 }
614
615 static void
616 gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
617 {
618     struct intel_batchbuffer *batch = encoder_context->base.batch;
619     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
620     struct i965_driver_data *i965 = i965_driver_data(ctx);
621     int i;
622
623     if (IS_STEPPING_BPLUS(i965)) {
624         gen75_mfc_pipe_buf_addr_state_bplus(ctx, encoder_context);
625         return;
626     }
627
628     BEGIN_BCS_BATCH(batch, 25);
629
630     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
631
632     if (mfc_context->pre_deblocking_output.bo)
633         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
634                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
635                       0);
636     else
637         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
638
639     if (mfc_context->post_deblocking_output.bo)
640         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
641                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
642                       0);                                                                                       /* post output addr  */ 
643     else
644         OUT_BCS_BATCH(batch, 0);
645
646     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
647                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
648                   0);                                                                                   /* uncompressed data */
649     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
650                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
651                   0);                                                                                   /* StreamOut data*/
652     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
653                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
654                   0);   
655     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
656                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
657                   0);
658     /* 7..22 Reference pictures*/
659     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
660         if ( mfc_context->reference_surfaces[i].bo != NULL) {
661             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
662                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
663                           0);                   
664         } else {
665             OUT_BCS_BATCH(batch, 0);
666         }
667     }
668     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
669                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
670                   0);                                                                                   /* Macroblock status buffer*/
671
672         OUT_BCS_BATCH(batch, 0);
673
674     ADVANCE_BCS_BATCH(batch);
675 }
676
677 static void
678 gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
679                                 struct intel_encoder_context *encoder_context)
680 {
681     struct intel_batchbuffer *batch = encoder_context->base.batch;
682     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
683
684     int i;
685
686     BEGIN_BCS_BATCH(batch, 71);
687
688     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
689
690     /* Reference frames and Current frames */
691     /* the DW1-32 is for the direct MV for reference */
692     for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
693         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
694             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
695                           I915_GEM_DOMAIN_INSTRUCTION, 0,
696                           0);
697             OUT_BCS_BATCH(batch, 0);
698         } else {
699             OUT_BCS_BATCH(batch, 0);
700             OUT_BCS_BATCH(batch, 0);
701         }
702     }
703         OUT_BCS_BATCH(batch, 0);
704
705         /* the DW34-36 is the MV for the current reference */
706         OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
707                           I915_GEM_DOMAIN_INSTRUCTION, 0,
708                           0);
709
710         OUT_BCS_BATCH(batch, 0);
711         OUT_BCS_BATCH(batch, 0);
712
713     /* POL list */
714     for(i = 0; i < 32; i++) {
715         OUT_BCS_BATCH(batch, i/2);
716     }
717     OUT_BCS_BATCH(batch, 0);
718     OUT_BCS_BATCH(batch, 0);
719
720     ADVANCE_BCS_BATCH(batch);
721 }
722
723 static void
724 gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
725 {
726     struct intel_batchbuffer *batch = encoder_context->base.batch;
727     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
728     struct i965_driver_data *i965 = i965_driver_data(ctx);
729     int i;
730
731     if (IS_STEPPING_BPLUS(i965)) {
732         gen75_mfc_avc_directmode_state_bplus(ctx, encoder_context);
733         return;
734     }
735
736     BEGIN_BCS_BATCH(batch, 69);
737
738     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
739
740     /* Reference frames and Current frames */
741     for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
742         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
743             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
744                           I915_GEM_DOMAIN_INSTRUCTION, 0,
745                           0);
746         } else {
747             OUT_BCS_BATCH(batch, 0);
748         }
749     }
750
751     /* POL list */
752     for(i = 0; i < 32; i++) {
753         OUT_BCS_BATCH(batch, i/2);
754     }
755     OUT_BCS_BATCH(batch, 0);
756     OUT_BCS_BATCH(batch, 0);
757
758     ADVANCE_BCS_BATCH(batch);
759 }
760
761
762 static void
763 gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
764                                 struct intel_encoder_context *encoder_context)
765 {
766     struct intel_batchbuffer *batch = encoder_context->base.batch;
767     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
768
769     BEGIN_BCS_BATCH(batch, 10);
770
771     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
772     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
773                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
774                   0);
775     OUT_BCS_BATCH(batch, 0);
776     OUT_BCS_BATCH(batch, 0);
777         
778         /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
779     OUT_BCS_BATCH(batch, 0);
780     OUT_BCS_BATCH(batch, 0);
781     OUT_BCS_BATCH(batch, 0);
782
783         /* the DW7-9 is for Bitplane Read Buffer Base Address */
784     OUT_BCS_BATCH(batch, 0);
785     OUT_BCS_BATCH(batch, 0);
786     OUT_BCS_BATCH(batch, 0);
787
788     ADVANCE_BCS_BATCH(batch);
789 }
790
791 static void
792 gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
793 {
794     struct intel_batchbuffer *batch = encoder_context->base.batch;
795     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
796     struct i965_driver_data *i965 = i965_driver_data(ctx);
797
798     if (IS_STEPPING_BPLUS(i965)) {
799         gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, encoder_context);
800         return;
801     }
802
803     BEGIN_BCS_BATCH(batch, 4);
804
805     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
806     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
807                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
808                   0);
809     OUT_BCS_BATCH(batch, 0);
810     OUT_BCS_BATCH(batch, 0);
811
812     ADVANCE_BCS_BATCH(batch);
813 }
814
815
816 static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
817                                       struct encode_state *encode_state,
818                                       struct intel_encoder_context *encoder_context)
819 {
820     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
821
822     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
823     mfc_context->set_surface_state(ctx, encoder_context);
824     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
825     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
826     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
827     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
828     mfc_context->avc_qm_state(ctx, encoder_context);
829     mfc_context->avc_fqm_state(ctx, encoder_context);
830     gen75_mfc_avc_directmode_state(ctx, encoder_context); 
831     intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
832 }
833
834
835 static VAStatus gen75_mfc_run(VADriverContextP ctx, 
836                              struct encode_state *encode_state,
837                              struct intel_encoder_context *encoder_context)
838 {
839     struct intel_batchbuffer *batch = encoder_context->base.batch;
840
841     intel_batchbuffer_flush(batch);             //run the pipeline
842
843     return VA_STATUS_SUCCESS;
844 }
845
846
847 static VAStatus
848 gen75_mfc_stop(VADriverContextP ctx, 
849               struct encode_state *encode_state,
850               struct intel_encoder_context *encoder_context,
851               int *encoded_bits_size)
852 {
853     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
854     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
855     VACodedBufferSegment *coded_buffer_segment;
856     
857     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
858     assert(vaStatus == VA_STATUS_SUCCESS);
859     *encoded_bits_size = coded_buffer_segment->size * 8;
860     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
861
862     return VA_STATUS_SUCCESS;
863 }
864
865
866 static void
867 gen75_mfc_avc_slice_state(VADriverContextP ctx,
868                          VAEncPictureParameterBufferH264 *pic_param,
869                          VAEncSliceParameterBufferH264 *slice_param,
870                          struct encode_state *encode_state,
871                          struct intel_encoder_context *encoder_context,
872                          int rate_control_enable,
873                          int qp,
874                          struct intel_batchbuffer *batch)
875 {
876     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
877     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
878     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
879     int beginmb = slice_param->macroblock_address;
880     int endmb = beginmb + slice_param->num_macroblocks;
881     int beginx = beginmb % width_in_mbs;
882     int beginy = beginmb / width_in_mbs;
883     int nextx =  endmb % width_in_mbs;
884     int nexty = endmb / width_in_mbs;
885     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
886     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
887     int maxQpN, maxQpP;
888     unsigned char correct[6], grow, shrink;
889     int i;
890     int bslice = 0;
891     int weighted_pred_idc = 0;
892     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
893     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
894
895     if (batch == NULL)
896         batch = encoder_context->base.batch;
897
898     if (slice_type == SLICE_TYPE_P) {
899         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
900     } else if (slice_type == SLICE_TYPE_B) {
901         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
902         bslice = 1;
903
904         if (weighted_pred_idc == 2) {
905             /* 8.4.3 - Derivation process for prediction weights (8-279) */
906             luma_log2_weight_denom = 5;
907             chroma_log2_weight_denom = 5;
908         }
909     }
910
911     maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
912     maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
913
914     for (i = 0; i < 6; i++)
915         correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
916
917     grow = mfc_context->bit_rate_control_context[slice_type].GrowInit + 
918         (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
919     shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit + 
920         (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
921
922     BEGIN_BCS_BATCH(batch, 11);;
923
924     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
925     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
926
927     if (slice_type == SLICE_TYPE_I) {
928         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
929     } else {
930         OUT_BCS_BATCH(batch,
931                       (1 << 16) | (bslice << 24) |                      /*1 reference frame*/
932                       (chroma_log2_weight_denom << 8) |
933                       (luma_log2_weight_denom << 0));
934     }
935
936     OUT_BCS_BATCH(batch, 
937                   (weighted_pred_idc << 30) |
938                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
939                   (slice_param->disable_deblocking_filter_idc << 27) |
940                   (slice_param->cabac_init_idc << 24) |
941                   (qp<<16) |                    /*Slice Quantization Parameter*/
942                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
943                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
944     OUT_BCS_BATCH(batch,
945                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
946                   (beginx << 16) |
947                   slice_param->macroblock_address );
948     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
949     OUT_BCS_BATCH(batch, 
950                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
951                   (1 << 30) |           /*ResetRateControlCounter*/
952                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
953                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
954                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
955                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
956                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
957                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
958                   (last_slice << 19) |     /*IsLastSlice*/
959                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
960                   (1 << 17) |       /*HeaderPresentFlag*/       
961                   (1 << 16) |       /*SliceData PresentFlag*/
962                   (1 << 15) |       /*TailPresentFlag*/
963                   (1 << 13) |       /*RBSP NAL TYPE*/   
964                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
965     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
966     OUT_BCS_BATCH(batch,
967                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
968                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
969                   (shrink << 8)  |
970                   (grow << 0));   
971     OUT_BCS_BATCH(batch,
972                   (correct[5] << 20) |
973                   (correct[4] << 16) |
974                   (correct[3] << 12) |
975                   (correct[2] << 8) |
976                   (correct[1] << 4) |
977                   (correct[0] << 0));
978     OUT_BCS_BATCH(batch, 0);
979
980     ADVANCE_BCS_BATCH(batch);
981 }
982
983
984 #ifdef MFC_SOFTWARE_HASWELL
985
986 static int
987 gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
988                                 int qp,unsigned int *msg,
989                               struct intel_encoder_context *encoder_context,
990                               unsigned char target_mb_size, unsigned char max_mb_size,
991                               struct intel_batchbuffer *batch)
992 {
993     int len_in_dwords = 12;
994     unsigned int intra_msg;
995 #define         INTRA_MSG_FLAG          (1 << 13)
996 #define         INTRA_MBTYPE_MASK       (0x1F0000)
997     if (batch == NULL)
998         batch = encoder_context->base.batch;
999
1000     BEGIN_BCS_BATCH(batch, len_in_dwords);
1001
1002     intra_msg = msg[0] & 0xC0FF;
1003     intra_msg |= INTRA_MSG_FLAG;
1004     intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
1005     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1006     OUT_BCS_BATCH(batch, 0);
1007     OUT_BCS_BATCH(batch, 0);
1008     OUT_BCS_BATCH(batch, 
1009                   (0 << 24) |           /* PackedMvNum, Debug*/
1010                   (0 << 20) |           /* No motion vector */
1011                   (1 << 19) |           /* CbpDcY */
1012                   (1 << 18) |           /* CbpDcU */
1013                   (1 << 17) |           /* CbpDcV */
1014                   intra_msg);
1015
1016     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
1017     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
1018     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
1019
1020     /*Stuff for Intra MB*/
1021     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
1022     OUT_BCS_BATCH(batch, msg[2]);       
1023     OUT_BCS_BATCH(batch, msg[3]&0xFF);  
1024     
1025     /*MaxSizeInWord and TargetSzieInWord*/
1026     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1027                   (target_mb_size << 16) );
1028
1029     OUT_BCS_BATCH(batch, 0);
1030
1031     ADVANCE_BCS_BATCH(batch);
1032
1033     return len_in_dwords;
1034 }
1035
1036 static int
1037 gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1038                               unsigned int *msg, unsigned int offset,
1039                               struct intel_encoder_context *encoder_context,
1040                               unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1041                               struct intel_batchbuffer *batch)
1042 {
1043     int len_in_dwords = 12;
1044         unsigned int inter_msg = 0;
1045     if (batch == NULL)
1046         batch = encoder_context->base.batch;
1047     {
1048 #define MSG_MV_OFFSET   4
1049         unsigned int *mv_ptr;
1050         mv_ptr = msg + MSG_MV_OFFSET;
1051         /* MV of VME output is based on 16 sub-blocks. So it is necessary
1052          * to convert them to be compatible with the format of AVC_PAK
1053          * command.
1054          */
1055         if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
1056                 /* MV[0] and MV[2] are replicated */
1057                 mv_ptr[4] = mv_ptr[0];
1058                 mv_ptr[5] = mv_ptr[1];
1059                 mv_ptr[2] = mv_ptr[8];
1060                 mv_ptr[3] = mv_ptr[9];
1061                 mv_ptr[6] = mv_ptr[8]; 
1062                 mv_ptr[7] = mv_ptr[9]; 
1063         } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
1064                 /* MV[0] and MV[1] are replicated */
1065                 mv_ptr[2] = mv_ptr[0];  
1066                 mv_ptr[3] = mv_ptr[1];
1067                 mv_ptr[4] = mv_ptr[16]; 
1068                 mv_ptr[5] = mv_ptr[17]; 
1069                 mv_ptr[6] = mv_ptr[24];
1070                 mv_ptr[7] = mv_ptr[25];
1071         } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1072                         !(msg[1] & SUBMB_SHAPE_MASK)) {
1073                 /* Don't touch MV[0] or MV[1] */
1074                 mv_ptr[2] = mv_ptr[8];
1075                 mv_ptr[3] = mv_ptr[9];
1076                 mv_ptr[4] = mv_ptr[16];
1077                 mv_ptr[5] = mv_ptr[17];
1078                 mv_ptr[6] = mv_ptr[24];
1079                 mv_ptr[7] = mv_ptr[25];
1080         }
1081     }
1082
1083     BEGIN_BCS_BATCH(batch, len_in_dwords);
1084
1085     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1086
1087         inter_msg = 32;
1088         /* MV quantity */
1089         if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
1090                 if (msg[1] & SUBMB_SHAPE_MASK)
1091                         inter_msg = 128;
1092         }
1093     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
1094     OUT_BCS_BATCH(batch, offset);
1095         inter_msg = msg[0] & (0x1F00FFFF);
1096         inter_msg |= INTER_MV8;
1097         inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
1098         if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1099                         (msg[1] & SUBMB_SHAPE_MASK)) {
1100                 inter_msg |= INTER_MV32;
1101         }
1102
1103     OUT_BCS_BATCH(batch, inter_msg);
1104
1105     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
1106     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
1107 #if 0 
1108     if ( slice_type == SLICE_TYPE_B) {
1109         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
1110     } else {
1111         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
1112     }
1113 #else
1114     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
1115 #endif
1116
1117         inter_msg = msg[1] >> 8;
1118     /*Stuff for Inter MB*/
1119     OUT_BCS_BATCH(batch, inter_msg);        
1120     OUT_BCS_BATCH(batch, 0x0);    
1121     OUT_BCS_BATCH(batch, 0x0);        
1122
1123     /*MaxSizeInWord and TargetSzieInWord*/
1124     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1125                   (target_mb_size << 16) );
1126
1127     OUT_BCS_BATCH(batch, 0x0);    
1128
1129     ADVANCE_BCS_BATCH(batch);
1130
1131     return len_in_dwords;
1132 }
1133
1134 #define         AVC_INTRA_RDO_OFFSET    4
1135 #define         AVC_INTER_RDO_OFFSET    10
1136 #define         AVC_INTER_MSG_OFFSET    8       
1137 #define         AVC_INTER_MV_OFFSET             48
1138 #define         AVC_RDO_MASK            0xFFFF
1139
1140 static void 
1141 gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1142                                        struct encode_state *encode_state,
1143                                        struct intel_encoder_context *encoder_context,
1144                                        int slice_index,
1145                                        struct intel_batchbuffer *slice_batch)
1146 {
1147     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1148     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1149     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1150     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1151     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1152     unsigned int *msg = NULL, offset = 0;
1153     unsigned char *msg_ptr = NULL;
1154     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1155     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1156     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1157     int i,x,y;
1158     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1159     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1160     unsigned char *slice_header = NULL;
1161     int slice_header_length_in_bits = 0;
1162     unsigned int tail_data[] = { 0x0, 0x0 };
1163     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1164     int is_intra = slice_type == SLICE_TYPE_I;
1165
1166     if (rate_control_mode == VA_RC_CBR) {
1167         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1168         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1169     }
1170
1171     /* only support for 8-bit pixel bit-depth */
1172     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1173     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1174     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1175     assert(qp >= 0 && qp < 52);
1176
1177     gen75_mfc_avc_slice_state(ctx, 
1178                              pPicParameter,
1179                              pSliceParameter,
1180                              encode_state, encoder_context,
1181                              (rate_control_mode == VA_RC_CBR), qp, slice_batch);
1182
1183     if ( slice_index == 0) 
1184         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1185
1186     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1187
1188     // slice hander
1189     mfc_context->insert_object(ctx, encoder_context,
1190                                (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1191                                5,  /* first 5 bytes are start code + nal unit type */
1192                                1, 0, 1, slice_batch);
1193
1194     dri_bo_map(vme_context->vme_output.bo , 1);
1195     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1196
1197     if (is_intra) {
1198         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1199     } else {
1200         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1201     }
1202    
1203     for (i = pSliceParameter->macroblock_address; 
1204          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1205         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1206         x = i % width_in_mbs;
1207         y = i / width_in_mbs;
1208         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1209
1210         if (is_intra) {
1211             assert(msg);
1212             gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1213         } else {
1214             int inter_rdo, intra_rdo;
1215             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1216             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1217             offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1218             if (intra_rdo < inter_rdo) { 
1219                 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1220             } else {
1221                 msg += AVC_INTER_MSG_OFFSET;
1222                 gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, slice_type, slice_batch);
1223             }
1224         }
1225     }
1226    
1227     dri_bo_unmap(vme_context->vme_output.bo);
1228
1229     if ( last_slice ) {    
1230         mfc_context->insert_object(ctx, encoder_context,
1231                                    tail_data, 2, 8,
1232                                    2, 1, 1, 0, slice_batch);
1233     } else {
1234         mfc_context->insert_object(ctx, encoder_context,
1235                                    tail_data, 1, 8,
1236                                    1, 1, 1, 0, slice_batch);
1237     }
1238
1239     free(slice_header);
1240
1241 }
1242
1243 static dri_bo *
1244 gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1245                                   struct encode_state *encode_state,
1246                                   struct intel_encoder_context *encoder_context)
1247 {
1248     struct i965_driver_data *i965 = i965_driver_data(ctx);
1249     struct intel_batchbuffer *batch;
1250     dri_bo *batch_bo;
1251     int i;
1252     int buffer_size;
1253     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1254     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1255     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1256
1257     buffer_size = width_in_mbs * height_in_mbs * 64;
1258     batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
1259     batch_bo = batch->buffer;
1260     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1261         gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1262     }
1263
1264     intel_batchbuffer_align(batch, 8);
1265     
1266     BEGIN_BCS_BATCH(batch, 2);
1267     OUT_BCS_BATCH(batch, 0);
1268     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1269     ADVANCE_BCS_BATCH(batch);
1270
1271     dri_bo_reference(batch_bo);
1272     intel_batchbuffer_free(batch);
1273
1274     return batch_bo;
1275 }
1276
1277 #else
1278
1279 static void
1280 gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1281                                     struct encode_state *encode_state,
1282                                     struct intel_encoder_context *encoder_context)
1283
1284 {
1285     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1286     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1287
1288     assert(vme_context->vme_output.bo);
1289     mfc_context->buffer_suface_setup(ctx,
1290                                      &mfc_context->gpe_context,
1291                                      &vme_context->vme_output,
1292                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1293                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1294     assert(mfc_context->aux_batchbuffer_surface.bo);
1295     mfc_context->buffer_suface_setup(ctx,
1296                                      &mfc_context->gpe_context,
1297                                      &mfc_context->aux_batchbuffer_surface,
1298                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1299                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
1300 }
1301
1302 static void
1303 gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1304                                      struct encode_state *encode_state,
1305                                      struct intel_encoder_context *encoder_context)
1306
1307 {
1308     struct i965_driver_data *i965 = i965_driver_data(ctx);
1309     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1310     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1311     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1312     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1313     mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1314     mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1315     mfc_context->mfc_batchbuffer_surface.pitch = 16;
1316     mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, 
1317                                                            "MFC batchbuffer",
1318                                                            mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1319                                                            0x1000);
1320     mfc_context->buffer_suface_setup(ctx,
1321                                      &mfc_context->gpe_context,
1322                                      &mfc_context->mfc_batchbuffer_surface,
1323                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1324                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1325 }
1326
1327 static void
1328 gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
1329                                     struct encode_state *encode_state,
1330                                     struct intel_encoder_context *encoder_context)
1331 {
1332     gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1333     gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1334 }
1335
1336 static void
1337 gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
1338                                 struct encode_state *encode_state,
1339                                 struct intel_encoder_context *encoder_context)
1340 {
1341     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1342     struct gen6_interface_descriptor_data *desc;   
1343     int i;
1344     dri_bo *bo;
1345
1346     bo = mfc_context->gpe_context.idrt.bo;
1347     dri_bo_map(bo, 1);
1348     assert(bo->virtual);
1349     desc = bo->virtual;
1350
1351     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1352         struct i965_kernel *kernel;
1353
1354         kernel = &mfc_context->gpe_context.kernels[i];
1355         assert(sizeof(*desc) == 32);
1356
1357         /*Setup the descritor table*/
1358         memset(desc, 0, sizeof(*desc));
1359         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1360         desc->desc2.sampler_count = 0;
1361         desc->desc2.sampler_state_pointer = 0;
1362         desc->desc3.binding_table_entry_count = 2;
1363         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1364         desc->desc4.constant_urb_entry_read_offset = 0;
1365         desc->desc4.constant_urb_entry_read_length = 4;
1366                 
1367         /*kernel start*/
1368         dri_bo_emit_reloc(bo,   
1369                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1370                           0,
1371                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1372                           kernel->bo);
1373         desc++;
1374     }
1375
1376     dri_bo_unmap(bo);
1377 }
1378
1379 static void
1380 gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1381                                     struct encode_state *encode_state,
1382                                     struct intel_encoder_context *encoder_context)
1383 {
1384     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1385     
1386     (void)mfc_context;
1387 }
1388
1389 static void
1390 gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1391                                          int index,
1392                                          int head_offset,
1393                                          int batchbuffer_offset,
1394                                          int head_size,
1395                                          int tail_size,
1396                                          int number_mb_cmds,
1397                                          int first_object,
1398                                          int last_object,
1399                                          int last_slice,
1400                                          int mb_x,
1401                                          int mb_y,
1402                                          int width_in_mbs,
1403                                          int qp)
1404 {
1405     BEGIN_BATCH(batch, 12);
1406     
1407     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1408     OUT_BATCH(batch, index);
1409     OUT_BATCH(batch, 0);
1410     OUT_BATCH(batch, 0);
1411     OUT_BATCH(batch, 0);
1412     OUT_BATCH(batch, 0);
1413    
1414     /*inline data */
1415     OUT_BATCH(batch, head_offset);
1416     OUT_BATCH(batch, batchbuffer_offset);
1417     OUT_BATCH(batch, 
1418               head_size << 16 |
1419               tail_size);
1420     OUT_BATCH(batch,
1421               number_mb_cmds << 16 |
1422               first_object << 2 |
1423               last_object << 1 |
1424               last_slice);
1425     OUT_BATCH(batch,
1426               mb_y << 8 |
1427               mb_x);
1428     OUT_BATCH(batch,
1429               qp << 16 |
1430               width_in_mbs);
1431
1432     ADVANCE_BATCH(batch);
1433 }
1434
1435 static void
1436 gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1437                                        struct intel_encoder_context *encoder_context,
1438                                        VAEncSliceParameterBufferH264 *slice_param,
1439                                        int head_offset,
1440                                        unsigned short head_size,
1441                                        unsigned short tail_size,
1442                                        int batchbuffer_offset,
1443                                        int qp,
1444                                        int last_slice)
1445 {
1446     struct intel_batchbuffer *batch = encoder_context->base.batch;
1447     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1448     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1449     int total_mbs = slice_param->num_macroblocks;
1450     int number_mb_cmds = 128;
1451     int starting_mb = 0;
1452     int last_object = 0;
1453     int first_object = 1;
1454     int i;
1455     int mb_x, mb_y;
1456     int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1457
1458     for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1459         last_object = (total_mbs - starting_mb) == number_mb_cmds;
1460         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1461         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1462         assert(mb_x <= 255 && mb_y <= 255);
1463
1464         starting_mb += number_mb_cmds;
1465
1466         gen75_mfc_batchbuffer_emit_object_command(batch,
1467                                                  index,
1468                                                  head_offset,
1469                                                  batchbuffer_offset,
1470                                                  head_size,
1471                                                  tail_size,
1472                                                  number_mb_cmds,
1473                                                  first_object,
1474                                                  last_object,
1475                                                  last_slice,
1476                                                  mb_x,
1477                                                  mb_y,
1478                                                  width_in_mbs,
1479                                                  qp);
1480
1481         if (first_object) {
1482             head_offset += head_size;
1483             batchbuffer_offset += head_size;
1484         }
1485
1486         if (last_object) {
1487             head_offset += tail_size;
1488             batchbuffer_offset += tail_size;
1489         }
1490
1491         batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
1492
1493         first_object = 0;
1494     }
1495
1496     if (!last_object) {
1497         last_object = 1;
1498         number_mb_cmds = total_mbs % number_mb_cmds;
1499         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1500         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1501         assert(mb_x <= 255 && mb_y <= 255);
1502         starting_mb += number_mb_cmds;
1503
1504         gen75_mfc_batchbuffer_emit_object_command(batch,
1505                                                  index,
1506                                                  head_offset,
1507                                                  batchbuffer_offset,
1508                                                  head_size,
1509                                                  tail_size,
1510                                                  number_mb_cmds,
1511                                                  first_object,
1512                                                  last_object,
1513                                                  last_slice,
1514                                                  mb_x,
1515                                                  mb_y,
1516                                                  width_in_mbs,
1517                                                  qp);
1518     }
1519 }
1520                           
1521 /*
1522  * return size in Owords (16bytes)
1523  */         
1524 static int
1525 gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1526                                struct encode_state *encode_state,
1527                                struct intel_encoder_context *encoder_context,
1528                                int slice_index,
1529                                int batchbuffer_offset)
1530 {
1531     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1532     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1533     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1534     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1535     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1536     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1537     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1538     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1539     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1540     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1541     unsigned char *slice_header = NULL;
1542     int slice_header_length_in_bits = 0;
1543     unsigned int tail_data[] = { 0x0, 0x0 };
1544     long head_offset;
1545     int old_used = intel_batchbuffer_used_size(slice_batch), used;
1546     unsigned short head_size, tail_size;
1547     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1548
1549     if (rate_control_mode == VA_RC_CBR) {
1550         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1551         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1552     }
1553
1554     /* only support for 8-bit pixel bit-depth */
1555     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1556     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1557     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1558     assert(qp >= 0 && qp < 52);
1559
1560     head_offset = old_used / 16;
1561     gen75_mfc_avc_slice_state(ctx,
1562                              pPicParameter,
1563                              pSliceParameter,
1564                              encode_state,
1565                              encoder_context,
1566                              (rate_control_mode == VA_RC_CBR),
1567                              qp,
1568                              slice_batch);
1569
1570     if (slice_index == 0)
1571         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1572
1573     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1574
1575     // slice hander
1576     mfc_context->insert_object(ctx,
1577                                encoder_context,
1578                                (unsigned int *)slice_header,
1579                                ALIGN(slice_header_length_in_bits, 32) >> 5,
1580                                slice_header_length_in_bits & 0x1f,
1581                                5,  /* first 5 bytes are start code + nal unit type */
1582                                1,
1583                                0,
1584                                1,
1585                                slice_batch);
1586     free(slice_header);
1587
1588     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1589     used = intel_batchbuffer_used_size(slice_batch);
1590     head_size = (used - old_used) / 16;
1591     old_used = used;
1592
1593     /* tail */
1594     if (last_slice) {    
1595         mfc_context->insert_object(ctx,
1596                                    encoder_context,
1597                                    tail_data,
1598                                    2,
1599                                    8,
1600                                    2,
1601                                    1,
1602                                    1,
1603                                    0,
1604                                    slice_batch);
1605     } else {
1606         mfc_context->insert_object(ctx,
1607                                    encoder_context,
1608                                    tail_data,
1609                                    1,
1610                                    8,
1611                                    1,
1612                                    1,
1613                                    1,
1614                                    0,
1615                                    slice_batch);
1616     }
1617
1618     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1619     used = intel_batchbuffer_used_size(slice_batch);
1620     tail_size = (used - old_used) / 16;
1621
1622    
1623     gen75_mfc_avc_batchbuffer_slice_command(ctx,
1624                                            encoder_context,
1625                                            pSliceParameter,
1626                                            head_offset,
1627                                            head_size,
1628                                            tail_size,
1629                                            batchbuffer_offset,
1630                                            qp,
1631                                            last_slice);
1632
1633     return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
1634 }
1635
1636 static void
1637 gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1638                                   struct encode_state *encode_state,
1639                                   struct intel_encoder_context *encoder_context)
1640 {
1641     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1642     struct intel_batchbuffer *batch = encoder_context->base.batch;
1643     int i, size, offset = 0;
1644     intel_batchbuffer_start_atomic(batch, 0x4000); 
1645     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1646
1647     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1648         size = gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1649         offset += size;
1650     }
1651
1652     intel_batchbuffer_end_atomic(batch);
1653     intel_batchbuffer_flush(batch);
1654 }
1655
1656 static void
1657 gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
1658                                struct encode_state *encode_state,
1659                                struct intel_encoder_context *encoder_context)
1660 {
1661     gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1662     gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1663     gen75_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1664     gen75_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1665 }
1666
1667 static dri_bo *
1668 gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1669                                   struct encode_state *encode_state,
1670                                   struct intel_encoder_context *encoder_context)
1671 {
1672     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1673
1674     gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1675     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1676
1677     return mfc_context->mfc_batchbuffer_surface.bo;
1678 }
1679
1680 #endif
1681
1682 static void
1683 gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
1684                                  struct encode_state *encode_state,
1685                                  struct intel_encoder_context *encoder_context)
1686 {
1687     struct intel_batchbuffer *batch = encoder_context->base.batch;
1688     dri_bo *slice_batch_bo;
1689
1690     if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1691         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1692         assert(0);
1693         return; 
1694     }
1695
1696 #ifdef MFC_SOFTWARE_HASWELL
1697     slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1698 #else
1699     slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1700 #endif
1701
1702     // begin programing
1703     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1704     intel_batchbuffer_emit_mi_flush(batch);
1705     
1706     // picture level programing
1707     gen75_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1708
1709     BEGIN_BCS_BATCH(batch, 2);
1710     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1711     OUT_BCS_RELOC(batch,
1712                   slice_batch_bo,
1713                   I915_GEM_DOMAIN_COMMAND, 0, 
1714                   0);
1715     ADVANCE_BCS_BATCH(batch);
1716
1717     // end programing
1718     intel_batchbuffer_end_atomic(batch);
1719
1720     dri_bo_unreference(slice_batch_bo);
1721 }
1722
1723
1724 static VAStatus
1725 gen75_mfc_avc_encode_picture(VADriverContextP ctx, 
1726                             struct encode_state *encode_state,
1727                             struct intel_encoder_context *encoder_context)
1728 {
1729     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1730     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1731     int current_frame_bits_size;
1732     int sts;
1733  
1734     for (;;) {
1735         gen75_mfc_init(ctx, encode_state, encoder_context);
1736         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1737         /*Programing bcs pipeline*/
1738         gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);  //filling the pipeline
1739         gen75_mfc_run(ctx, encode_state, encoder_context);
1740         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1741             gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1742             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1743             if (sts == BRC_NO_HRD_VIOLATION) {
1744                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1745                 break;
1746             }
1747             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1748                 if (!mfc_context->hrd.violation_noted) {
1749                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1750                     mfc_context->hrd.violation_noted = 1;
1751                 }
1752                 return VA_STATUS_SUCCESS;
1753             }
1754         } else {
1755             break;
1756         }
1757     }
1758
1759     return VA_STATUS_SUCCESS;
1760 }
1761
1762 /*
1763  * MPEG-2
1764  */
1765
/*
 * Picture-type code written into MFX_MPEG2_PIC_STATE, indexed by the
 * picture_type field of the MPEG-2 picture parameter buffer.
 */
static const int
va_to_gen75_mpeg2_picture_type[3] = {
    [0] = 1,    /* I */
    [1] = 2,    /* P */
    [2] = 3,    /* B */
};
1772
1773 static void
1774 gen75_mfc_mpeg2_pic_state(VADriverContextP ctx,
1775                           struct intel_encoder_context *encoder_context,
1776                           struct encode_state *encode_state)
1777 {
1778     struct intel_batchbuffer *batch = encoder_context->base.batch;
1779     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1780     VAEncPictureParameterBufferMPEG2 *pic_param;
1781     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1782     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1783     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1784
1785     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1786     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1787     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1788
1789     BEGIN_BCS_BATCH(batch, 13);
1790     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1791     OUT_BCS_BATCH(batch,
1792                   (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1793                   (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1794                   (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1795                   (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1796                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1797                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1798                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1799                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1800                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1801                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1802                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1803                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1804     OUT_BCS_BATCH(batch,
1805                   0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1806                   va_to_gen75_mpeg2_picture_type[pic_param->picture_type] << 9 |
1807                   0);
1808     OUT_BCS_BATCH(batch,
1809                   1 << 31 |     /* slice concealment */
1810                   (height_in_mbs - 1) << 16 |
1811                   (width_in_mbs - 1));
1812     if (slice_param && slice_param->quantiser_scale_code >= 14) 
1813         OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
1814     else
1815         OUT_BCS_BATCH(batch, 0);
1816
1817     OUT_BCS_BATCH(batch, 0);
1818     OUT_BCS_BATCH(batch,
1819                   0xFFF << 16 | /* InterMBMaxSize */
1820                   0xFFF << 0 |  /* IntraMBMaxSize */
1821                   0);
1822     OUT_BCS_BATCH(batch, 0);
1823     OUT_BCS_BATCH(batch, 0);
1824     OUT_BCS_BATCH(batch, 0);
1825     OUT_BCS_BATCH(batch, 0);
1826     OUT_BCS_BATCH(batch, 0);
1827     OUT_BCS_BATCH(batch, 0);
1828     ADVANCE_BCS_BATCH(batch);
1829 }
1830
1831 static void
1832 gen75_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1833 {
1834     unsigned char intra_qm[64] = {
1835          8, 16, 19, 22, 26, 27, 29, 34,
1836         16, 16, 22, 24, 27, 29, 34, 37,
1837         19, 22, 26, 27, 29, 34, 34, 38,
1838         22, 22, 26, 27, 29, 34, 37, 40,
1839         22, 26, 27, 29, 32, 35, 40, 48,
1840         26, 27, 29, 32, 35, 40, 48, 58,
1841         26, 27, 29, 34, 38, 46, 56, 69,
1842         27, 29, 35, 38, 46, 56, 69, 83
1843     };
1844
1845     unsigned char non_intra_qm[64] = {
1846         16, 16, 16, 16, 16, 16, 16, 16,
1847         16, 16, 16, 16, 16, 16, 16, 16,
1848         16, 16, 16, 16, 16, 16, 16, 16,
1849         16, 16, 16, 16, 16, 16, 16, 16,
1850         16, 16, 16, 16, 16, 16, 16, 16,
1851         16, 16, 16, 16, 16, 16, 16, 16,
1852         16, 16, 16, 16, 16, 16, 16, 16,
1853         16, 16, 16, 16, 16, 16, 16, 16
1854     };
1855
1856     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1857     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
1858 }
1859
1860 static void
1861 gen75_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1862 {
1863     unsigned short intra_fqm[64] = {
1864          65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1865          65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1866          65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1867          65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1868          65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1869          65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1870          65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1871          65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1872     };
1873
1874     unsigned short non_intra_fqm[64] = {
1875         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1876         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1877         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1878         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1879         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1880         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1881         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1882         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1883     };
1884
1885     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1886     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
1887 }
1888
1889 static void
1890 gen75_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1891                                  struct intel_encoder_context *encoder_context,
1892                                  int x, int y,
1893                                  int next_x, int next_y,
1894                                  int is_fisrt_slice_group,
1895                                  int is_last_slice_group,
1896                                  int intra_slice,
1897                                  int qp,
1898                                  struct intel_batchbuffer *batch)
1899 {
1900     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1901
1902     if (batch == NULL)
1903         batch = encoder_context->base.batch;
1904
1905     BEGIN_BCS_BATCH(batch, 8);
1906
1907     OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1908     OUT_BCS_BATCH(batch,
1909                   0 << 31 |                             /* MbRateCtrlFlag */
1910                   !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
1911                   1 << 17 |                             /* Insert Header before the first slice group data */
1912                   1 << 16 |                             /* SliceData PresentFlag: always 1 */
1913                   1 << 15 |                             /* TailPresentFlag: always 1 */
1914                   0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
1915                   !!intra_slice << 13 |                 /* IntraSlice */
1916                   !!intra_slice << 12 |                 /* IntraSliceFlag */
1917                   0);
1918     OUT_BCS_BATCH(batch,
1919                   next_y << 24 |
1920                   next_x << 16 |
1921                   y << 8 |
1922                   x << 0 |
1923                   0);
1924     OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
1925     /* bitstream pointer is only loaded once for the first slice of a frame when 
1926      * LoadSlicePointerFlag is 0
1927      */
1928     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1929     OUT_BCS_BATCH(batch, 0);    /* FIXME: */
1930     OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
1931     OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
1932
1933     ADVANCE_BCS_BATCH(batch);
1934 }
1935
1936 static int
1937 gen75_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1938                                  struct intel_encoder_context *encoder_context,
1939                                  int x, int y,
1940                                  int first_mb_in_slice,
1941                                  int last_mb_in_slice,
1942                                  int first_mb_in_slice_group,
1943                                  int last_mb_in_slice_group,
1944                                  int mb_type,
1945                                  int qp_scale_code,
1946                                  int coded_block_pattern,
1947                                  unsigned char target_size_in_word,
1948                                  unsigned char max_size_in_word,
1949                                  struct intel_batchbuffer *batch)
1950 {
1951     int len_in_dwords = 9;
1952
1953     if (batch == NULL)
1954         batch = encoder_context->base.batch;
1955
1956     BEGIN_BCS_BATCH(batch, len_in_dwords);
1957
1958     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1959     OUT_BCS_BATCH(batch,
1960                   0 << 24 |     /* PackedMvNum */
1961                   0 << 20 |     /* MvFormat */
1962                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1963                   0 << 15 |     /* TransformFlag: frame DCT */
1964                   0 << 14 |     /* FieldMbFlag */
1965                   1 << 13 |     /* IntraMbFlag */
1966                   mb_type << 8 |   /* MbType: Intra */
1967                   0 << 2 |      /* SkipMbFlag */
1968                   0 << 0 |      /* InterMbMode */
1969                   0);
1970     OUT_BCS_BATCH(batch, y << 16 | x);
1971     OUT_BCS_BATCH(batch,
1972                   max_size_in_word << 24 |
1973                   target_size_in_word << 16 |
1974                   coded_block_pattern << 6 |      /* CBP */
1975                   0);
1976     OUT_BCS_BATCH(batch,
1977                   last_mb_in_slice << 31 |
1978                   first_mb_in_slice << 30 |
1979                   0 << 27 |     /* EnableCoeffClamp */
1980                   last_mb_in_slice_group << 26 |
1981                   0 << 25 |     /* MbSkipConvDisable */
1982                   first_mb_in_slice_group << 24 |
1983                   0 << 16 |     /* MvFieldSelect */
1984                   qp_scale_code << 0 |
1985                   0);
1986     OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
1987     OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
1988     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1989     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1990
1991     ADVANCE_BCS_BATCH(batch);
1992
1993     return len_in_dwords;
1994 }
1995
1996 #define MPEG2_INTER_MV_OFFSET   12 
1997
/*
 * Motion-vector limits selected by f_code (valid indices 1..9); entry 0 is
 * only a placeholder.
 */
static struct _mv_ranges
{
    int low;    /* in the unit of 1/2 pixel */
    int high;   /* in the unit of 1/2 pixel */
} mv_ranges[] = {
    {     0,    0 },    /* f_code 0: unused */
    {   -16,   15 },    /* f_code 1 */
    {   -32,   31 },    /* f_code 2 */
    {   -64,   63 },    /* f_code 3 */
    {  -128,  127 },    /* f_code 4 */
    {  -256,  255 },    /* f_code 5 */
    {  -512,  511 },    /* f_code 6 */
    { -1024, 1023 },    /* f_code 7 */
    { -2048, 2047 },    /* f_code 8 */
    { -4096, 4095 }     /* f_code 9 */
};

/*
 * Sanitise one motion-vector component.
 *
 * mv          - the component, in half-pixel units
 * pos         - macroblock index along the same axis
 * display_max - picture size along that axis, in pixels
 * f_code      - selects the allowed range; values outside 1..9 skip the
 *               range clamp
 *
 * The vector is reset to 0 when the displaced 16-pixel block would start
 * before 0 or end beyond display_max; the result is then clamped to the
 * range for f_code.
 */
static int
mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
{
    /* block edges and picture limit, all in half-pixel units */
    const int block_start = pos * 16 * 2;
    const int block_end = (pos + 1) * 16 * 2;
    const int picture_limit = display_max * 2;
    int result = mv;

    if (result + block_start < 0 || result + block_end > picture_limit)
        result = 0;

    if (f_code >= 1 && f_code <= 9) {
        const struct _mv_ranges *range = &mv_ranges[f_code];

        if (result < range->low)
            result = range->low;
        else if (result > range->high)
            result = range->high;
    }

    return result;
}
2032
2033 static int
2034 gen75_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
2035                                  struct encode_state *encode_state,
2036                                  struct intel_encoder_context *encoder_context,
2037                                  unsigned int *msg,
2038                                  int width_in_mbs, int height_in_mbs,
2039                                  int x, int y,
2040                                  int first_mb_in_slice,
2041                                  int last_mb_in_slice,
2042                                  int first_mb_in_slice_group,
2043                                  int last_mb_in_slice_group,
2044                                  int qp_scale_code,
2045                                  unsigned char target_size_in_word,
2046                                  unsigned char max_size_in_word,
2047                                  struct intel_batchbuffer *batch)
2048 {
2049     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
2050     int len_in_dwords = 9;
2051     short *mvptr, mvx0, mvy0, mvx1, mvy1;
2052     
2053     if (batch == NULL)
2054         batch = encoder_context->base.batch;
2055
2056     mvptr = (short *)(msg + MPEG2_INTER_MV_OFFSET);
2057     mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
2058     mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
2059     mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
2060     mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
2061
2062     BEGIN_BCS_BATCH(batch, len_in_dwords);
2063
2064     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
2065     OUT_BCS_BATCH(batch,
2066                   2 << 24 |     /* PackedMvNum */
2067                   7 << 20 |     /* MvFormat */
2068                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
2069                   0 << 15 |     /* TransformFlag: frame DCT */
2070                   0 << 14 |     /* FieldMbFlag */
2071                   0 << 13 |     /* IntraMbFlag */
2072                   1 << 8 |      /* MbType: Frame-based */
2073                   0 << 2 |      /* SkipMbFlag */
2074                   0 << 0 |      /* InterMbMode */
2075                   0);
2076     OUT_BCS_BATCH(batch, y << 16 | x);
2077     OUT_BCS_BATCH(batch,
2078                   max_size_in_word << 24 |
2079                   target_size_in_word << 16 |
2080                   0x3f << 6 |   /* CBP */
2081                   0);
2082     OUT_BCS_BATCH(batch,
2083                   last_mb_in_slice << 31 |
2084                   first_mb_in_slice << 30 |
2085                   0 << 27 |     /* EnableCoeffClamp */
2086                   last_mb_in_slice_group << 26 |
2087                   0 << 25 |     /* MbSkipConvDisable */
2088                   first_mb_in_slice_group << 24 |
2089                   0 << 16 |     /* MvFieldSelect */
2090                   qp_scale_code << 0 |
2091                   0);
2092
2093     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
2094     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
2095     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
2096     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
2097
2098     ADVANCE_BCS_BATCH(batch);
2099
2100     return len_in_dwords;
2101 }
2102
2103 static void
2104 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
2105                                            struct encode_state *encode_state,
2106                                            struct intel_encoder_context *encoder_context,
2107                                            struct intel_batchbuffer *slice_batch)
2108 {
2109     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2110     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
2111
2112     if (encode_state->packed_header_data[idx]) {
2113         VAEncPackedHeaderParameterBuffer *param = NULL;
2114         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2115         unsigned int length_in_bits;
2116
2117         assert(encode_state->packed_header_param[idx]);
2118         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2119         length_in_bits = param->bit_length;
2120
2121         mfc_context->insert_object(ctx,
2122                                    encoder_context,
2123                                    header_data,
2124                                    ALIGN(length_in_bits, 32) >> 5,
2125                                    length_in_bits & 0x1f,
2126                                    5,   /* FIXME: check it */
2127                                    0,
2128                                    0,
2129                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2130                                    slice_batch);
2131     }
2132
2133     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2134
2135     if (encode_state->packed_header_data[idx]) {
2136         VAEncPackedHeaderParameterBuffer *param = NULL;
2137         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2138         unsigned int length_in_bits;
2139
2140         assert(encode_state->packed_header_param[idx]);
2141         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2142         length_in_bits = param->bit_length;
2143
2144         mfc_context->insert_object(ctx,
2145                                    encoder_context,
2146                                    header_data,
2147                                    ALIGN(length_in_bits, 32) >> 5,
2148                                    length_in_bits & 0x1f,
2149                                    5,   /* FIXME: check it */
2150                                    0,
2151                                    0,
2152                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2153                                    slice_batch);
2154     }
2155 }
2156
2157 static void 
2158 gen75_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2159                                      struct encode_state *encode_state,
2160                                      struct intel_encoder_context *encoder_context,
2161                                      int slice_index,
2162                                      VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2163                                      struct intel_batchbuffer *slice_batch)
2164 {
2165     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2166     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2167     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2168     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2169     unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2170     unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2171     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2172     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2173     int i, j;
2174     int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2175     unsigned int *msg = NULL;
2176     unsigned char *msg_ptr = NULL;
2177
2178     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
2179     h_start_pos = slice_param->macroblock_address % width_in_mbs;
2180     v_start_pos = slice_param->macroblock_address / width_in_mbs;
2181     assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
2182
2183     dri_bo_map(vme_context->vme_output.bo , 0);
2184     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
2185
2186     if (next_slice_group_param) {
2187         h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2188         v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
2189     } else {
2190         h_next_start_pos = 0;
2191         v_next_start_pos = height_in_mbs;
2192     }
2193
2194     gen75_mfc_mpeg2_slicegroup_state(ctx,
2195                                      encoder_context,
2196                                      h_start_pos,
2197                                      v_start_pos,
2198                                      h_next_start_pos,
2199                                      v_next_start_pos,
2200                                      slice_index == 0,
2201                                      next_slice_group_param == NULL,
2202                                      slice_param->is_intra_slice,
2203                                      slice_param->quantiser_scale_code,
2204                                      slice_batch);
2205
2206     if (slice_index == 0) 
2207         intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2208
2209     /* Insert '00' to make sure the header is valid */
2210     mfc_context->insert_object(ctx,
2211                                encoder_context,
2212                                (unsigned int*)section_delimiter,
2213                                1,
2214                                8,   /* 8bits in the last DWORD */
2215                                1,   /* 1 byte */
2216                                1,
2217                                0,
2218                                0,
2219                                slice_batch);
2220
2221     for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2222         /* PAK for each macroblocks */
2223         for (j = 0; j < slice_param->num_macroblocks; j++) {
2224             int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2225             int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2226             int first_mb_in_slice = (j == 0);
2227             int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2228             int first_mb_in_slice_group = (i == 0 && j == 0);
2229             int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2230                                           j == slice_param->num_macroblocks - 1);
2231
2232             msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
2233
2234             if (slice_param->is_intra_slice) {
2235                 gen75_mfc_mpeg2_pak_object_intra(ctx,
2236                                                  encoder_context,
2237                                                  h_pos, v_pos,
2238                                                  first_mb_in_slice,
2239                                                  last_mb_in_slice,
2240                                                  first_mb_in_slice_group,
2241                                                  last_mb_in_slice_group,
2242                                                  0x1a,
2243                                                  slice_param->quantiser_scale_code,
2244                                                  0x3f,
2245                                                  0,
2246                                                  0xff,
2247                                                  slice_batch);
2248             } else {
2249                 int inter_rdo, intra_rdo;
2250                 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
2251                 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
2252
2253                 if (intra_rdo < inter_rdo) 
2254                         gen75_mfc_mpeg2_pak_object_intra(ctx,
2255                                                  encoder_context,
2256                                                  h_pos, v_pos,
2257                                                  first_mb_in_slice,
2258                                                  last_mb_in_slice,
2259                                                  first_mb_in_slice_group,
2260                                                  last_mb_in_slice_group,
2261                                                  0x1a,
2262                                                  slice_param->quantiser_scale_code,
2263                                                  0x3f,
2264                                                  0,
2265                                                  0xff,
2266                                                  slice_batch);
2267                 else
2268                         gen75_mfc_mpeg2_pak_object_inter(ctx,
2269                                                  encode_state,
2270                                                  encoder_context,
2271                                                  msg,
2272                                                  width_in_mbs, height_in_mbs,
2273                                                  h_pos, v_pos,
2274                                                  first_mb_in_slice,
2275                                                  last_mb_in_slice,
2276                                                  first_mb_in_slice_group,
2277                                                  last_mb_in_slice_group,
2278                                                  slice_param->quantiser_scale_code,
2279                                                  0,
2280                                                  0xff,
2281                                                  slice_batch);
2282             }
2283         }
2284
2285         slice_param++;
2286     }
2287
2288     dri_bo_unmap(vme_context->vme_output.bo);
2289
2290     /* tail data */
2291     if (next_slice_group_param == NULL) { /* end of a picture */
2292         mfc_context->insert_object(ctx,
2293                                    encoder_context,
2294                                    (unsigned int *)tail_delimiter,
2295                                    2,
2296                                    8,   /* 8bits in the last DWORD */
2297                                    5,   /* 5 bytes */
2298                                    1,
2299                                    1,
2300                                    0,
2301                                    slice_batch);
2302     } else {        /* end of a lsice group */
2303         mfc_context->insert_object(ctx,
2304                                    encoder_context,
2305                                    (unsigned int *)section_delimiter,
2306                                    1,
2307                                    8,   /* 8bits in the last DWORD */
2308                                    1,   /* 1 byte */
2309                                    1,
2310                                    1,
2311                                    0,
2312                                    slice_batch);
2313     }
2314 }
2315
2316 /* 
2317  * A batch buffer for all slices, including slice state, 
2318  * slice insert object and slice pak object commands
2319  *
2320  */
2321 static dri_bo *
2322 gen75_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2323                                            struct encode_state *encode_state,
2324                                            struct intel_encoder_context *encoder_context)
2325 {
2326     struct i965_driver_data *i965 = i965_driver_data(ctx);
2327     struct intel_batchbuffer *batch;
2328     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2329     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2330     dri_bo *batch_bo;
2331     int i;
2332     int buffer_size;
2333     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2334     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2335
2336     buffer_size = width_in_mbs * height_in_mbs * 64;
2337     batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
2338     batch_bo = batch->buffer;
2339
2340     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2341         if (i == encode_state->num_slice_params_ext - 1)
2342             next_slice_group_param = NULL;
2343         else
2344             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2345
2346         gen75_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2347     }
2348
2349     intel_batchbuffer_align(batch, 8);
2350     
2351     BEGIN_BCS_BATCH(batch, 2);
2352     OUT_BCS_BATCH(batch, 0);
2353     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2354     ADVANCE_BCS_BATCH(batch);
2355
2356     dri_bo_reference(batch_bo);
2357     intel_batchbuffer_free(batch);
2358
2359     return batch_bo;
2360 }
2361
2362 static void
2363 gen75_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2364                                             struct encode_state *encode_state,
2365                                             struct intel_encoder_context *encoder_context)
2366 {
2367     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2368
2369     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2370     mfc_context->set_surface_state(ctx, encoder_context);
2371     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2372     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
2373     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2374     gen75_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2375     gen75_mfc_mpeg2_qm_state(ctx, encoder_context);
2376     gen75_mfc_mpeg2_fqm_state(ctx, encoder_context);
2377 }
2378
2379 static void
2380 gen75_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2381                                     struct encode_state *encode_state,
2382                                     struct intel_encoder_context *encoder_context)
2383 {
2384     struct intel_batchbuffer *batch = encoder_context->base.batch;
2385     dri_bo *slice_batch_bo;
2386
2387     slice_batch_bo = gen75_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2388
2389     // begin programing
2390     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
2391     intel_batchbuffer_emit_mi_flush(batch);
2392     
2393     // picture level programing
2394     gen75_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
2395
2396     BEGIN_BCS_BATCH(batch, 2);
2397     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
2398     OUT_BCS_RELOC(batch,
2399                   slice_batch_bo,
2400                   I915_GEM_DOMAIN_COMMAND, 0, 
2401                   0);
2402     ADVANCE_BCS_BATCH(batch);
2403
2404     // end programing
2405     intel_batchbuffer_end_atomic(batch);
2406
2407     dri_bo_unreference(slice_batch_bo);
2408 }
2409
2410 static VAStatus
2411 intel_mfc_mpeg2_prepare(VADriverContextP ctx, 
2412                         struct encode_state *encode_state,
2413                         struct intel_encoder_context *encoder_context)
2414 {
2415     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2416     struct object_surface *obj_surface; 
2417     struct object_buffer *obj_buffer;
2418     struct i965_coded_buffer_segment *coded_buffer_segment;
2419     VAStatus vaStatus = VA_STATUS_SUCCESS;
2420     dri_bo *bo;
2421     int i;
2422
2423     /* reconstructed surface */
2424     obj_surface = encode_state->reconstructed_object;
2425     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2426     mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2427     dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2428     mfc_context->surface_state.width = obj_surface->orig_width;
2429     mfc_context->surface_state.height = obj_surface->orig_height;
2430     mfc_context->surface_state.w_pitch = obj_surface->width;
2431     mfc_context->surface_state.h_pitch = obj_surface->height;
2432
2433     /* forward reference */
2434     obj_surface = encode_state->reference_objects[0];
2435
2436     if (obj_surface && obj_surface->bo) {
2437         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2438         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2439     } else
2440         mfc_context->reference_surfaces[0].bo = NULL;
2441
2442     /* backward reference */
2443     obj_surface = encode_state->reference_objects[1];
2444
2445     if (obj_surface && obj_surface->bo) {
2446         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2447         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2448     } else {
2449         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2450
2451         if (mfc_context->reference_surfaces[1].bo)
2452             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2453     }
2454
2455     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2456         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2457
2458         if (mfc_context->reference_surfaces[i].bo)
2459             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2460     }
2461     
2462     /* input YUV surface */
2463     obj_surface = encode_state->input_yuv_object;
2464     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2465     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2466
2467     /* coded buffer */
2468     obj_buffer = encode_state->coded_buf_object;
2469     bo = obj_buffer->buffer_store->bo;
2470     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2471     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2472     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2473     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2474
2475     /* set the internal flag to 0 to indicate the coded size is unknown */
2476     dri_bo_map(bo, 1);
2477     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2478     coded_buffer_segment->mapped = 0;
2479     coded_buffer_segment->codec = encoder_context->codec;
2480     dri_bo_unmap(bo);
2481
2482     return vaStatus;
2483 }
2484
2485 static VAStatus
2486 gen75_mfc_mpeg2_encode_picture(VADriverContextP ctx, 
2487                                struct encode_state *encode_state,
2488                                struct intel_encoder_context *encoder_context)
2489 {
2490     gen75_mfc_init(ctx, encode_state, encoder_context);
2491     intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2492     /*Programing bcs pipeline*/
2493     gen75_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2494     gen75_mfc_run(ctx, encode_state, encoder_context);
2495
2496     return VA_STATUS_SUCCESS;
2497 }
2498
2499 static void
2500 gen75_mfc_context_destroy(void *context)
2501 {
2502     struct gen6_mfc_context *mfc_context = context;
2503     int i;
2504
2505     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2506     mfc_context->post_deblocking_output.bo = NULL;
2507
2508     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2509     mfc_context->pre_deblocking_output.bo = NULL;
2510
2511     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2512     mfc_context->uncompressed_picture_source.bo = NULL;
2513
2514     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
2515     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2516
2517     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2518         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2519         mfc_context->direct_mv_buffers[i].bo = NULL;
2520     }
2521
2522     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2523     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
2524
2525     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2526     mfc_context->macroblock_status_buffer.bo = NULL;
2527
2528     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2529     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2530
2531     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2532     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2533
2534     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2535         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2536         mfc_context->reference_surfaces[i].bo = NULL;  
2537     }
2538
2539     i965_gpe_context_destroy(&mfc_context->gpe_context);
2540
2541     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2542     mfc_context->mfc_batchbuffer_surface.bo = NULL;
2543
2544     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2545     mfc_context->aux_batchbuffer_surface.bo = NULL;
2546
2547     if (mfc_context->aux_batchbuffer)
2548         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2549
2550     mfc_context->aux_batchbuffer = NULL;
2551
2552     free(mfc_context);
2553 }
2554
2555 static VAStatus gen75_mfc_pipeline(VADriverContextP ctx,
2556                   VAProfile profile,
2557                   struct encode_state *encode_state,
2558                   struct intel_encoder_context *encoder_context)
2559 {
2560     VAStatus vaStatus;
2561
2562     switch (profile) {
2563     case VAProfileH264Baseline:
2564     case VAProfileH264Main:
2565     case VAProfileH264High:
2566         vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
2567         break;
2568
2569         /* FIXME: add for other profile */
2570     case VAProfileMPEG2Simple:
2571     case VAProfileMPEG2Main:
2572         vaStatus = gen75_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
2573         break;
2574
2575     default:
2576         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2577         break;
2578     }
2579
2580     return vaStatus;
2581 }
2582
2583 Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2584 {
2585     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
2586
2587     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2588
2589     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
2590     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
2591
2592     mfc_context->gpe_context.curbe.length = 32 * 4;
2593
2594     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2595     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
2596     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
2597     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
2598     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
2599
2600     i965_gpe_load_kernels(ctx,
2601                           &mfc_context->gpe_context,
2602                           gen75_mfc_kernels,
2603                           NUM_MFC_KERNEL);
2604
2605     mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
2606     mfc_context->set_surface_state = gen75_mfc_surface_state;
2607     mfc_context->ind_obj_base_addr_state = gen75_mfc_ind_obj_base_addr_state;
2608     mfc_context->avc_img_state = gen75_mfc_avc_img_state;
2609     mfc_context->avc_qm_state = gen75_mfc_avc_qm_state;
2610     mfc_context->avc_fqm_state = gen75_mfc_avc_fqm_state;
2611     mfc_context->insert_object = gen75_mfc_avc_insert_object;
2612     mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;
2613
2614     encoder_context->mfc_context = mfc_context;
2615     encoder_context->mfc_context_destroy = gen75_mfc_context_destroy;
2616     encoder_context->mfc_pipeline = gen75_mfc_pipeline;
2617     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
2618
2619     return True;
2620 }