Pass the reference frame index in List0/1 into the PAK command
[platform/upstream/libva-intel-driver.git] / src / gen75_mfc.c
1 /*
2  * Copyright © 2010-2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45
/* Build-time switch, defined to 1; it is not referenced in this part of the file. */
#define MFC_SOFTWARE_HASWELL    1

/* Lowest hardware revision that IS_STEPPING_BPLUS() accepts. */
#define B0_STEP_REV             2
/*
 * True when the driver data's intel.revision is at least B0_STEP_REV.
 * The argument is parenthesized so that expressions such as `&data` or
 * `base + 1` expand correctly.
 */
#define IS_STEPPING_BPLUS(i965) (((i965)->intel.revision) >= B0_STEP_REV)
50
/*
 * Binary for the "AVC intra batchbuffer" kernel. The contents come from the
 * included .g7b file; each row is four 32-bit words.
 */
static const uint32_t gen75_mfc_batchbuffer_avc_intra[][4] = {
#include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
};

/*
 * Binary for the "AVC inter batchbuffer" kernel, laid out the same way as
 * the intra table above.
 */
static const uint32_t gen75_mfc_batchbuffer_avc_inter[][4] = {
#include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
};

/*
 * Descriptor table for the two kernels above. Each entry lists, in order:
 * a display name, the kernel identifier, the binary table, its size in
 * bytes, and a trailing pointer left NULL here.
 * NOTE(review): the last member is presumably a buffer object filled in
 * when the kernels are loaded - confirm against struct i965_kernel.
 */
static struct i965_kernel gen75_mfc_kernels[] = {
    {
        "MFC AVC INTRA BATCHBUFFER ",
        MFC_BATCHBUFFER_AVC_INTRA,
        gen75_mfc_batchbuffer_avc_intra,
        sizeof(gen75_mfc_batchbuffer_avc_intra),
        NULL
    },

    {
        "MFC AVC INTER BATCHBUFFER ",
        MFC_BATCHBUFFER_AVC_INTER,
        gen75_mfc_batchbuffer_avc_inter,
        sizeof(gen75_mfc_batchbuffer_avc_inter),
        NULL
    },
};
76
/*
 * Masks and field values for an inter-prediction mode word. None of them is
 * referenced in this part of the file.
 * NOTE(review): the meanings below are read off the macro names - confirm
 * against the code that consumes them.
 */
#define         INTER_MODE_MASK         0x03        /* low two bits select the partition mode */
#define         INTER_8X8               0x03
#define         INTER_16X8              0x01
#define         INTER_8X16              0x02
#define         SUBMB_SHAPE_MASK        0x00FF00    /* bits 8..15 */

/* Values placed at bit 20 and above. */
#define         INTER_MV8               (4 << 20)
#define         INTER_MV32              (6 << 20)
85
86
87 static void
88 gen75_mfc_pipe_mode_select(VADriverContextP ctx,
89                            int standard_select,
90                            struct intel_encoder_context *encoder_context)
91 {
92     struct intel_batchbuffer *batch = encoder_context->base.batch;
93     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
94     assert(standard_select == MFX_FORMAT_MPEG2 ||
95            standard_select == MFX_FORMAT_AVC);
96
97     BEGIN_BCS_BATCH(batch, 5);
98
99     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
100     OUT_BCS_BATCH(batch,
101                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
102                   (MFD_MODE_VLD << 15) | /* VLD mode */
103                   (0 << 10) | /* Stream-Out Enable */
104                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
105                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
106                   (0 << 5)  | /* not in stitch mode */
107                   (1 << 4)  | /* encoding mode */
108                   (standard_select << 0));  /* standard select: avc or mpeg2 */
109     OUT_BCS_BATCH(batch,
110                   (0 << 7)  | /* expand NOA bus flag */
111                   (0 << 6)  | /* disable slice-level clock gating */
112                   (0 << 5)  | /* disable clock gating for NOA */
113                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
114                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
115                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
116                   (0 << 1)  |
117                   (0 << 0));
118     OUT_BCS_BATCH(batch, 0);
119     OUT_BCS_BATCH(batch, 0);
120
121     ADVANCE_BCS_BATCH(batch);
122 }
123
124 static void
125 gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
126 {
127     struct intel_batchbuffer *batch = encoder_context->base.batch;
128     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
129
130     BEGIN_BCS_BATCH(batch, 6);
131
132     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
133     OUT_BCS_BATCH(batch, 0);
134     OUT_BCS_BATCH(batch,
135                   ((mfc_context->surface_state.height - 1) << 18) |
136                   ((mfc_context->surface_state.width - 1) << 4));
137     OUT_BCS_BATCH(batch,
138                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
139                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
140                   (0 << 22) | /* surface object control state, FIXME??? */
141                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
142                   (0 << 2)  | /* must be 0 for interleave U/V */
143                   (1 << 1)  | /* must be tiled */
144                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
145     OUT_BCS_BATCH(batch,
146                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
147                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
148     OUT_BCS_BATCH(batch, 0);
149
150     ADVANCE_BCS_BATCH(batch);
151 }
152
153 static void
154 gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
155                                         struct intel_encoder_context *encoder_context)
156 {
157     struct intel_batchbuffer *batch = encoder_context->base.batch;
158     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
159     struct gen6_vme_context *vme_context = encoder_context->vme_context;
160
161     BEGIN_BCS_BATCH(batch, 26);
162
163     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
164     /* the DW1-3 is for the MFX indirect bistream offset */
165     OUT_BCS_BATCH(batch, 0);
166     OUT_BCS_BATCH(batch, 0);
167     OUT_BCS_BATCH(batch, 0);
168     /* the DW4-5 is the MFX upper bound */
169     OUT_BCS_BATCH(batch, 0);
170     OUT_BCS_BATCH(batch, 0);
171
172     /* the DW6-10 is for MFX Indirect MV Object Base Address */
173     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
174     OUT_BCS_BATCH(batch, 0);
175     OUT_BCS_BATCH(batch, 0);
176     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
177     OUT_BCS_BATCH(batch, 0);
178
179     /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
180     OUT_BCS_BATCH(batch, 0);
181     OUT_BCS_BATCH(batch, 0);
182     OUT_BCS_BATCH(batch, 0);
183     OUT_BCS_BATCH(batch, 0);
184     OUT_BCS_BATCH(batch, 0);
185
186     /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
187     OUT_BCS_BATCH(batch, 0);
188     OUT_BCS_BATCH(batch, 0);
189     OUT_BCS_BATCH(batch, 0);
190     OUT_BCS_BATCH(batch, 0);
191     OUT_BCS_BATCH(batch, 0);
192
193     /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/        
194     OUT_BCS_RELOC(batch,
195                   mfc_context->mfc_indirect_pak_bse_object.bo,
196                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
197                   0);
198     OUT_BCS_BATCH(batch, 0);
199     OUT_BCS_BATCH(batch, 0);
200         
201     OUT_BCS_RELOC(batch,
202                   mfc_context->mfc_indirect_pak_bse_object.bo,
203                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
204                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
205     OUT_BCS_BATCH(batch, 0);
206
207     ADVANCE_BCS_BATCH(batch);
208 }
209
210 static void
211 gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
212 {
213     struct intel_batchbuffer *batch = encoder_context->base.batch;
214     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
215     struct gen6_vme_context *vme_context = encoder_context->vme_context;
216     struct i965_driver_data *i965 = i965_driver_data(ctx);
217
218     if (IS_STEPPING_BPLUS(i965)) {
219         gen75_mfc_ind_obj_base_addr_state_bplus(ctx, encoder_context);
220         return;
221     }
222
223     BEGIN_BCS_BATCH(batch, 11);
224
225     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
226     OUT_BCS_BATCH(batch, 0);
227     OUT_BCS_BATCH(batch, 0);
228     /* MFX Indirect MV Object Base Address */
229     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
230     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
231     OUT_BCS_BATCH(batch, 0);
232     OUT_BCS_BATCH(batch, 0);
233     OUT_BCS_BATCH(batch, 0);
234     OUT_BCS_BATCH(batch, 0);
235     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
236     OUT_BCS_RELOC(batch,
237                   mfc_context->mfc_indirect_pak_bse_object.bo,
238                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
239                   0);
240     OUT_BCS_RELOC(batch,
241                   mfc_context->mfc_indirect_pak_bse_object.bo,
242                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
243                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
244
245     ADVANCE_BCS_BATCH(batch);
246 }
247
248 static void
249 gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,  
250                         struct intel_encoder_context *encoder_context)
251 {
252     struct intel_batchbuffer *batch = encoder_context->base.batch;
253     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
254     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
255
256     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
257     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
258
259     BEGIN_BCS_BATCH(batch, 16);
260
261     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
262     /*DW1. MB setting of frame */
263     OUT_BCS_BATCH(batch,
264                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
265     OUT_BCS_BATCH(batch, 
266                   ((height_in_mbs - 1) << 16) | 
267                   ((width_in_mbs - 1) << 0));
268     /* DW3 QP setting */
269     OUT_BCS_BATCH(batch, 
270                   (0 << 24) |   /* Second Chroma QP Offset */
271                   (0 << 16) |   /* Chroma QP Offset */
272                   (0 << 14) |   /* Max-bit conformance Intra flag */
273                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
274                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
275                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
276                   (0 << 8)  |   /* FIXME: Image Structure */
277                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
278     OUT_BCS_BATCH(batch,
279                   (0 << 16) |   /* Mininum Frame size */
280                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
281                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
282                   (0 << 13) |   /* CABAC 0 word insertion test enable */
283                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
284                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
285                   (0 << 8)  |   /* FIXME: MbMvFormatFlag */
286                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
287                   (0 << 6)  |   /* Only valid for VLD decoding mode */
288                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
289                   (0 << 4)  |   /* Direct 8x8 inference flag */
290                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
291                   (1 << 2)  |   /* Frame MB only flag */
292                   (0 << 1)  |   /* MBAFF mode is in active */
293                   (0 << 0));    /* Field picture flag */
294     /* DW5 Trellis quantization */
295     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
296     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
297                   (0xBB8 << 16) |       /* InterMbMaxSz */
298                   (0xEE8) );            /* IntraMbMaxSz */
299     OUT_BCS_BATCH(batch, 0);            /* Reserved */
300     /* DW8. QP delta */
301     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
302     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
303     /* DW10. Bit setting for MB */
304     OUT_BCS_BATCH(batch, 0x8C000000);
305     OUT_BCS_BATCH(batch, 0x00010000);
306     /* DW12. */
307     OUT_BCS_BATCH(batch, 0);
308     OUT_BCS_BATCH(batch, 0x02010100);
309     /* DW14. For short format */
310     OUT_BCS_BATCH(batch, 0);
311     OUT_BCS_BATCH(batch, 0);
312
313     ADVANCE_BCS_BATCH(batch);
314 }
315
316 static void
317 gen75_mfc_qm_state(VADriverContextP ctx,
318                    int qm_type,
319                    unsigned int *qm,
320                    int qm_length,
321                    struct intel_encoder_context *encoder_context)
322 {
323     struct intel_batchbuffer *batch = encoder_context->base.batch;
324     unsigned int qm_buffer[16];
325
326     assert(qm_length <= 16);
327     assert(sizeof(*qm) == 4);
328     memcpy(qm_buffer, qm, qm_length * 4);
329
330     BEGIN_BCS_BATCH(batch, 18);
331     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
332     OUT_BCS_BATCH(batch, qm_type << 0);
333     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
334     ADVANCE_BCS_BATCH(batch);
335 }
336
337 static void
338 gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
339 {
340     unsigned int qm[16] = {
341         0x10101010, 0x10101010, 0x10101010, 0x10101010,
342         0x10101010, 0x10101010, 0x10101010, 0x10101010,
343         0x10101010, 0x10101010, 0x10101010, 0x10101010,
344         0x10101010, 0x10101010, 0x10101010, 0x10101010
345     };
346
347     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
348     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
349     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
350     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
351 }
352
353 static void
354 gen75_mfc_fqm_state(VADriverContextP ctx,
355                     int fqm_type,
356                     unsigned int *fqm,
357                     int fqm_length,
358                     struct intel_encoder_context *encoder_context)
359 {
360     struct intel_batchbuffer *batch = encoder_context->base.batch;
361     unsigned int fqm_buffer[32];
362
363     assert(fqm_length <= 32);
364     assert(sizeof(*fqm) == 4);
365     memcpy(fqm_buffer, fqm, fqm_length * 4);
366
367     BEGIN_BCS_BATCH(batch, 34);
368     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
369     OUT_BCS_BATCH(batch, fqm_type << 0);
370     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
371     ADVANCE_BCS_BATCH(batch);
372 }
373
374 static void
375 gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
376 {
377     unsigned int qm[32] = {
378         0x10001000, 0x10001000, 0x10001000, 0x10001000,
379         0x10001000, 0x10001000, 0x10001000, 0x10001000,
380         0x10001000, 0x10001000, 0x10001000, 0x10001000,
381         0x10001000, 0x10001000, 0x10001000, 0x10001000,
382         0x10001000, 0x10001000, 0x10001000, 0x10001000,
383         0x10001000, 0x10001000, 0x10001000, 0x10001000,
384         0x10001000, 0x10001000, 0x10001000, 0x10001000,
385         0x10001000, 0x10001000, 0x10001000, 0x10001000
386     };
387
388     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
389     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
390     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
391     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
392 }
393
394 static void
395 gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
396                             unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
397                             int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
398                             struct intel_batchbuffer *batch)
399 {
400     if (batch == NULL)
401         batch = encoder_context->base.batch;
402
403     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
404
405     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
406     OUT_BCS_BATCH(batch,
407                   (0 << 16) |   /* always start at offset 0 */
408                   (data_bits_in_last_dw << 8) |
409                   (skip_emul_byte_count << 4) |
410                   (!!emulation_flag << 3) |
411                   ((!!is_last_header) << 2) |
412                   ((!!is_end_of_slice) << 1) |
413                   (0 << 0));    /* FIXME: ??? */
414     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
415
416     ADVANCE_BCS_BATCH(batch);
417 }
418
419
420 static void gen75_mfc_init(VADriverContextP ctx,
421                            struct encode_state *encode_state,
422                            struct intel_encoder_context *encoder_context)
423 {
424     struct i965_driver_data *i965 = i965_driver_data(ctx);
425     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
426     dri_bo *bo;
427     int i;
428     int width_in_mbs = 0;
429     int height_in_mbs = 0;
430
431     if (encoder_context->codec == CODEC_H264) {
432         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
433         width_in_mbs = pSequenceParameter->picture_width_in_mbs;
434         height_in_mbs = pSequenceParameter->picture_height_in_mbs;
435     } else {
436         VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
437
438         assert(encoder_context->codec == CODEC_MPEG2);
439
440         width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
441         height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
442     }
443
444     /*Encode common setup for MFC*/
445     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
446     mfc_context->post_deblocking_output.bo = NULL;
447
448     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
449     mfc_context->pre_deblocking_output.bo = NULL;
450
451     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
452     mfc_context->uncompressed_picture_source.bo = NULL;
453
454     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
455     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
456
457     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
458         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
459         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
460         mfc_context->direct_mv_buffers[i].bo = NULL;
461     }
462
463     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
464         if (mfc_context->reference_surfaces[i].bo != NULL)
465             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
466         mfc_context->reference_surfaces[i].bo = NULL;  
467     }
468
469     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
470     bo = dri_bo_alloc(i965->intel.bufmgr,
471                       "Buffer",
472                       width_in_mbs * 64,
473                       64);
474     assert(bo);
475     mfc_context->intra_row_store_scratch_buffer.bo = bo;
476
477     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
478     bo = dri_bo_alloc(i965->intel.bufmgr,
479                       "Buffer",
480                       width_in_mbs * height_in_mbs * 16,
481                       64);
482     assert(bo);
483     mfc_context->macroblock_status_buffer.bo = bo;
484
485     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
486     bo = dri_bo_alloc(i965->intel.bufmgr,
487                       "Buffer",
488                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
489                       64);
490     assert(bo);
491     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
492
493     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
494     bo = dri_bo_alloc(i965->intel.bufmgr,
495                       "Buffer",
496                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
497                       0x1000);
498     assert(bo);
499     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
500
501     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
502     mfc_context->mfc_batchbuffer_surface.bo = NULL;
503
504     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
505     mfc_context->aux_batchbuffer_surface.bo = NULL;
506
507     if (mfc_context->aux_batchbuffer)
508         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
509
510     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0);
511     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
512     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
513     mfc_context->aux_batchbuffer_surface.pitch = 16;
514     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
515     mfc_context->aux_batchbuffer_surface.size_block = 16;
516
517     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
518 }
519
520 static void
521 gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
522                                     struct intel_encoder_context *encoder_context)
523 {
524     struct intel_batchbuffer *batch = encoder_context->base.batch;
525     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
526     int i;
527
528     BEGIN_BCS_BATCH(batch, 61);
529
530     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
531
532     /* the DW1-3 is for pre_deblocking */
533     if (mfc_context->pre_deblocking_output.bo)
534         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
535                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
536                       0);
537     else
538         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
539
540     OUT_BCS_BATCH(batch, 0);
541     OUT_BCS_BATCH(batch, 0);
542     /* the DW4-6 is for the post_deblocking */
543
544     if (mfc_context->post_deblocking_output.bo)
545         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
546                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
547                       0);                                                                                       /* post output addr  */ 
548     else
549         OUT_BCS_BATCH(batch, 0);
550     OUT_BCS_BATCH(batch, 0);
551     OUT_BCS_BATCH(batch, 0);
552
553     /* the DW7-9 is for the uncompressed_picture */
554     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
555                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
556                   0); /* uncompressed data */
557
558     OUT_BCS_BATCH(batch, 0);
559     OUT_BCS_BATCH(batch, 0);
560
561     /* the DW10-12 is for the mb status */
562     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
563                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
564                   0); /* StreamOut data*/
565     OUT_BCS_BATCH(batch, 0);
566     OUT_BCS_BATCH(batch, 0);
567
568     /* the DW13-15 is for the intra_row_store_scratch */
569     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
570                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
571                   0);   
572     OUT_BCS_BATCH(batch, 0);
573     OUT_BCS_BATCH(batch, 0);
574
575     /* the DW16-18 is for the deblocking filter */
576     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
577                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
578                   0);
579     OUT_BCS_BATCH(batch, 0);
580     OUT_BCS_BATCH(batch, 0);
581
582     /* the DW 19-50 is for Reference pictures*/
583     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
584         if ( mfc_context->reference_surfaces[i].bo != NULL) {
585             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
586                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
587                           0);                   
588         } else {
589             OUT_BCS_BATCH(batch, 0);
590         }
591         OUT_BCS_BATCH(batch, 0);
592     }
593     OUT_BCS_BATCH(batch, 0);
594
595     /* The DW 52-54 is for the MB status buffer */
596     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
597                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
598                   0);                                                                                   /* Macroblock status buffer*/
599         
600     OUT_BCS_BATCH(batch, 0);
601     OUT_BCS_BATCH(batch, 0);
602
603     /* the DW 55-57 is the ILDB buffer */
604     OUT_BCS_BATCH(batch, 0);
605     OUT_BCS_BATCH(batch, 0);
606     OUT_BCS_BATCH(batch, 0);
607
608     /* the DW 58-60 is the second ILDB buffer */
609     OUT_BCS_BATCH(batch, 0);
610     OUT_BCS_BATCH(batch, 0);
611     OUT_BCS_BATCH(batch, 0);
612     ADVANCE_BCS_BATCH(batch);
613 }
614
615 static void
616 gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
617 {
618     struct intel_batchbuffer *batch = encoder_context->base.batch;
619     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
620     struct i965_driver_data *i965 = i965_driver_data(ctx);
621     int i;
622
623     if (IS_STEPPING_BPLUS(i965)) {
624         gen75_mfc_pipe_buf_addr_state_bplus(ctx, encoder_context);
625         return;
626     }
627
628     BEGIN_BCS_BATCH(batch, 25);
629
630     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
631
632     if (mfc_context->pre_deblocking_output.bo)
633         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
634                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
635                       0);
636     else
637         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
638
639     if (mfc_context->post_deblocking_output.bo)
640         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
641                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
642                       0);                                                                                       /* post output addr  */ 
643     else
644         OUT_BCS_BATCH(batch, 0);
645
646     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
647                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
648                   0);                                                                                   /* uncompressed data */
649     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
650                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
651                   0);                                                                                   /* StreamOut data*/
652     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
653                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
654                   0);   
655     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
656                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
657                   0);
658     /* 7..22 Reference pictures*/
659     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
660         if ( mfc_context->reference_surfaces[i].bo != NULL) {
661             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
662                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
663                           0);                   
664         } else {
665             OUT_BCS_BATCH(batch, 0);
666         }
667     }
668     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
669                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
670                   0);                                                                                   /* Macroblock status buffer*/
671
672     OUT_BCS_BATCH(batch, 0);
673
674     ADVANCE_BCS_BATCH(batch);
675 }
676
677 static void
678 gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
679                                      struct intel_encoder_context *encoder_context)
680 {
681     struct intel_batchbuffer *batch = encoder_context->base.batch;
682     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
683
684     int i;
685
686     BEGIN_BCS_BATCH(batch, 71);
687
688     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
689
690     /* Reference frames and Current frames */
691     /* the DW1-32 is for the direct MV for reference */
692     for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
693         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
694             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
695                           I915_GEM_DOMAIN_INSTRUCTION, 0,
696                           0);
697             OUT_BCS_BATCH(batch, 0);
698         } else {
699             OUT_BCS_BATCH(batch, 0);
700             OUT_BCS_BATCH(batch, 0);
701         }
702     }
703     OUT_BCS_BATCH(batch, 0);
704
705     /* the DW34-36 is the MV for the current reference */
706     OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
707                   I915_GEM_DOMAIN_INSTRUCTION, 0,
708                   0);
709
710     OUT_BCS_BATCH(batch, 0);
711     OUT_BCS_BATCH(batch, 0);
712
713     /* POL list */
714     for(i = 0; i < 32; i++) {
715         OUT_BCS_BATCH(batch, i/2);
716     }
717     OUT_BCS_BATCH(batch, 0);
718     OUT_BCS_BATCH(batch, 0);
719
720     ADVANCE_BCS_BATCH(batch);
721 }
722
723 static void
724 gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
725 {
726     struct intel_batchbuffer *batch = encoder_context->base.batch;
727     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
728     struct i965_driver_data *i965 = i965_driver_data(ctx);
729     int i;
730
731     if (IS_STEPPING_BPLUS(i965)) {
732         gen75_mfc_avc_directmode_state_bplus(ctx, encoder_context);
733         return;
734     }
735
736     BEGIN_BCS_BATCH(batch, 69);
737
738     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
739
740     /* Reference frames and Current frames */
741     for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
742         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
743             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
744                           I915_GEM_DOMAIN_INSTRUCTION, 0,
745                           0);
746         } else {
747             OUT_BCS_BATCH(batch, 0);
748         }
749     }
750
751     /* POL list */
752     for(i = 0; i < 32; i++) {
753         OUT_BCS_BATCH(batch, i/2);
754     }
755     OUT_BCS_BATCH(batch, 0);
756     OUT_BCS_BATCH(batch, 0);
757
758     ADVANCE_BCS_BATCH(batch);
759 }
760
761
762 static void
763 gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
764                                         struct intel_encoder_context *encoder_context)
765 {
766     struct intel_batchbuffer *batch = encoder_context->base.batch;
767     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
768
769     BEGIN_BCS_BATCH(batch, 10);
770
771     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
772     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
773                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
774                   0);
775     OUT_BCS_BATCH(batch, 0);
776     OUT_BCS_BATCH(batch, 0);
777         
778     /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
779     OUT_BCS_BATCH(batch, 0);
780     OUT_BCS_BATCH(batch, 0);
781     OUT_BCS_BATCH(batch, 0);
782
783     /* the DW7-9 is for Bitplane Read Buffer Base Address */
784     OUT_BCS_BATCH(batch, 0);
785     OUT_BCS_BATCH(batch, 0);
786     OUT_BCS_BATCH(batch, 0);
787
788     ADVANCE_BCS_BATCH(batch);
789 }
790
791 static void
792 gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
793 {
794     struct intel_batchbuffer *batch = encoder_context->base.batch;
795     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
796     struct i965_driver_data *i965 = i965_driver_data(ctx);
797
798     if (IS_STEPPING_BPLUS(i965)) {
799         gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, encoder_context);
800         return;
801     }
802
803     BEGIN_BCS_BATCH(batch, 4);
804
805     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
806     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
807                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
808                   0);
809     OUT_BCS_BATCH(batch, 0);
810     OUT_BCS_BATCH(batch, 0);
811
812     ADVANCE_BCS_BATCH(batch);
813 }
814
815
816 static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
817                                                        struct encode_state *encode_state,
818                                                        struct intel_encoder_context *encoder_context)
819 {
820     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
821
822     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
823     mfc_context->set_surface_state(ctx, encoder_context);
824     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
825     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
826     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
827     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
828     mfc_context->avc_qm_state(ctx, encoder_context);
829     mfc_context->avc_fqm_state(ctx, encoder_context);
830     gen75_mfc_avc_directmode_state(ctx, encoder_context); 
831     intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
832 }
833
834
835 static VAStatus gen75_mfc_run(VADriverContextP ctx, 
836                               struct encode_state *encode_state,
837                               struct intel_encoder_context *encoder_context)
838 {
839     struct intel_batchbuffer *batch = encoder_context->base.batch;
840
841     intel_batchbuffer_flush(batch);             //run the pipeline
842
843     return VA_STATUS_SUCCESS;
844 }
845
846
847 static VAStatus
848 gen75_mfc_stop(VADriverContextP ctx, 
849                struct encode_state *encode_state,
850                struct intel_encoder_context *encoder_context,
851                int *encoded_bits_size)
852 {
853     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
854     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
855     VACodedBufferSegment *coded_buffer_segment;
856     
857     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
858     assert(vaStatus == VA_STATUS_SUCCESS);
859     *encoded_bits_size = coded_buffer_segment->size * 8;
860     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
861
862     return VA_STATUS_SUCCESS;
863 }
864
865
866 static void
867 gen75_mfc_avc_slice_state(VADriverContextP ctx,
868                           VAEncPictureParameterBufferH264 *pic_param,
869                           VAEncSliceParameterBufferH264 *slice_param,
870                           struct encode_state *encode_state,
871                           struct intel_encoder_context *encoder_context,
872                           int rate_control_enable,
873                           int qp,
874                           struct intel_batchbuffer *batch)
875 {
876     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
877     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
878     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
879     int beginmb = slice_param->macroblock_address;
880     int endmb = beginmb + slice_param->num_macroblocks;
881     int beginx = beginmb % width_in_mbs;
882     int beginy = beginmb / width_in_mbs;
883     int nextx =  endmb % width_in_mbs;
884     int nexty = endmb / width_in_mbs;
885     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
886     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
887     int maxQpN, maxQpP;
888     unsigned char correct[6], grow, shrink;
889     int i;
890     int bslice = 0;
891     int weighted_pred_idc = 0;
892     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
893     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
894
895     if (batch == NULL)
896         batch = encoder_context->base.batch;
897
898     if (slice_type == SLICE_TYPE_P) {
899         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
900     } else if (slice_type == SLICE_TYPE_B) {
901         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
902         bslice = 1;
903
904         if (weighted_pred_idc == 2) {
905             /* 8.4.3 - Derivation process for prediction weights (8-279) */
906             luma_log2_weight_denom = 5;
907             chroma_log2_weight_denom = 5;
908         }
909     }
910
911     maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
912     maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
913
914     for (i = 0; i < 6; i++)
915         correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
916
917     grow = mfc_context->bit_rate_control_context[slice_type].GrowInit + 
918         (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
919     shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit + 
920         (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
921
922     BEGIN_BCS_BATCH(batch, 11);;
923
924     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
925     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
926
927     if (slice_type == SLICE_TYPE_I) {
928         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
929     } else {
930         OUT_BCS_BATCH(batch,
931                       (1 << 16) | (bslice << 24) |                      /*1 reference frame*/
932                       (chroma_log2_weight_denom << 8) |
933                       (luma_log2_weight_denom << 0));
934     }
935
936     OUT_BCS_BATCH(batch, 
937                   (weighted_pred_idc << 30) |
938                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
939                   (slice_param->disable_deblocking_filter_idc << 27) |
940                   (slice_param->cabac_init_idc << 24) |
941                   (qp<<16) |                    /*Slice Quantization Parameter*/
942                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
943                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
944     OUT_BCS_BATCH(batch,
945                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
946                   (beginx << 16) |
947                   slice_param->macroblock_address );
948     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
949     OUT_BCS_BATCH(batch, 
950                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
951                   (1 << 30) |           /*ResetRateControlCounter*/
952                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
953                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
954                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
955                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
956                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
957                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
958                   (last_slice << 19) |     /*IsLastSlice*/
959                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
960                   (1 << 17) |       /*HeaderPresentFlag*/       
961                   (1 << 16) |       /*SliceData PresentFlag*/
962                   (1 << 15) |       /*TailPresentFlag*/
963                   (1 << 13) |       /*RBSP NAL TYPE*/   
964                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
965     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
966     OUT_BCS_BATCH(batch,
967                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
968                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
969                   (shrink << 8)  |
970                   (grow << 0));   
971     OUT_BCS_BATCH(batch,
972                   (correct[5] << 20) |
973                   (correct[4] << 16) |
974                   (correct[3] << 12) |
975                   (correct[2] << 8) |
976                   (correct[1] << 4) |
977                   (correct[0] << 0));
978     OUT_BCS_BATCH(batch, 0);
979
980     ADVANCE_BCS_BATCH(batch);
981 }
982
983
984 #ifdef MFC_SOFTWARE_HASWELL
985
986 static int
987 gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
988                                int qp,unsigned int *msg,
989                                struct intel_encoder_context *encoder_context,
990                                unsigned char target_mb_size, unsigned char max_mb_size,
991                                struct intel_batchbuffer *batch)
992 {
993     int len_in_dwords = 12;
994     unsigned int intra_msg;
995 #define         INTRA_MSG_FLAG          (1 << 13)
996 #define         INTRA_MBTYPE_MASK       (0x1F0000)
997     if (batch == NULL)
998         batch = encoder_context->base.batch;
999
1000     BEGIN_BCS_BATCH(batch, len_in_dwords);
1001
1002     intra_msg = msg[0] & 0xC0FF;
1003     intra_msg |= INTRA_MSG_FLAG;
1004     intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
1005     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1006     OUT_BCS_BATCH(batch, 0);
1007     OUT_BCS_BATCH(batch, 0);
1008     OUT_BCS_BATCH(batch, 
1009                   (0 << 24) |           /* PackedMvNum, Debug*/
1010                   (0 << 20) |           /* No motion vector */
1011                   (1 << 19) |           /* CbpDcY */
1012                   (1 << 18) |           /* CbpDcU */
1013                   (1 << 17) |           /* CbpDcV */
1014                   intra_msg);
1015
1016     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
1017     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
1018     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
1019
1020     /*Stuff for Intra MB*/
1021     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
1022     OUT_BCS_BATCH(batch, msg[2]);       
1023     OUT_BCS_BATCH(batch, msg[3]&0xFF);  
1024     
1025     /*MaxSizeInWord and TargetSzieInWord*/
1026     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1027                   (target_mb_size << 16) );
1028
1029     OUT_BCS_BATCH(batch, 0);
1030
1031     ADVANCE_BCS_BATCH(batch);
1032
1033     return len_in_dwords;
1034 }
1035
1036 static int
1037 gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1038                                unsigned int *msg, unsigned int offset,
1039                                struct intel_encoder_context *encoder_context,
1040                                unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1041                                struct intel_batchbuffer *batch)
1042 {
1043     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1044     int len_in_dwords = 12;
1045     unsigned int inter_msg = 0;
1046     if (batch == NULL)
1047         batch = encoder_context->base.batch;
1048     {
1049 #define MSG_MV_OFFSET   4
1050         unsigned int *mv_ptr;
1051         mv_ptr = msg + MSG_MV_OFFSET;
1052         /* MV of VME output is based on 16 sub-blocks. So it is necessary
1053          * to convert them to be compatible with the format of AVC_PAK
1054          * command.
1055          */
1056         if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
1057             /* MV[0] and MV[2] are replicated */
1058             mv_ptr[4] = mv_ptr[0];
1059             mv_ptr[5] = mv_ptr[1];
1060             mv_ptr[2] = mv_ptr[8];
1061             mv_ptr[3] = mv_ptr[9];
1062             mv_ptr[6] = mv_ptr[8];
1063             mv_ptr[7] = mv_ptr[9];
1064         } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
1065             /* MV[0] and MV[1] are replicated */
1066             mv_ptr[2] = mv_ptr[0];
1067             mv_ptr[3] = mv_ptr[1];
1068             mv_ptr[4] = mv_ptr[16];
1069             mv_ptr[5] = mv_ptr[17];
1070             mv_ptr[6] = mv_ptr[24];
1071             mv_ptr[7] = mv_ptr[25];
1072         } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1073                    !(msg[1] & SUBMB_SHAPE_MASK)) {
1074             /* Don't touch MV[0] or MV[1] */
1075             mv_ptr[2] = mv_ptr[8];
1076             mv_ptr[3] = mv_ptr[9];
1077             mv_ptr[4] = mv_ptr[16];
1078             mv_ptr[5] = mv_ptr[17];
1079             mv_ptr[6] = mv_ptr[24];
1080             mv_ptr[7] = mv_ptr[25];
1081         }
1082     }
1083
1084     BEGIN_BCS_BATCH(batch, len_in_dwords);
1085
1086     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1087
1088     inter_msg = 32;
1089     /* MV quantity */
1090     if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
1091         if (msg[1] & SUBMB_SHAPE_MASK)
1092             inter_msg = 128;
1093     }
1094     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
1095     OUT_BCS_BATCH(batch, offset);
1096     inter_msg = msg[0] & (0x1F00FFFF);
1097     inter_msg |= INTER_MV8;
1098     inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
1099     if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1100         (msg[1] & SUBMB_SHAPE_MASK)) {
1101         inter_msg |= INTER_MV32;
1102     }
1103
1104     OUT_BCS_BATCH(batch, inter_msg);
1105
1106     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
1107     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
1108 #if 0 
1109     if ( slice_type == SLICE_TYPE_B) {
1110         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
1111     } else {
1112         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
1113     }
1114 #else
1115     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
1116 #endif
1117
1118     inter_msg = msg[1] >> 8;
1119     /*Stuff for Inter MB*/
1120     OUT_BCS_BATCH(batch, inter_msg);        
1121     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
1122     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);
1123
1124     /*MaxSizeInWord and TargetSzieInWord*/
1125     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1126                   (target_mb_size << 16) );
1127
1128     OUT_BCS_BATCH(batch, 0x0);    
1129
1130     ADVANCE_BCS_BATCH(batch);
1131
1132     return len_in_dwords;
1133 }
1134
1135 #define         AVC_INTRA_RDO_OFFSET    4
1136 #define         AVC_INTER_RDO_OFFSET    10
1137 #define         AVC_INTER_MSG_OFFSET    8       
1138 #define         AVC_INTER_MV_OFFSET             48
1139 #define         AVC_RDO_MASK            0xFFFF
1140
1141 static void 
1142 gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1143                                         struct encode_state *encode_state,
1144                                         struct intel_encoder_context *encoder_context,
1145                                         int slice_index,
1146                                         struct intel_batchbuffer *slice_batch)
1147 {
1148     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1149     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1150     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1151     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1152     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1153     unsigned int *msg = NULL, offset = 0;
1154     unsigned char *msg_ptr = NULL;
1155     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1156     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1157     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1158     int i,x,y;
1159     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1160     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1161     unsigned char *slice_header = NULL;
1162     int slice_header_length_in_bits = 0;
1163     unsigned int tail_data[] = { 0x0, 0x0 };
1164     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1165     int is_intra = slice_type == SLICE_TYPE_I;
1166
1167     if (rate_control_mode == VA_RC_CBR) {
1168         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1169         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1170     }
1171
1172     /* only support for 8-bit pixel bit-depth */
1173     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1174     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1175     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1176     assert(qp >= 0 && qp < 52);
1177
1178     gen75_mfc_avc_slice_state(ctx, 
1179                               pPicParameter,
1180                               pSliceParameter,
1181                               encode_state, encoder_context,
1182                               (rate_control_mode == VA_RC_CBR), qp, slice_batch);
1183
1184     if ( slice_index == 0) 
1185         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1186
1187     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1188
1189     // slice hander
1190     mfc_context->insert_object(ctx, encoder_context,
1191                                (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1192                                5,  /* first 5 bytes are start code + nal unit type */
1193                                1, 0, 1, slice_batch);
1194
1195     dri_bo_map(vme_context->vme_output.bo , 1);
1196     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1197
1198     if (is_intra) {
1199         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1200     } else {
1201         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1202     }
1203    
1204     for (i = pSliceParameter->macroblock_address; 
1205          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1206         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1207         x = i % width_in_mbs;
1208         y = i / width_in_mbs;
1209         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1210
1211         if (is_intra) {
1212             assert(msg);
1213             gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1214         } else {
1215             int inter_rdo, intra_rdo;
1216             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1217             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1218             offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1219             if (intra_rdo < inter_rdo) { 
1220                 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1221             } else {
1222                 msg += AVC_INTER_MSG_OFFSET;
1223                 gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, slice_type, slice_batch);
1224             }
1225         }
1226     }
1227    
1228     dri_bo_unmap(vme_context->vme_output.bo);
1229
1230     if ( last_slice ) {    
1231         mfc_context->insert_object(ctx, encoder_context,
1232                                    tail_data, 2, 8,
1233                                    2, 1, 1, 0, slice_batch);
1234     } else {
1235         mfc_context->insert_object(ctx, encoder_context,
1236                                    tail_data, 1, 8,
1237                                    1, 1, 1, 0, slice_batch);
1238     }
1239
1240     free(slice_header);
1241
1242 }
1243
1244 static dri_bo *
1245 gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1246                                    struct encode_state *encode_state,
1247                                    struct intel_encoder_context *encoder_context)
1248 {
1249     struct i965_driver_data *i965 = i965_driver_data(ctx);
1250     struct intel_batchbuffer *batch;
1251     dri_bo *batch_bo;
1252     int i;
1253     int buffer_size;
1254     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1255     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1256     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1257
1258     buffer_size = width_in_mbs * height_in_mbs * 64;
1259     batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
1260     batch_bo = batch->buffer;
1261     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1262         gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1263     }
1264
1265     intel_batchbuffer_align(batch, 8);
1266     
1267     BEGIN_BCS_BATCH(batch, 2);
1268     OUT_BCS_BATCH(batch, 0);
1269     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1270     ADVANCE_BCS_BATCH(batch);
1271
1272     dri_bo_reference(batch_bo);
1273     intel_batchbuffer_free(batch);
1274
1275     return batch_bo;
1276 }
1277
1278 #else
1279
1280 static void
1281 gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1282                                      struct encode_state *encode_state,
1283                                      struct intel_encoder_context *encoder_context)
1284
1285 {
1286     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1287     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1288
1289     assert(vme_context->vme_output.bo);
1290     mfc_context->buffer_suface_setup(ctx,
1291                                      &mfc_context->gpe_context,
1292                                      &vme_context->vme_output,
1293                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1294                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1295     assert(mfc_context->aux_batchbuffer_surface.bo);
1296     mfc_context->buffer_suface_setup(ctx,
1297                                      &mfc_context->gpe_context,
1298                                      &mfc_context->aux_batchbuffer_surface,
1299                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1300                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
1301 }
1302
1303 static void
1304 gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1305                                       struct encode_state *encode_state,
1306                                       struct intel_encoder_context *encoder_context)
1307
1308 {
1309     struct i965_driver_data *i965 = i965_driver_data(ctx);
1310     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1311     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1312     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1313     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1314     mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1315     mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1316     mfc_context->mfc_batchbuffer_surface.pitch = 16;
1317     mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, 
1318                                                            "MFC batchbuffer",
1319                                                            mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1320                                                            0x1000);
1321     mfc_context->buffer_suface_setup(ctx,
1322                                      &mfc_context->gpe_context,
1323                                      &mfc_context->mfc_batchbuffer_surface,
1324                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1325                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1326 }
1327
1328 static void
1329 gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
1330                                      struct encode_state *encode_state,
1331                                      struct intel_encoder_context *encoder_context)
1332 {
1333     gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1334     gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1335 }
1336
1337 static void
1338 gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
1339                                  struct encode_state *encode_state,
1340                                  struct intel_encoder_context *encoder_context)
1341 {
1342     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1343     struct gen6_interface_descriptor_data *desc;   
1344     int i;
1345     dri_bo *bo;
1346
1347     bo = mfc_context->gpe_context.idrt.bo;
1348     dri_bo_map(bo, 1);
1349     assert(bo->virtual);
1350     desc = bo->virtual;
1351
1352     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1353         struct i965_kernel *kernel;
1354
1355         kernel = &mfc_context->gpe_context.kernels[i];
1356         assert(sizeof(*desc) == 32);
1357
1358         /*Setup the descritor table*/
1359         memset(desc, 0, sizeof(*desc));
1360         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1361         desc->desc2.sampler_count = 0;
1362         desc->desc2.sampler_state_pointer = 0;
1363         desc->desc3.binding_table_entry_count = 2;
1364         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1365         desc->desc4.constant_urb_entry_read_offset = 0;
1366         desc->desc4.constant_urb_entry_read_length = 4;
1367                 
1368         /*kernel start*/
1369         dri_bo_emit_reloc(bo,   
1370                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1371                           0,
1372                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1373                           kernel->bo);
1374         desc++;
1375     }
1376
1377     dri_bo_unmap(bo);
1378 }
1379
1380 static void
1381 gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1382                                      struct encode_state *encode_state,
1383                                      struct intel_encoder_context *encoder_context)
1384 {
1385     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1386     
1387     (void)mfc_context;
1388 }
1389
1390 static void
1391 gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1392                                           int index,
1393                                           int head_offset,
1394                                           int batchbuffer_offset,
1395                                           int head_size,
1396                                           int tail_size,
1397                                           int number_mb_cmds,
1398                                           int first_object,
1399                                           int last_object,
1400                                           int last_slice,
1401                                           int mb_x,
1402                                           int mb_y,
1403                                           int width_in_mbs,
1404                                           int qp)
1405 {
1406     BEGIN_BATCH(batch, 12);
1407     
1408     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1409     OUT_BATCH(batch, index);
1410     OUT_BATCH(batch, 0);
1411     OUT_BATCH(batch, 0);
1412     OUT_BATCH(batch, 0);
1413     OUT_BATCH(batch, 0);
1414    
1415     /*inline data */
1416     OUT_BATCH(batch, head_offset);
1417     OUT_BATCH(batch, batchbuffer_offset);
1418     OUT_BATCH(batch, 
1419               head_size << 16 |
1420               tail_size);
1421     OUT_BATCH(batch,
1422               number_mb_cmds << 16 |
1423               first_object << 2 |
1424               last_object << 1 |
1425               last_slice);
1426     OUT_BATCH(batch,
1427               mb_y << 8 |
1428               mb_x);
1429     OUT_BATCH(batch,
1430               qp << 16 |
1431               width_in_mbs);
1432
1433     ADVANCE_BATCH(batch);
1434 }
1435
1436 static void
1437 gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1438                                         struct intel_encoder_context *encoder_context,
1439                                         VAEncSliceParameterBufferH264 *slice_param,
1440                                         int head_offset,
1441                                         unsigned short head_size,
1442                                         unsigned short tail_size,
1443                                         int batchbuffer_offset,
1444                                         int qp,
1445                                         int last_slice)
1446 {
1447     struct intel_batchbuffer *batch = encoder_context->base.batch;
1448     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1449     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1450     int total_mbs = slice_param->num_macroblocks;
1451     int number_mb_cmds = 128;
1452     int starting_mb = 0;
1453     int last_object = 0;
1454     int first_object = 1;
1455     int i;
1456     int mb_x, mb_y;
1457     int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1458
1459     for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1460         last_object = (total_mbs - starting_mb) == number_mb_cmds;
1461         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1462         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1463         assert(mb_x <= 255 && mb_y <= 255);
1464
1465         starting_mb += number_mb_cmds;
1466
1467         gen75_mfc_batchbuffer_emit_object_command(batch,
1468                                                   index,
1469                                                   head_offset,
1470                                                   batchbuffer_offset,
1471                                                   head_size,
1472                                                   tail_size,
1473                                                   number_mb_cmds,
1474                                                   first_object,
1475                                                   last_object,
1476                                                   last_slice,
1477                                                   mb_x,
1478                                                   mb_y,
1479                                                   width_in_mbs,
1480                                                   qp);
1481
1482         if (first_object) {
1483             head_offset += head_size;
1484             batchbuffer_offset += head_size;
1485         }
1486
1487         if (last_object) {
1488             head_offset += tail_size;
1489             batchbuffer_offset += tail_size;
1490         }
1491
1492         batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
1493
1494         first_object = 0;
1495     }
1496
1497     if (!last_object) {
1498         last_object = 1;
1499         number_mb_cmds = total_mbs % number_mb_cmds;
1500         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1501         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1502         assert(mb_x <= 255 && mb_y <= 255);
1503         starting_mb += number_mb_cmds;
1504
1505         gen75_mfc_batchbuffer_emit_object_command(batch,
1506                                                   index,
1507                                                   head_offset,
1508                                                   batchbuffer_offset,
1509                                                   head_size,
1510                                                   tail_size,
1511                                                   number_mb_cmds,
1512                                                   first_object,
1513                                                   last_object,
1514                                                   last_slice,
1515                                                   mb_x,
1516                                                   mb_y,
1517                                                   width_in_mbs,
1518                                                   qp);
1519     }
1520 }
1521                           
1522 /*
1523  * return size in Owords (16bytes)
1524  */         
1525 static int
1526 gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1527                                 struct encode_state *encode_state,
1528                                 struct intel_encoder_context *encoder_context,
1529                                 int slice_index,
1530                                 int batchbuffer_offset)
1531 {
1532     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1533     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1534     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1535     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1536     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1537     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1538     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1539     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1540     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1541     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1542     unsigned char *slice_header = NULL;
1543     int slice_header_length_in_bits = 0;
1544     unsigned int tail_data[] = { 0x0, 0x0 };
1545     long head_offset;
1546     int old_used = intel_batchbuffer_used_size(slice_batch), used;
1547     unsigned short head_size, tail_size;
1548     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1549
1550     if (rate_control_mode == VA_RC_CBR) {
1551         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1552         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1553     }
1554
1555     /* only support for 8-bit pixel bit-depth */
1556     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1557     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1558     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1559     assert(qp >= 0 && qp < 52);
1560
1561     head_offset = old_used / 16;
1562     gen75_mfc_avc_slice_state(ctx,
1563                               pPicParameter,
1564                               pSliceParameter,
1565                               encode_state,
1566                               encoder_context,
1567                               (rate_control_mode == VA_RC_CBR),
1568                               qp,
1569                               slice_batch);
1570
1571     if (slice_index == 0)
1572         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1573
1574     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1575
1576     // slice hander
1577     mfc_context->insert_object(ctx,
1578                                encoder_context,
1579                                (unsigned int *)slice_header,
1580                                ALIGN(slice_header_length_in_bits, 32) >> 5,
1581                                slice_header_length_in_bits & 0x1f,
1582                                5,  /* first 5 bytes are start code + nal unit type */
1583                                1,
1584                                0,
1585                                1,
1586                                slice_batch);
1587     free(slice_header);
1588
1589     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1590     used = intel_batchbuffer_used_size(slice_batch);
1591     head_size = (used - old_used) / 16;
1592     old_used = used;
1593
1594     /* tail */
1595     if (last_slice) {    
1596         mfc_context->insert_object(ctx,
1597                                    encoder_context,
1598                                    tail_data,
1599                                    2,
1600                                    8,
1601                                    2,
1602                                    1,
1603                                    1,
1604                                    0,
1605                                    slice_batch);
1606     } else {
1607         mfc_context->insert_object(ctx,
1608                                    encoder_context,
1609                                    tail_data,
1610                                    1,
1611                                    8,
1612                                    1,
1613                                    1,
1614                                    1,
1615                                    0,
1616                                    slice_batch);
1617     }
1618
1619     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1620     used = intel_batchbuffer_used_size(slice_batch);
1621     tail_size = (used - old_used) / 16;
1622
1623    
1624     gen75_mfc_avc_batchbuffer_slice_command(ctx,
1625                                             encoder_context,
1626                                             pSliceParameter,
1627                                             head_offset,
1628                                             head_size,
1629                                             tail_size,
1630                                             batchbuffer_offset,
1631                                             qp,
1632                                             last_slice);
1633
1634     return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
1635 }
1636
1637 static void
1638 gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1639                                    struct encode_state *encode_state,
1640                                    struct intel_encoder_context *encoder_context)
1641 {
1642     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1643     struct intel_batchbuffer *batch = encoder_context->base.batch;
1644     int i, size, offset = 0;
1645     intel_batchbuffer_start_atomic(batch, 0x4000); 
1646     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1647
1648     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1649         size = gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1650         offset += size;
1651     }
1652
1653     intel_batchbuffer_end_atomic(batch);
1654     intel_batchbuffer_flush(batch);
1655 }
1656
1657 static void
1658 gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
1659                                 struct encode_state *encode_state,
1660                                 struct intel_encoder_context *encoder_context)
1661 {
1662     gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1663     gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1664     gen75_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1665     gen75_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1666 }
1667
1668 static dri_bo *
1669 gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1670                                    struct encode_state *encode_state,
1671                                    struct intel_encoder_context *encoder_context)
1672 {
1673     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1674
1675     gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1676     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1677
1678     return mfc_context->mfc_batchbuffer_surface.bo;
1679 }
1680
1681 #endif
1682
1683 static void
1684 gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
1685                                   struct encode_state *encode_state,
1686                                   struct intel_encoder_context *encoder_context)
1687 {
1688     struct intel_batchbuffer *batch = encoder_context->base.batch;
1689     dri_bo *slice_batch_bo;
1690
1691     if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1692         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1693         assert(0);
1694         return; 
1695     }
1696
1697 #ifdef MFC_SOFTWARE_HASWELL
1698     slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1699 #else
1700     slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1701 #endif
1702
1703     // begin programing
1704     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1705     intel_batchbuffer_emit_mi_flush(batch);
1706     
1707     // picture level programing
1708     gen75_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1709
1710     BEGIN_BCS_BATCH(batch, 2);
1711     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1712     OUT_BCS_RELOC(batch,
1713                   slice_batch_bo,
1714                   I915_GEM_DOMAIN_COMMAND, 0, 
1715                   0);
1716     ADVANCE_BCS_BATCH(batch);
1717
1718     // end programing
1719     intel_batchbuffer_end_atomic(batch);
1720
1721     dri_bo_unreference(slice_batch_bo);
1722 }
1723
1724
1725 static VAStatus
1726 gen75_mfc_avc_encode_picture(VADriverContextP ctx, 
1727                              struct encode_state *encode_state,
1728                              struct intel_encoder_context *encoder_context)
1729 {
1730     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1731     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1732     int current_frame_bits_size;
1733     int sts;
1734  
1735     for (;;) {
1736         gen75_mfc_init(ctx, encode_state, encoder_context);
1737         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1738         /*Programing bcs pipeline*/
1739         gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);  //filling the pipeline
1740         gen75_mfc_run(ctx, encode_state, encoder_context);
1741         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1742             gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1743             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1744             if (sts == BRC_NO_HRD_VIOLATION) {
1745                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1746                 break;
1747             }
1748             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1749                 if (!mfc_context->hrd.violation_noted) {
1750                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1751                     mfc_context->hrd.violation_noted = 1;
1752                 }
1753                 return VA_STATUS_SUCCESS;
1754             }
1755         } else {
1756             break;
1757         }
1758     }
1759
1760     return VA_STATUS_SUCCESS;
1761 }
1762
1763 /*
1764  * MPEG-2
1765  */
1766
/*
 * Picture-type code placed in MFX_MPEG2_PIC_STATE, looked up with the
 * picture_type field of the MPEG-2 picture parameter buffer.
 */
static const int
va_to_gen75_mpeg2_picture_type[3] = {
    [0] = 1,    /* I */
    [1] = 2,    /* P */
    [2] = 3,    /* B */
};
1773
1774 static void
1775 gen75_mfc_mpeg2_pic_state(VADriverContextP ctx,
1776                           struct intel_encoder_context *encoder_context,
1777                           struct encode_state *encode_state)
1778 {
1779     struct intel_batchbuffer *batch = encoder_context->base.batch;
1780     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1781     VAEncPictureParameterBufferMPEG2 *pic_param;
1782     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1783     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1784     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1785
1786     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1787     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1788     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1789
1790     BEGIN_BCS_BATCH(batch, 13);
1791     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1792     OUT_BCS_BATCH(batch,
1793                   (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1794                   (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1795                   (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1796                   (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1797                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1798                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1799                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1800                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1801                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1802                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1803                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1804                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1805     OUT_BCS_BATCH(batch,
1806                   0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1807                   va_to_gen75_mpeg2_picture_type[pic_param->picture_type] << 9 |
1808                   0);
1809     OUT_BCS_BATCH(batch,
1810                   1 << 31 |     /* slice concealment */
1811                   (height_in_mbs - 1) << 16 |
1812                   (width_in_mbs - 1));
1813     if (slice_param && slice_param->quantiser_scale_code >= 14) 
1814         OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
1815     else
1816         OUT_BCS_BATCH(batch, 0);
1817
1818     OUT_BCS_BATCH(batch, 0);
1819     OUT_BCS_BATCH(batch,
1820                   0xFFF << 16 | /* InterMBMaxSize */
1821                   0xFFF << 0 |  /* IntraMBMaxSize */
1822                   0);
1823     OUT_BCS_BATCH(batch, 0);
1824     OUT_BCS_BATCH(batch, 0);
1825     OUT_BCS_BATCH(batch, 0);
1826     OUT_BCS_BATCH(batch, 0);
1827     OUT_BCS_BATCH(batch, 0);
1828     OUT_BCS_BATCH(batch, 0);
1829     ADVANCE_BCS_BATCH(batch);
1830 }
1831
1832 static void
1833 gen75_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1834 {
1835     unsigned char intra_qm[64] = {
1836         8, 16, 19, 22, 26, 27, 29, 34,
1837         16, 16, 22, 24, 27, 29, 34, 37,
1838         19, 22, 26, 27, 29, 34, 34, 38,
1839         22, 22, 26, 27, 29, 34, 37, 40,
1840         22, 26, 27, 29, 32, 35, 40, 48,
1841         26, 27, 29, 32, 35, 40, 48, 58,
1842         26, 27, 29, 34, 38, 46, 56, 69,
1843         27, 29, 35, 38, 46, 56, 69, 83
1844     };
1845
1846     unsigned char non_intra_qm[64] = {
1847         16, 16, 16, 16, 16, 16, 16, 16,
1848         16, 16, 16, 16, 16, 16, 16, 16,
1849         16, 16, 16, 16, 16, 16, 16, 16,
1850         16, 16, 16, 16, 16, 16, 16, 16,
1851         16, 16, 16, 16, 16, 16, 16, 16,
1852         16, 16, 16, 16, 16, 16, 16, 16,
1853         16, 16, 16, 16, 16, 16, 16, 16,
1854         16, 16, 16, 16, 16, 16, 16, 16
1855     };
1856
1857     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1858     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
1859 }
1860
1861 static void
1862 gen75_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1863 {
1864     unsigned short intra_fqm[64] = {
1865         65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1866         65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1867         65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1868         65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1869         65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1870         65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1871         65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1872         65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1873     };
1874
1875     unsigned short non_intra_fqm[64] = {
1876         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1877         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1878         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1879         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1880         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1881         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1882         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1883         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1884     };
1885
1886     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1887     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
1888 }
1889
1890 static void
1891 gen75_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1892                                  struct intel_encoder_context *encoder_context,
1893                                  int x, int y,
1894                                  int next_x, int next_y,
1895                                  int is_fisrt_slice_group,
1896                                  int is_last_slice_group,
1897                                  int intra_slice,
1898                                  int qp,
1899                                  struct intel_batchbuffer *batch)
1900 {
1901     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1902
1903     if (batch == NULL)
1904         batch = encoder_context->base.batch;
1905
1906     BEGIN_BCS_BATCH(batch, 8);
1907
1908     OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1909     OUT_BCS_BATCH(batch,
1910                   0 << 31 |                             /* MbRateCtrlFlag */
1911                   !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
1912                   1 << 17 |                             /* Insert Header before the first slice group data */
1913                   1 << 16 |                             /* SliceData PresentFlag: always 1 */
1914                   1 << 15 |                             /* TailPresentFlag: always 1 */
1915                   0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
1916                   !!intra_slice << 13 |                 /* IntraSlice */
1917                   !!intra_slice << 12 |                 /* IntraSliceFlag */
1918                   0);
1919     OUT_BCS_BATCH(batch,
1920                   next_y << 24 |
1921                   next_x << 16 |
1922                   y << 8 |
1923                   x << 0 |
1924                   0);
1925     OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
1926     /* bitstream pointer is only loaded once for the first slice of a frame when 
1927      * LoadSlicePointerFlag is 0
1928      */
1929     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1930     OUT_BCS_BATCH(batch, 0);    /* FIXME: */
1931     OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
1932     OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
1933
1934     ADVANCE_BCS_BATCH(batch);
1935 }
1936
1937 static int
1938 gen75_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1939                                  struct intel_encoder_context *encoder_context,
1940                                  int x, int y,
1941                                  int first_mb_in_slice,
1942                                  int last_mb_in_slice,
1943                                  int first_mb_in_slice_group,
1944                                  int last_mb_in_slice_group,
1945                                  int mb_type,
1946                                  int qp_scale_code,
1947                                  int coded_block_pattern,
1948                                  unsigned char target_size_in_word,
1949                                  unsigned char max_size_in_word,
1950                                  struct intel_batchbuffer *batch)
1951 {
1952     int len_in_dwords = 9;
1953
1954     if (batch == NULL)
1955         batch = encoder_context->base.batch;
1956
1957     BEGIN_BCS_BATCH(batch, len_in_dwords);
1958
1959     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1960     OUT_BCS_BATCH(batch,
1961                   0 << 24 |     /* PackedMvNum */
1962                   0 << 20 |     /* MvFormat */
1963                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1964                   0 << 15 |     /* TransformFlag: frame DCT */
1965                   0 << 14 |     /* FieldMbFlag */
1966                   1 << 13 |     /* IntraMbFlag */
1967                   mb_type << 8 |   /* MbType: Intra */
1968                   0 << 2 |      /* SkipMbFlag */
1969                   0 << 0 |      /* InterMbMode */
1970                   0);
1971     OUT_BCS_BATCH(batch, y << 16 | x);
1972     OUT_BCS_BATCH(batch,
1973                   max_size_in_word << 24 |
1974                   target_size_in_word << 16 |
1975                   coded_block_pattern << 6 |      /* CBP */
1976                   0);
1977     OUT_BCS_BATCH(batch,
1978                   last_mb_in_slice << 31 |
1979                   first_mb_in_slice << 30 |
1980                   0 << 27 |     /* EnableCoeffClamp */
1981                   last_mb_in_slice_group << 26 |
1982                   0 << 25 |     /* MbSkipConvDisable */
1983                   first_mb_in_slice_group << 24 |
1984                   0 << 16 |     /* MvFieldSelect */
1985                   qp_scale_code << 0 |
1986                   0);
1987     OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
1988     OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
1989     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1990     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1991
1992     ADVANCE_BCS_BATCH(batch);
1993
1994     return len_in_dwords;
1995 }
1996
1997 #define MPEG2_INTER_MV_OFFSET   12 
1998
/*
 * Motion-vector limits, looked up by f_code.  Entry 0 is a placeholder;
 * mpeg2_motion_vector() only consults entries 1 to 9.
 */
static struct _mv_ranges
{
    int low;    /* in the unit of 1/2 pixel */
    int high;   /* in the unit of 1/2 pixel */
} mv_ranges[] = {
    {     0,    0 },
    {   -16,   15 },
    {   -32,   31 },
    {   -64,   63 },
    {  -128,  127 },
    {  -256,  255 },
    {  -512,  511 },
    { -1024, 1023 },
    { -2048, 2047 },
    { -4096, 4095 }
};

/*
 * Sanitize one motion-vector component.
 *
 * mv           vector component
 * pos          macroblock index along the same axis
 * display_max  picture extent along that axis; compared doubled, like the
 *              16 * 2 macroblock size
 * f_code       selects the allowed range; values outside 1..9 skip clamping
 *
 * The vector is reset to 0 when the displaced 16-wide macroblock would start
 * before 0 or end beyond display_max.  The result is then clamped to
 * mv_ranges[f_code].
 */
static int
mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
{
    const int block_start = pos * 16 * 2;
    const int block_end = (pos + 1) * 16 * 2;
    const int picture_end = display_max * 2;
    int result = mv;

    /* a vector that points outside the picture is dropped altogether */
    if (mv + block_start < 0 || mv + block_end > picture_end)
        result = 0;

    /* no range table entry for this f_code: leave the value alone */
    if (f_code <= 0 || f_code >= 10)
        return result;

    if (result < mv_ranges[f_code].low)
        result = mv_ranges[f_code].low;

    if (result > mv_ranges[f_code].high)
        result = mv_ranges[f_code].high;

    return result;
}
2033
2034 static int
2035 gen75_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
2036                                  struct encode_state *encode_state,
2037                                  struct intel_encoder_context *encoder_context,
2038                                  unsigned int *msg,
2039                                  int width_in_mbs, int height_in_mbs,
2040                                  int x, int y,
2041                                  int first_mb_in_slice,
2042                                  int last_mb_in_slice,
2043                                  int first_mb_in_slice_group,
2044                                  int last_mb_in_slice_group,
2045                                  int qp_scale_code,
2046                                  unsigned char target_size_in_word,
2047                                  unsigned char max_size_in_word,
2048                                  struct intel_batchbuffer *batch)
2049 {
2050     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
2051     int len_in_dwords = 9;
2052     short *mvptr, mvx0, mvy0, mvx1, mvy1;
2053     
2054     if (batch == NULL)
2055         batch = encoder_context->base.batch;
2056
2057     mvptr = (short *)(msg + MPEG2_INTER_MV_OFFSET);
2058     mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
2059     mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
2060     mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
2061     mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
2062
2063     BEGIN_BCS_BATCH(batch, len_in_dwords);
2064
2065     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
2066     OUT_BCS_BATCH(batch,
2067                   2 << 24 |     /* PackedMvNum */
2068                   7 << 20 |     /* MvFormat */
2069                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
2070                   0 << 15 |     /* TransformFlag: frame DCT */
2071                   0 << 14 |     /* FieldMbFlag */
2072                   0 << 13 |     /* IntraMbFlag */
2073                   1 << 8 |      /* MbType: Frame-based */
2074                   0 << 2 |      /* SkipMbFlag */
2075                   0 << 0 |      /* InterMbMode */
2076                   0);
2077     OUT_BCS_BATCH(batch, y << 16 | x);
2078     OUT_BCS_BATCH(batch,
2079                   max_size_in_word << 24 |
2080                   target_size_in_word << 16 |
2081                   0x3f << 6 |   /* CBP */
2082                   0);
2083     OUT_BCS_BATCH(batch,
2084                   last_mb_in_slice << 31 |
2085                   first_mb_in_slice << 30 |
2086                   0 << 27 |     /* EnableCoeffClamp */
2087                   last_mb_in_slice_group << 26 |
2088                   0 << 25 |     /* MbSkipConvDisable */
2089                   first_mb_in_slice_group << 24 |
2090                   0 << 16 |     /* MvFieldSelect */
2091                   qp_scale_code << 0 |
2092                   0);
2093
2094     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
2095     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
2096     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
2097     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
2098
2099     ADVANCE_BCS_BATCH(batch);
2100
2101     return len_in_dwords;
2102 }
2103
2104 static void
2105 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
2106                                            struct encode_state *encode_state,
2107                                            struct intel_encoder_context *encoder_context,
2108                                            struct intel_batchbuffer *slice_batch)
2109 {
2110     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2111     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
2112
2113     if (encode_state->packed_header_data[idx]) {
2114         VAEncPackedHeaderParameterBuffer *param = NULL;
2115         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2116         unsigned int length_in_bits;
2117
2118         assert(encode_state->packed_header_param[idx]);
2119         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2120         length_in_bits = param->bit_length;
2121
2122         mfc_context->insert_object(ctx,
2123                                    encoder_context,
2124                                    header_data,
2125                                    ALIGN(length_in_bits, 32) >> 5,
2126                                    length_in_bits & 0x1f,
2127                                    5,   /* FIXME: check it */
2128                                    0,
2129                                    0,
2130                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2131                                    slice_batch);
2132     }
2133
2134     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2135
2136     if (encode_state->packed_header_data[idx]) {
2137         VAEncPackedHeaderParameterBuffer *param = NULL;
2138         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2139         unsigned int length_in_bits;
2140
2141         assert(encode_state->packed_header_param[idx]);
2142         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2143         length_in_bits = param->bit_length;
2144
2145         mfc_context->insert_object(ctx,
2146                                    encoder_context,
2147                                    header_data,
2148                                    ALIGN(length_in_bits, 32) >> 5,
2149                                    length_in_bits & 0x1f,
2150                                    5,   /* FIXME: check it */
2151                                    0,
2152                                    0,
2153                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2154                                    slice_batch);
2155     }
2156 }
2157
2158 static void 
2159 gen75_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2160                                      struct encode_state *encode_state,
2161                                      struct intel_encoder_context *encoder_context,
2162                                      int slice_index,
2163                                      VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2164                                      struct intel_batchbuffer *slice_batch)
2165 {
2166     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2167     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2168     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2169     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2170     unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2171     unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2172     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2173     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2174     int i, j;
2175     int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2176     unsigned int *msg = NULL;
2177     unsigned char *msg_ptr = NULL;
2178
2179     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
2180     h_start_pos = slice_param->macroblock_address % width_in_mbs;
2181     v_start_pos = slice_param->macroblock_address / width_in_mbs;
2182     assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
2183
2184     dri_bo_map(vme_context->vme_output.bo , 0);
2185     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
2186
2187     if (next_slice_group_param) {
2188         h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2189         v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
2190     } else {
2191         h_next_start_pos = 0;
2192         v_next_start_pos = height_in_mbs;
2193     }
2194
2195     gen75_mfc_mpeg2_slicegroup_state(ctx,
2196                                      encoder_context,
2197                                      h_start_pos,
2198                                      v_start_pos,
2199                                      h_next_start_pos,
2200                                      v_next_start_pos,
2201                                      slice_index == 0,
2202                                      next_slice_group_param == NULL,
2203                                      slice_param->is_intra_slice,
2204                                      slice_param->quantiser_scale_code,
2205                                      slice_batch);
2206
2207     if (slice_index == 0) 
2208         intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2209
2210     /* Insert '00' to make sure the header is valid */
2211     mfc_context->insert_object(ctx,
2212                                encoder_context,
2213                                (unsigned int*)section_delimiter,
2214                                1,
2215                                8,   /* 8bits in the last DWORD */
2216                                1,   /* 1 byte */
2217                                1,
2218                                0,
2219                                0,
2220                                slice_batch);
2221
2222     for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2223         /* PAK for each macroblocks */
2224         for (j = 0; j < slice_param->num_macroblocks; j++) {
2225             int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2226             int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2227             int first_mb_in_slice = (j == 0);
2228             int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2229             int first_mb_in_slice_group = (i == 0 && j == 0);
2230             int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2231                                           j == slice_param->num_macroblocks - 1);
2232
2233             msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
2234
2235             if (slice_param->is_intra_slice) {
2236                 gen75_mfc_mpeg2_pak_object_intra(ctx,
2237                                                  encoder_context,
2238                                                  h_pos, v_pos,
2239                                                  first_mb_in_slice,
2240                                                  last_mb_in_slice,
2241                                                  first_mb_in_slice_group,
2242                                                  last_mb_in_slice_group,
2243                                                  0x1a,
2244                                                  slice_param->quantiser_scale_code,
2245                                                  0x3f,
2246                                                  0,
2247                                                  0xff,
2248                                                  slice_batch);
2249             } else {
2250                 int inter_rdo, intra_rdo;
2251                 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
2252                 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
2253
2254                 if (intra_rdo < inter_rdo) 
2255                     gen75_mfc_mpeg2_pak_object_intra(ctx,
2256                                                      encoder_context,
2257                                                      h_pos, v_pos,
2258                                                      first_mb_in_slice,
2259                                                      last_mb_in_slice,
2260                                                      first_mb_in_slice_group,
2261                                                      last_mb_in_slice_group,
2262                                                      0x1a,
2263                                                      slice_param->quantiser_scale_code,
2264                                                      0x3f,
2265                                                      0,
2266                                                      0xff,
2267                                                      slice_batch);
2268                 else
2269                     gen75_mfc_mpeg2_pak_object_inter(ctx,
2270                                                      encode_state,
2271                                                      encoder_context,
2272                                                      msg,
2273                                                      width_in_mbs, height_in_mbs,
2274                                                      h_pos, v_pos,
2275                                                      first_mb_in_slice,
2276                                                      last_mb_in_slice,
2277                                                      first_mb_in_slice_group,
2278                                                      last_mb_in_slice_group,
2279                                                      slice_param->quantiser_scale_code,
2280                                                      0,
2281                                                      0xff,
2282                                                      slice_batch);
2283             }
2284         }
2285
2286         slice_param++;
2287     }
2288
2289     dri_bo_unmap(vme_context->vme_output.bo);
2290
2291     /* tail data */
2292     if (next_slice_group_param == NULL) { /* end of a picture */
2293         mfc_context->insert_object(ctx,
2294                                    encoder_context,
2295                                    (unsigned int *)tail_delimiter,
2296                                    2,
2297                                    8,   /* 8bits in the last DWORD */
2298                                    5,   /* 5 bytes */
2299                                    1,
2300                                    1,
2301                                    0,
2302                                    slice_batch);
2303     } else {        /* end of a lsice group */
2304         mfc_context->insert_object(ctx,
2305                                    encoder_context,
2306                                    (unsigned int *)section_delimiter,
2307                                    1,
2308                                    8,   /* 8bits in the last DWORD */
2309                                    1,   /* 1 byte */
2310                                    1,
2311                                    1,
2312                                    0,
2313                                    slice_batch);
2314     }
2315 }
2316
2317 /* 
2318  * A batch buffer for all slices, including slice state, 
2319  * slice insert object and slice pak object commands
2320  *
2321  */
2322 static dri_bo *
2323 gen75_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2324                                            struct encode_state *encode_state,
2325                                            struct intel_encoder_context *encoder_context)
2326 {
2327     struct i965_driver_data *i965 = i965_driver_data(ctx);
2328     struct intel_batchbuffer *batch;
2329     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2330     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2331     dri_bo *batch_bo;
2332     int i;
2333     int buffer_size;
2334     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2335     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2336
2337     buffer_size = width_in_mbs * height_in_mbs * 64;
2338     batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
2339     batch_bo = batch->buffer;
2340
2341     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2342         if (i == encode_state->num_slice_params_ext - 1)
2343             next_slice_group_param = NULL;
2344         else
2345             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2346
2347         gen75_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2348     }
2349
2350     intel_batchbuffer_align(batch, 8);
2351     
2352     BEGIN_BCS_BATCH(batch, 2);
2353     OUT_BCS_BATCH(batch, 0);
2354     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2355     ADVANCE_BCS_BATCH(batch);
2356
2357     dri_bo_reference(batch_bo);
2358     intel_batchbuffer_free(batch);
2359
2360     return batch_bo;
2361 }
2362
2363 static void
2364 gen75_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2365                                             struct encode_state *encode_state,
2366                                             struct intel_encoder_context *encoder_context)
2367 {
2368     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2369
2370     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2371     mfc_context->set_surface_state(ctx, encoder_context);
2372     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2373     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
2374     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2375     gen75_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2376     gen75_mfc_mpeg2_qm_state(ctx, encoder_context);
2377     gen75_mfc_mpeg2_fqm_state(ctx, encoder_context);
2378 }
2379
2380 static void
2381 gen75_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2382                                     struct encode_state *encode_state,
2383                                     struct intel_encoder_context *encoder_context)
2384 {
2385     struct intel_batchbuffer *batch = encoder_context->base.batch;
2386     dri_bo *slice_batch_bo;
2387
2388     slice_batch_bo = gen75_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2389
2390     // begin programing
2391     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
2392     intel_batchbuffer_emit_mi_flush(batch);
2393     
2394     // picture level programing
2395     gen75_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
2396
2397     BEGIN_BCS_BATCH(batch, 2);
2398     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
2399     OUT_BCS_RELOC(batch,
2400                   slice_batch_bo,
2401                   I915_GEM_DOMAIN_COMMAND, 0, 
2402                   0);
2403     ADVANCE_BCS_BATCH(batch);
2404
2405     // end programing
2406     intel_batchbuffer_end_atomic(batch);
2407
2408     dri_bo_unreference(slice_batch_bo);
2409 }
2410
2411 static VAStatus
2412 intel_mfc_mpeg2_prepare(VADriverContextP ctx, 
2413                         struct encode_state *encode_state,
2414                         struct intel_encoder_context *encoder_context)
2415 {
2416     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2417     struct object_surface *obj_surface; 
2418     struct object_buffer *obj_buffer;
2419     struct i965_coded_buffer_segment *coded_buffer_segment;
2420     VAStatus vaStatus = VA_STATUS_SUCCESS;
2421     dri_bo *bo;
2422     int i;
2423
2424     /* reconstructed surface */
2425     obj_surface = encode_state->reconstructed_object;
2426     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2427     mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2428     dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2429     mfc_context->surface_state.width = obj_surface->orig_width;
2430     mfc_context->surface_state.height = obj_surface->orig_height;
2431     mfc_context->surface_state.w_pitch = obj_surface->width;
2432     mfc_context->surface_state.h_pitch = obj_surface->height;
2433
2434     /* forward reference */
2435     obj_surface = encode_state->reference_objects[0];
2436
2437     if (obj_surface && obj_surface->bo) {
2438         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2439         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2440     } else
2441         mfc_context->reference_surfaces[0].bo = NULL;
2442
2443     /* backward reference */
2444     obj_surface = encode_state->reference_objects[1];
2445
2446     if (obj_surface && obj_surface->bo) {
2447         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2448         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2449     } else {
2450         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2451
2452         if (mfc_context->reference_surfaces[1].bo)
2453             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2454     }
2455
2456     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2457         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2458
2459         if (mfc_context->reference_surfaces[i].bo)
2460             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2461     }
2462     
2463     /* input YUV surface */
2464     obj_surface = encode_state->input_yuv_object;
2465     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2466     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2467
2468     /* coded buffer */
2469     obj_buffer = encode_state->coded_buf_object;
2470     bo = obj_buffer->buffer_store->bo;
2471     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2472     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2473     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2474     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2475
2476     /* set the internal flag to 0 to indicate the coded size is unknown */
2477     dri_bo_map(bo, 1);
2478     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2479     coded_buffer_segment->mapped = 0;
2480     coded_buffer_segment->codec = encoder_context->codec;
2481     dri_bo_unmap(bo);
2482
2483     return vaStatus;
2484 }
2485
2486 static VAStatus
2487 gen75_mfc_mpeg2_encode_picture(VADriverContextP ctx, 
2488                                struct encode_state *encode_state,
2489                                struct intel_encoder_context *encoder_context)
2490 {
2491     gen75_mfc_init(ctx, encode_state, encoder_context);
2492     intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2493     /*Programing bcs pipeline*/
2494     gen75_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2495     gen75_mfc_run(ctx, encode_state, encoder_context);
2496
2497     return VA_STATUS_SUCCESS;
2498 }
2499
2500 static void
2501 gen75_mfc_context_destroy(void *context)
2502 {
2503     struct gen6_mfc_context *mfc_context = context;
2504     int i;
2505
2506     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2507     mfc_context->post_deblocking_output.bo = NULL;
2508
2509     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2510     mfc_context->pre_deblocking_output.bo = NULL;
2511
2512     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2513     mfc_context->uncompressed_picture_source.bo = NULL;
2514
2515     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
2516     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2517
2518     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2519         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2520         mfc_context->direct_mv_buffers[i].bo = NULL;
2521     }
2522
2523     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2524     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
2525
2526     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2527     mfc_context->macroblock_status_buffer.bo = NULL;
2528
2529     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2530     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2531
2532     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2533     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2534
2535     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2536         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2537         mfc_context->reference_surfaces[i].bo = NULL;  
2538     }
2539
2540     i965_gpe_context_destroy(&mfc_context->gpe_context);
2541
2542     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2543     mfc_context->mfc_batchbuffer_surface.bo = NULL;
2544
2545     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2546     mfc_context->aux_batchbuffer_surface.bo = NULL;
2547
2548     if (mfc_context->aux_batchbuffer)
2549         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2550
2551     mfc_context->aux_batchbuffer = NULL;
2552
2553     free(mfc_context);
2554 }
2555
2556 static VAStatus gen75_mfc_pipeline(VADriverContextP ctx,
2557                                    VAProfile profile,
2558                                    struct encode_state *encode_state,
2559                                    struct intel_encoder_context *encoder_context)
2560 {
2561     VAStatus vaStatus;
2562
2563     switch (profile) {
2564     case VAProfileH264Baseline:
2565     case VAProfileH264Main:
2566     case VAProfileH264High:
2567         vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
2568         break;
2569
2570         /* FIXME: add for other profile */
2571     case VAProfileMPEG2Simple:
2572     case VAProfileMPEG2Main:
2573         vaStatus = gen75_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
2574         break;
2575
2576     default:
2577         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2578         break;
2579     }
2580
2581     return vaStatus;
2582 }
2583
2584 Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2585 {
2586     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
2587
2588     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2589
2590     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
2591     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
2592
2593     mfc_context->gpe_context.curbe.length = 32 * 4;
2594
2595     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2596     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
2597     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
2598     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
2599     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
2600
2601     i965_gpe_load_kernels(ctx,
2602                           &mfc_context->gpe_context,
2603                           gen75_mfc_kernels,
2604                           NUM_MFC_KERNEL);
2605
2606     mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
2607     mfc_context->set_surface_state = gen75_mfc_surface_state;
2608     mfc_context->ind_obj_base_addr_state = gen75_mfc_ind_obj_base_addr_state;
2609     mfc_context->avc_img_state = gen75_mfc_avc_img_state;
2610     mfc_context->avc_qm_state = gen75_mfc_avc_qm_state;
2611     mfc_context->avc_fqm_state = gen75_mfc_avc_fqm_state;
2612     mfc_context->insert_object = gen75_mfc_avc_insert_object;
2613     mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;
2614
2615     encoder_context->mfc_context = mfc_context;
2616     encoder_context->mfc_context_destroy = gen75_mfc_context_destroy;
2617     encoder_context->mfc_pipeline = gen75_mfc_pipeline;
2618     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
2619
2620     return True;
2621 }