Use the right width/height to initialize the internal buffers for MPEG-2 encoding
[platform/upstream/libva-intel-driver.git] / src / gen75_mfc.c
1 /*
2  * Copyright © 2010-2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45
/*
 * NOTE(review): not referenced in the visible part of this file; presumably
 * selects the software-built batchbuffer path -- confirm against the rest of
 * the file.
 */
#define MFC_SOFTWARE_HASWELL    1

/* Lowest device revision value that IS_STEPPING_BPLUS() accepts. */
#define B0_STEP_REV             2
/*
 * Non-zero when the driver data pointed to by i965 reports a revision of at
 * least B0_STEP_REV.  The argument is parenthesized so that any pointer
 * expression (not only a plain identifier) expands correctly.
 */
#define IS_STEPPING_BPLUS(i965) (((i965)->intel.revision) >= B0_STEP_REV)
50
/*
 * Table of 4 x uint32_t rows whose contents are pulled in at compile time
 * from shaders/utils/mfc_batchbuffer_avc_intra.g7b.  Referenced by the
 * "MFC AVC INTRA BATCHBUFFER " entry of gen75_mfc_kernels.
 */
51 static const uint32_t gen75_mfc_batchbuffer_avc_intra[][4] = {
52 #include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
53 };
54
/*
 * Table of 4 x uint32_t rows whose contents are pulled in at compile time
 * from shaders/utils/mfc_batchbuffer_avc_inter.g7b.  Referenced by the
 * "MFC AVC INTER BATCHBUFFER " entry of gen75_mfc_kernels.
 */
55 static const uint32_t gen75_mfc_batchbuffer_avc_inter[][4] = {
56 #include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
57 };
58
/*
 * Kernel descriptor table with two entries (AVC intra and AVC inter).
 * Each initializer lists, in order: a name string, a MFC_BATCHBUFFER_*
 * identifier, the binary table defined above, the byte size of that table
 * and a trailing NULL.
 * NOTE(review): the meaning of the trailing NULL depends on the layout of
 * struct i965_kernel, which is declared outside this file -- confirm.
 */
59 static struct i965_kernel gen75_mfc_kernels[] = {
60     {
61         "MFC AVC INTRA BATCHBUFFER ",
62         MFC_BATCHBUFFER_AVC_INTRA,
63         gen75_mfc_batchbuffer_avc_intra,
64         sizeof(gen75_mfc_batchbuffer_avc_intra),
65         NULL
66     },
67
68     {
69         "MFC AVC INTER BATCHBUFFER ",
70         MFC_BATCHBUFFER_AVC_INTER,
71         gen75_mfc_batchbuffer_avc_inter,
72         sizeof(gen75_mfc_batchbuffer_avc_inter),
73         NULL
74     },
75 };
76
/*
 * Bit masks and field values for inter macroblock handling.  None of them is
 * referenced in the visible part of this file.
 * NOTE(review): presumably used when interpreting the VME output for inter
 * macroblocks (partition mode in the low two bits, sub-MB shape in bits
 * 8..15) -- confirm against the users further down the file.
 */
77 #define         INTER_MODE_MASK         0x03
78 #define         INTER_8X8               0x03
79 #define         INTER_16X8              0x01
80 #define         INTER_8X16              0x02
81 #define         SUBMB_SHAPE_MASK        0x00FF00
82
/* Values placed at bit 20 and above; 4 << 20 and 6 << 20 respectively. */
83 #define         INTER_MV8               (4 << 20)
84 #define         INTER_MV32              (6 << 20)
85
86
87 static void
88 gen75_mfc_pipe_mode_select(VADriverContextP ctx,
89                           int standard_select,
90                           struct intel_encoder_context *encoder_context)
91 {
92     struct intel_batchbuffer *batch = encoder_context->base.batch;
93     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
94     assert(standard_select == MFX_FORMAT_MPEG2 ||
95            standard_select == MFX_FORMAT_AVC);
96
97     BEGIN_BCS_BATCH(batch, 5);
98
99     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
100     OUT_BCS_BATCH(batch,
101                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
102                   (MFD_MODE_VLD << 15) | /* VLD mode */
103                   (0 << 10) | /* Stream-Out Enable */
104                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
105                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
106                   (0 << 5)  | /* not in stitch mode */
107                   (1 << 4)  | /* encoding mode */
108                   (standard_select << 0));  /* standard select: avc or mpeg2 */
109     OUT_BCS_BATCH(batch,
110                   (0 << 7)  | /* expand NOA bus flag */
111                   (0 << 6)  | /* disable slice-level clock gating */
112                   (0 << 5)  | /* disable clock gating for NOA */
113                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
114                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
115                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
116                   (0 << 1)  |
117                   (0 << 0));
118     OUT_BCS_BATCH(batch, 0);
119     OUT_BCS_BATCH(batch, 0);
120
121     ADVANCE_BCS_BATCH(batch);
122 }
123
124 static void
125 gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
126 {
127     struct intel_batchbuffer *batch = encoder_context->base.batch;
128     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
129
130     BEGIN_BCS_BATCH(batch, 6);
131
132     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
133     OUT_BCS_BATCH(batch, 0);
134     OUT_BCS_BATCH(batch,
135                   ((mfc_context->surface_state.height - 1) << 18) |
136                   ((mfc_context->surface_state.width - 1) << 4));
137     OUT_BCS_BATCH(batch,
138                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
139                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
140                   (0 << 22) | /* surface object control state, FIXME??? */
141                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
142                   (0 << 2)  | /* must be 0 for interleave U/V */
143                   (1 << 1)  | /* must be tiled */
144                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
145     OUT_BCS_BATCH(batch,
146                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
147                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
148     OUT_BCS_BATCH(batch, 0);
149
150     ADVANCE_BCS_BATCH(batch);
151 }
152
153 static void
154 gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
155                                 struct intel_encoder_context *encoder_context)
156 {
157     struct intel_batchbuffer *batch = encoder_context->base.batch;
158     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
159     struct gen6_vme_context *vme_context = encoder_context->vme_context;
160
161     BEGIN_BCS_BATCH(batch, 26);
162
163     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
164         /* the DW1-3 is for the MFX indirect bistream offset */
165     OUT_BCS_BATCH(batch, 0);
166     OUT_BCS_BATCH(batch, 0);
167     OUT_BCS_BATCH(batch, 0);
168         /* the DW4-5 is the MFX upper bound */
169     OUT_BCS_BATCH(batch, 0);
170     OUT_BCS_BATCH(batch, 0);
171
172     /* the DW6-10 is for MFX Indirect MV Object Base Address */
173     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
174     OUT_BCS_BATCH(batch, 0);
175     OUT_BCS_BATCH(batch, 0);
176     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
177     OUT_BCS_BATCH(batch, 0);
178
179      /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
180     OUT_BCS_BATCH(batch, 0);
181     OUT_BCS_BATCH(batch, 0);
182     OUT_BCS_BATCH(batch, 0);
183     OUT_BCS_BATCH(batch, 0);
184     OUT_BCS_BATCH(batch, 0);
185
186      /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */    
187     OUT_BCS_BATCH(batch, 0);
188     OUT_BCS_BATCH(batch, 0);
189     OUT_BCS_BATCH(batch, 0);
190     OUT_BCS_BATCH(batch, 0);
191     OUT_BCS_BATCH(batch, 0);
192
193     /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/        
194     OUT_BCS_RELOC(batch,
195                   mfc_context->mfc_indirect_pak_bse_object.bo,
196                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
197                   0);
198     OUT_BCS_BATCH(batch, 0);
199     OUT_BCS_BATCH(batch, 0);
200         
201     OUT_BCS_RELOC(batch,
202                   mfc_context->mfc_indirect_pak_bse_object.bo,
203                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
204                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
205     OUT_BCS_BATCH(batch, 0);
206
207     ADVANCE_BCS_BATCH(batch);
208 }
209
210 static void
211 gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
212 {
213     struct intel_batchbuffer *batch = encoder_context->base.batch;
214     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
215     struct gen6_vme_context *vme_context = encoder_context->vme_context;
216     struct i965_driver_data *i965 = i965_driver_data(ctx);
217
218     if (IS_STEPPING_BPLUS(i965)) {
219         gen75_mfc_ind_obj_base_addr_state_bplus(ctx, encoder_context);
220         return;
221     }
222
223     BEGIN_BCS_BATCH(batch, 11);
224
225     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
226     OUT_BCS_BATCH(batch, 0);
227     OUT_BCS_BATCH(batch, 0);
228     /* MFX Indirect MV Object Base Address */
229     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
230     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
231     OUT_BCS_BATCH(batch, 0);
232     OUT_BCS_BATCH(batch, 0);
233     OUT_BCS_BATCH(batch, 0);
234     OUT_BCS_BATCH(batch, 0);
235     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
236     OUT_BCS_RELOC(batch,
237                   mfc_context->mfc_indirect_pak_bse_object.bo,
238                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
239                   0);
240     OUT_BCS_RELOC(batch,
241                   mfc_context->mfc_indirect_pak_bse_object.bo,
242                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
243                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
244
245     ADVANCE_BCS_BATCH(batch);
246 }
247
248 static void
249 gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,  
250                        struct intel_encoder_context *encoder_context)
251 {
252     struct intel_batchbuffer *batch = encoder_context->base.batch;
253     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
254     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
255
256     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
257     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
258
259     BEGIN_BCS_BATCH(batch, 16);
260
261     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
262     /*DW1. MB setting of frame */
263     OUT_BCS_BATCH(batch,
264                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
265     OUT_BCS_BATCH(batch, 
266                   ((height_in_mbs - 1) << 16) | 
267                   ((width_in_mbs - 1) << 0));
268     /* DW3 QP setting */
269     OUT_BCS_BATCH(batch, 
270                   (0 << 24) |   /* Second Chroma QP Offset */
271                   (0 << 16) |   /* Chroma QP Offset */
272                   (0 << 14) |   /* Max-bit conformance Intra flag */
273                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
274                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
275                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
276                   (0 << 8)  |   /* FIXME: Image Structure */
277                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
278     OUT_BCS_BATCH(batch,
279                   (0 << 16) |   /* Mininum Frame size */
280                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
281                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
282                   (0 << 13) |   /* CABAC 0 word insertion test enable */
283                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
284                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
285                   (0 << 8)  |   /* FIXME: MbMvFormatFlag */
286                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
287                   (0 << 6)  |   /* Only valid for VLD decoding mode */
288                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
289                   (0 << 4)  |   /* Direct 8x8 inference flag */
290                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
291                   (1 << 2)  |   /* Frame MB only flag */
292                   (0 << 1)  |   /* MBAFF mode is in active */
293                   (0 << 0));    /* Field picture flag */
294         /* DW5 Trellis quantization */
295     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
296     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
297                   (0xBB8 << 16) |       /* InterMbMaxSz */
298                   (0xEE8) );            /* IntraMbMaxSz */
299     OUT_BCS_BATCH(batch, 0);            /* Reserved */
300         /* DW8. QP delta */
301     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
302     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
303         /* DW10. Bit setting for MB */  
304     OUT_BCS_BATCH(batch, 0x8C000000);
305     OUT_BCS_BATCH(batch, 0x00010000);
306         /* DW12. */
307     OUT_BCS_BATCH(batch, 0);
308     OUT_BCS_BATCH(batch, 0x02010100);
309         /* DW14. For short format */
310     OUT_BCS_BATCH(batch, 0);
311     OUT_BCS_BATCH(batch, 0);
312
313     ADVANCE_BCS_BATCH(batch);
314 }
315
316 static void
317 gen75_mfc_qm_state(VADriverContextP ctx,
318                   int qm_type,
319                   unsigned int *qm,
320                   int qm_length,
321                   struct intel_encoder_context *encoder_context)
322 {
323     struct intel_batchbuffer *batch = encoder_context->base.batch;
324     unsigned int qm_buffer[16];
325
326     assert(qm_length <= 16);
327     assert(sizeof(*qm) == 4);
328     memcpy(qm_buffer, qm, qm_length * 4);
329
330     BEGIN_BCS_BATCH(batch, 18);
331     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
332     OUT_BCS_BATCH(batch, qm_type << 0);
333     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
334     ADVANCE_BCS_BATCH(batch);
335 }
336
337 static void
338 gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
339 {
340     unsigned int qm[16] = {
341         0x10101010, 0x10101010, 0x10101010, 0x10101010,
342         0x10101010, 0x10101010, 0x10101010, 0x10101010,
343         0x10101010, 0x10101010, 0x10101010, 0x10101010,
344         0x10101010, 0x10101010, 0x10101010, 0x10101010
345     };
346
347     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
348     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
349     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
350     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
351 }
352
353 static void
354 gen75_mfc_fqm_state(VADriverContextP ctx,
355                    int fqm_type,
356                    unsigned int *fqm,
357                    int fqm_length,
358                    struct intel_encoder_context *encoder_context)
359 {
360     struct intel_batchbuffer *batch = encoder_context->base.batch;
361     unsigned int fqm_buffer[32];
362
363     assert(fqm_length <= 32);
364     assert(sizeof(*fqm) == 4);
365     memcpy(fqm_buffer, fqm, fqm_length * 4);
366
367     BEGIN_BCS_BATCH(batch, 34);
368     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
369     OUT_BCS_BATCH(batch, fqm_type << 0);
370     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
371     ADVANCE_BCS_BATCH(batch);
372 }
373
374 static void
375 gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
376 {
377     unsigned int qm[32] = {
378         0x10001000, 0x10001000, 0x10001000, 0x10001000,
379         0x10001000, 0x10001000, 0x10001000, 0x10001000,
380         0x10001000, 0x10001000, 0x10001000, 0x10001000,
381         0x10001000, 0x10001000, 0x10001000, 0x10001000,
382         0x10001000, 0x10001000, 0x10001000, 0x10001000,
383         0x10001000, 0x10001000, 0x10001000, 0x10001000,
384         0x10001000, 0x10001000, 0x10001000, 0x10001000,
385         0x10001000, 0x10001000, 0x10001000, 0x10001000
386     };
387
388     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
389     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
390     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
391     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
392 }
393
394 static void
395 gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
396                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
397                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
398                            struct intel_batchbuffer *batch)
399 {
400     if (batch == NULL)
401         batch = encoder_context->base.batch;
402
403     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
404
405     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
406     OUT_BCS_BATCH(batch,
407                   (0 << 16) |   /* always start at offset 0 */
408                   (data_bits_in_last_dw << 8) |
409                   (skip_emul_byte_count << 4) |
410                   (!!emulation_flag << 3) |
411                   ((!!is_last_header) << 2) |
412                   ((!!is_end_of_slice) << 1) |
413                   (0 << 0));    /* FIXME: ??? */
414     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
415
416     ADVANCE_BCS_BATCH(batch);
417 }
418
419
420 static void gen75_mfc_init(VADriverContextP ctx,
421                         struct encode_state *encode_state,
422                         struct intel_encoder_context *encoder_context)
423 {
424     struct i965_driver_data *i965 = i965_driver_data(ctx);
425     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
426     dri_bo *bo;
427     int i;
428     int width_in_mbs = 0;
429     int height_in_mbs = 0;
430
431     if (encoder_context->codec == CODEC_H264) {
432         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
433         width_in_mbs = pSequenceParameter->picture_width_in_mbs;
434         height_in_mbs = pSequenceParameter->picture_height_in_mbs;
435     } else {
436         VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
437
438         assert(encoder_context->codec == CODEC_MPEG2);
439
440         width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
441         height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
442     }
443
444     /*Encode common setup for MFC*/
445     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
446     mfc_context->post_deblocking_output.bo = NULL;
447
448     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
449     mfc_context->pre_deblocking_output.bo = NULL;
450
451     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
452     mfc_context->uncompressed_picture_source.bo = NULL;
453
454     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
455     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
456
457     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
458         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
459         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
460         mfc_context->direct_mv_buffers[i].bo = NULL;
461     }
462
463     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
464         if (mfc_context->reference_surfaces[i].bo != NULL)
465             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
466         mfc_context->reference_surfaces[i].bo = NULL;  
467     }
468
469     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
470     bo = dri_bo_alloc(i965->intel.bufmgr,
471                       "Buffer",
472                       width_in_mbs * 64,
473                       64);
474     assert(bo);
475     mfc_context->intra_row_store_scratch_buffer.bo = bo;
476
477     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
478     bo = dri_bo_alloc(i965->intel.bufmgr,
479                       "Buffer",
480                       width_in_mbs * height_in_mbs * 16,
481                       64);
482     assert(bo);
483     mfc_context->macroblock_status_buffer.bo = bo;
484
485     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
486     bo = dri_bo_alloc(i965->intel.bufmgr,
487                       "Buffer",
488                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
489                       64);
490     assert(bo);
491     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
492
493     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
494     bo = dri_bo_alloc(i965->intel.bufmgr,
495                       "Buffer",
496                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
497                       0x1000);
498     assert(bo);
499     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
500
501     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
502     mfc_context->mfc_batchbuffer_surface.bo = NULL;
503
504     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
505     mfc_context->aux_batchbuffer_surface.bo = NULL;
506
507     if (mfc_context->aux_batchbuffer)
508         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
509
510     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0);
511     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
512     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
513     mfc_context->aux_batchbuffer_surface.pitch = 16;
514     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
515     mfc_context->aux_batchbuffer_surface.size_block = 16;
516
517     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
518 }
519
520 static void
521 gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
522                                 struct intel_encoder_context *encoder_context)
523 {
524     struct intel_batchbuffer *batch = encoder_context->base.batch;
525     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
526     int i;
527
528     BEGIN_BCS_BATCH(batch, 61);
529
530     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
531
532     /* the DW1-3 is for pre_deblocking */
533     if (mfc_context->pre_deblocking_output.bo)
534         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
535                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
536                       0);
537     else
538         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
539
540         OUT_BCS_BATCH(batch, 0);
541         OUT_BCS_BATCH(batch, 0);
542      /* the DW4-6 is for the post_deblocking */
543
544     if (mfc_context->post_deblocking_output.bo)
545         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
546                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
547                       0);                                                                                       /* post output addr  */ 
548     else
549         OUT_BCS_BATCH(batch, 0);
550         OUT_BCS_BATCH(batch, 0);
551         OUT_BCS_BATCH(batch, 0);
552
553      /* the DW7-9 is for the uncompressed_picture */
554     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
555                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
556                   0); /* uncompressed data */
557
558         OUT_BCS_BATCH(batch, 0);
559         OUT_BCS_BATCH(batch, 0);
560
561      /* the DW10-12 is for the mb status */
562     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
563                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
564                   0); /* StreamOut data*/
565         OUT_BCS_BATCH(batch, 0);
566         OUT_BCS_BATCH(batch, 0);
567
568      /* the DW13-15 is for the intra_row_store_scratch */
569     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
570                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
571                   0);   
572         OUT_BCS_BATCH(batch, 0);
573         OUT_BCS_BATCH(batch, 0);
574
575      /* the DW16-18 is for the deblocking filter */
576     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
577                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
578                   0);
579         OUT_BCS_BATCH(batch, 0);
580         OUT_BCS_BATCH(batch, 0);
581
582     /* the DW 19-50 is for Reference pictures*/
583     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
584         if ( mfc_context->reference_surfaces[i].bo != NULL) {
585             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
586                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
587                           0);                   
588         } else {
589             OUT_BCS_BATCH(batch, 0);
590         }
591         OUT_BCS_BATCH(batch, 0);
592     }
593         OUT_BCS_BATCH(batch, 0);
594
595         /* The DW 52-54 is for the MB status buffer */
596     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
597                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
598                   0);                                                                                   /* Macroblock status buffer*/
599         
600         OUT_BCS_BATCH(batch, 0);
601         OUT_BCS_BATCH(batch, 0);
602
603         /* the DW 55-57 is the ILDB buffer */
604         OUT_BCS_BATCH(batch, 0);
605         OUT_BCS_BATCH(batch, 0);
606         OUT_BCS_BATCH(batch, 0);
607
608         /* the DW 58-60 is the second ILDB buffer */
609         OUT_BCS_BATCH(batch, 0);
610         OUT_BCS_BATCH(batch, 0);
611         OUT_BCS_BATCH(batch, 0);
612     ADVANCE_BCS_BATCH(batch);
613 }
614
615 static void
616 gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
617 {
618     struct intel_batchbuffer *batch = encoder_context->base.batch;
619     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
620     struct i965_driver_data *i965 = i965_driver_data(ctx);
621     int i;
622
623     if (IS_STEPPING_BPLUS(i965)) {
624         gen75_mfc_pipe_buf_addr_state_bplus(ctx, encoder_context);
625         return;
626     }
627
628     BEGIN_BCS_BATCH(batch, 25);
629
630     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
631
632     if (mfc_context->pre_deblocking_output.bo)
633         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
634                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
635                       0);
636     else
637         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
638
639     if (mfc_context->post_deblocking_output.bo)
640         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
641                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
642                       0);                                                                                       /* post output addr  */ 
643     else
644         OUT_BCS_BATCH(batch, 0);
645
646     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
647                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
648                   0);                                                                                   /* uncompressed data */
649     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
650                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
651                   0);                                                                                   /* StreamOut data*/
652     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
653                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
654                   0);   
655     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
656                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
657                   0);
658     /* 7..22 Reference pictures*/
659     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
660         if ( mfc_context->reference_surfaces[i].bo != NULL) {
661             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
662                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
663                           0);                   
664         } else {
665             OUT_BCS_BATCH(batch, 0);
666         }
667     }
668     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
669                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
670                   0);                                                                                   /* Macroblock status buffer*/
671
672         OUT_BCS_BATCH(batch, 0);
673
674     ADVANCE_BCS_BATCH(batch);
675 }
676
677 static void
678 gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
679                                 struct intel_encoder_context *encoder_context)
680 {
681     struct intel_batchbuffer *batch = encoder_context->base.batch;
682     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
683
684     int i;
685
686     BEGIN_BCS_BATCH(batch, 71);
687
688     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
689
690     /* Reference frames and Current frames */
691     /* the DW1-32 is for the direct MV for reference */
692     for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
693         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
694             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
695                           I915_GEM_DOMAIN_INSTRUCTION, 0,
696                           0);
697             OUT_BCS_BATCH(batch, 0);
698         } else {
699             OUT_BCS_BATCH(batch, 0);
700             OUT_BCS_BATCH(batch, 0);
701         }
702     }
703         OUT_BCS_BATCH(batch, 0);
704
705         /* the DW34-36 is the MV for the current reference */
706         OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
707                           I915_GEM_DOMAIN_INSTRUCTION, 0,
708                           0);
709
710         OUT_BCS_BATCH(batch, 0);
711         OUT_BCS_BATCH(batch, 0);
712
713     /* POL list */
714     for(i = 0; i < 32; i++) {
715         OUT_BCS_BATCH(batch, i/2);
716     }
717     OUT_BCS_BATCH(batch, 0);
718     OUT_BCS_BATCH(batch, 0);
719
720     ADVANCE_BCS_BATCH(batch);
721 }
722
723 static void
724 gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
725 {
726     struct intel_batchbuffer *batch = encoder_context->base.batch;
727     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
728     struct i965_driver_data *i965 = i965_driver_data(ctx);
729     int i;
730
731     if (IS_STEPPING_BPLUS(i965)) {
732         gen75_mfc_avc_directmode_state_bplus(ctx, encoder_context);
733         return;
734     }
735
736     BEGIN_BCS_BATCH(batch, 69);
737
738     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
739
740     /* Reference frames and Current frames */
741     for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
742         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
743             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
744                           I915_GEM_DOMAIN_INSTRUCTION, 0,
745                           0);
746         } else {
747             OUT_BCS_BATCH(batch, 0);
748         }
749     }
750
751     /* POL list */
752     for(i = 0; i < 32; i++) {
753         OUT_BCS_BATCH(batch, i/2);
754     }
755     OUT_BCS_BATCH(batch, 0);
756     OUT_BCS_BATCH(batch, 0);
757
758     ADVANCE_BCS_BATCH(batch);
759 }
760
761
762 static void
763 gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
764                                 struct intel_encoder_context *encoder_context)
765 {
766     struct intel_batchbuffer *batch = encoder_context->base.batch;
767     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
768
769     BEGIN_BCS_BATCH(batch, 10);
770
771     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
772     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
773                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
774                   0);
775     OUT_BCS_BATCH(batch, 0);
776     OUT_BCS_BATCH(batch, 0);
777         
778         /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
779     OUT_BCS_BATCH(batch, 0);
780     OUT_BCS_BATCH(batch, 0);
781     OUT_BCS_BATCH(batch, 0);
782
783         /* the DW7-9 is for Bitplane Read Buffer Base Address */
784     OUT_BCS_BATCH(batch, 0);
785     OUT_BCS_BATCH(batch, 0);
786     OUT_BCS_BATCH(batch, 0);
787
788     ADVANCE_BCS_BATCH(batch);
789 }
790
791 static void
792 gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
793 {
794     struct intel_batchbuffer *batch = encoder_context->base.batch;
795     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
796     struct i965_driver_data *i965 = i965_driver_data(ctx);
797
798     if (IS_STEPPING_BPLUS(i965)) {
799         gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, encoder_context);
800         return;
801     }
802
803     BEGIN_BCS_BATCH(batch, 4);
804
805     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
806     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
807                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
808                   0);
809     OUT_BCS_BATCH(batch, 0);
810     OUT_BCS_BATCH(batch, 0);
811
812     ADVANCE_BCS_BATCH(batch);
813 }
814
815
816 static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
817                                       struct encode_state *encode_state,
818                                       struct intel_encoder_context *encoder_context)
819 {
820     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
821
822     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
823     mfc_context->set_surface_state(ctx, encoder_context);
824     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
825     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
826     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
827     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
828     mfc_context->avc_qm_state(ctx, encoder_context);
829     mfc_context->avc_fqm_state(ctx, encoder_context);
830     gen75_mfc_avc_directmode_state(ctx, encoder_context); 
831     intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
832 }
833
834
835 static VAStatus gen75_mfc_run(VADriverContextP ctx, 
836                              struct encode_state *encode_state,
837                              struct intel_encoder_context *encoder_context)
838 {
839     struct intel_batchbuffer *batch = encoder_context->base.batch;
840
841     intel_batchbuffer_flush(batch);             //run the pipeline
842
843     return VA_STATUS_SUCCESS;
844 }
845
846
847 static VAStatus
848 gen75_mfc_stop(VADriverContextP ctx, 
849               struct encode_state *encode_state,
850               struct intel_encoder_context *encoder_context,
851               int *encoded_bits_size)
852 {
853     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
854     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
855     VACodedBufferSegment *coded_buffer_segment;
856     
857     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
858     assert(vaStatus == VA_STATUS_SUCCESS);
859     *encoded_bits_size = coded_buffer_segment->size * 8;
860     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
861
862     return VA_STATUS_SUCCESS;
863 }
864
865
866 static void
867 gen75_mfc_avc_slice_state(VADriverContextP ctx,
868                          VAEncPictureParameterBufferH264 *pic_param,
869                          VAEncSliceParameterBufferH264 *slice_param,
870                          struct encode_state *encode_state,
871                          struct intel_encoder_context *encoder_context,
872                          int rate_control_enable,
873                          int qp,
874                          struct intel_batchbuffer *batch)
875 {
876     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
877     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
878     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
879     int beginmb = slice_param->macroblock_address;
880     int endmb = beginmb + slice_param->num_macroblocks;
881     int beginx = beginmb % width_in_mbs;
882     int beginy = beginmb / width_in_mbs;
883     int nextx =  endmb % width_in_mbs;
884     int nexty = endmb / width_in_mbs;
885     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
886     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
887     int maxQpN, maxQpP;
888     unsigned char correct[6], grow, shrink;
889     int i;
890     int bslice = 0;
891     int weighted_pred_idc = 0;
892     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
893     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
894
895     if (batch == NULL)
896         batch = encoder_context->base.batch;
897
898     if (slice_type == SLICE_TYPE_P) {
899         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
900     } else if (slice_type == SLICE_TYPE_B) {
901         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
902         bslice = 1;
903
904         if (weighted_pred_idc == 2) {
905             /* 8.4.3 - Derivation process for prediction weights (8-279) */
906             luma_log2_weight_denom = 5;
907             chroma_log2_weight_denom = 5;
908         }
909     }
910
911     maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
912     maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
913
914     for (i = 0; i < 6; i++)
915         correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
916
917     grow = mfc_context->bit_rate_control_context[slice_type].GrowInit + 
918         (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
919     shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit + 
920         (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
921
922     BEGIN_BCS_BATCH(batch, 11);;
923
924     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
925     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
926
927     if (slice_type == SLICE_TYPE_I) {
928         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
929     } else {
930         OUT_BCS_BATCH(batch,
931                       (1 << 16) | (bslice << 24) |                      /*1 reference frame*/
932                       (chroma_log2_weight_denom << 8) |
933                       (luma_log2_weight_denom << 0));
934     }
935
936     OUT_BCS_BATCH(batch, 
937                   (weighted_pred_idc << 30) |
938                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
939                   (slice_param->disable_deblocking_filter_idc << 27) |
940                   (slice_param->cabac_init_idc << 24) |
941                   (qp<<16) |                    /*Slice Quantization Parameter*/
942                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
943                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
944     OUT_BCS_BATCH(batch,
945                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
946                   (beginx << 16) |
947                   slice_param->macroblock_address );
948     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
949     OUT_BCS_BATCH(batch, 
950                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
951                   (1 << 30) |           /*ResetRateControlCounter*/
952                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
953                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
954                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
955                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
956                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
957                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
958                   (last_slice << 19) |     /*IsLastSlice*/
959                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
960                   (1 << 17) |       /*HeaderPresentFlag*/       
961                   (1 << 16) |       /*SliceData PresentFlag*/
962                   (1 << 15) |       /*TailPresentFlag*/
963                   (1 << 13) |       /*RBSP NAL TYPE*/   
964                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
965     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
966     OUT_BCS_BATCH(batch,
967                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
968                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
969                   (shrink << 8)  |
970                   (grow << 0));   
971     OUT_BCS_BATCH(batch,
972                   (correct[5] << 20) |
973                   (correct[4] << 16) |
974                   (correct[3] << 12) |
975                   (correct[2] << 8) |
976                   (correct[1] << 4) |
977                   (correct[0] << 0));
978     OUT_BCS_BATCH(batch, 0);
979
980     ADVANCE_BCS_BATCH(batch);
981 }
982
983
984 #ifdef MFC_SOFTWARE_HASWELL
985
986 static int
987 gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
988                                 int qp,unsigned int *msg,
989                               struct intel_encoder_context *encoder_context,
990                               unsigned char target_mb_size, unsigned char max_mb_size,
991                               struct intel_batchbuffer *batch)
992 {
993     int len_in_dwords = 12;
994     unsigned int intra_msg;
995 #define         INTRA_MSG_FLAG          (1 << 13)
996 #define         INTRA_MBTYPE_MASK       (0x1F0000)
997     if (batch == NULL)
998         batch = encoder_context->base.batch;
999
1000     BEGIN_BCS_BATCH(batch, len_in_dwords);
1001
1002     intra_msg = msg[0] & 0xC0FF;
1003     intra_msg |= INTRA_MSG_FLAG;
1004     intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
1005     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1006     OUT_BCS_BATCH(batch, 0);
1007     OUT_BCS_BATCH(batch, 0);
1008     OUT_BCS_BATCH(batch, 
1009                   (0 << 24) |           /* PackedMvNum, Debug*/
1010                   (0 << 20) |           /* No motion vector */
1011                   (1 << 19) |           /* CbpDcY */
1012                   (1 << 18) |           /* CbpDcU */
1013                   (1 << 17) |           /* CbpDcV */
1014                   intra_msg);
1015
1016     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
1017     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
1018     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
1019
1020     /*Stuff for Intra MB*/
1021     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
1022     OUT_BCS_BATCH(batch, msg[2]);       
1023     OUT_BCS_BATCH(batch, msg[3]&0xFF);  
1024     
1025     /*MaxSizeInWord and TargetSzieInWord*/
1026     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1027                   (target_mb_size << 16) );
1028
1029     OUT_BCS_BATCH(batch, 0);
1030
1031     ADVANCE_BCS_BATCH(batch);
1032
1033     return len_in_dwords;
1034 }
1035
1036 static int
1037 gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1038                               unsigned int *msg, unsigned int offset,
1039                               struct intel_encoder_context *encoder_context,
1040                               unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1041                               struct intel_batchbuffer *batch)
1042 {
1043     int len_in_dwords = 12;
1044         unsigned int inter_msg = 0;
1045     if (batch == NULL)
1046         batch = encoder_context->base.batch;
1047     {
1048 #define MSG_MV_OFFSET   4
1049         unsigned int *mv_ptr;
1050         mv_ptr = msg + MSG_MV_OFFSET;
1051         /* MV of VME output is based on 16 sub-blocks. So it is necessary
1052          * to convert them to be compatible with the format of AVC_PAK
1053          * command.
1054          */
1055         if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
1056                 /* MV[0] and MV[2] are replicated */
1057                 mv_ptr[4] = mv_ptr[0];
1058                 mv_ptr[5] = mv_ptr[1];
1059                 mv_ptr[2] = mv_ptr[8];
1060                 mv_ptr[3] = mv_ptr[9];
1061                 mv_ptr[6] = mv_ptr[8]; 
1062                 mv_ptr[7] = mv_ptr[9]; 
1063         } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
1064                 /* MV[0] and MV[1] are replicated */
1065                 mv_ptr[2] = mv_ptr[0];  
1066                 mv_ptr[3] = mv_ptr[1];
1067                 mv_ptr[4] = mv_ptr[16]; 
1068                 mv_ptr[5] = mv_ptr[17]; 
1069                 mv_ptr[6] = mv_ptr[24];
1070                 mv_ptr[7] = mv_ptr[25];
1071         } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1072                         !(msg[1] & SUBMB_SHAPE_MASK)) {
1073                 /* Don't touch MV[0] or MV[1] */
1074                 mv_ptr[2] = mv_ptr[8];
1075                 mv_ptr[3] = mv_ptr[9];
1076                 mv_ptr[4] = mv_ptr[16];
1077                 mv_ptr[5] = mv_ptr[17];
1078                 mv_ptr[6] = mv_ptr[24];
1079                 mv_ptr[7] = mv_ptr[25];
1080         }
1081     }
1082
1083     BEGIN_BCS_BATCH(batch, len_in_dwords);
1084
1085     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1086
1087         inter_msg = 32;
1088         /* MV quantity */
1089         if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
1090                 if (msg[1] & SUBMB_SHAPE_MASK)
1091                         inter_msg = 128;
1092         }
1093     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
1094     OUT_BCS_BATCH(batch, offset);
1095         inter_msg = msg[0] & (0x1F00FFFF);
1096         inter_msg |= INTER_MV8;
1097         inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
1098         if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1099                         (msg[1] & SUBMB_SHAPE_MASK)) {
1100                 inter_msg |= INTER_MV32;
1101         }
1102
1103     OUT_BCS_BATCH(batch, inter_msg);
1104
1105     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
1106     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
1107 #if 0 
1108     if ( slice_type == SLICE_TYPE_B) {
1109         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
1110     } else {
1111         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
1112     }
1113 #else
1114     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
1115 #endif
1116
1117         inter_msg = msg[1] >> 8;
1118     /*Stuff for Inter MB*/
1119     OUT_BCS_BATCH(batch, inter_msg);        
1120     OUT_BCS_BATCH(batch, 0x0);    
1121     OUT_BCS_BATCH(batch, 0x0);        
1122
1123     /*MaxSizeInWord and TargetSzieInWord*/
1124     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1125                   (target_mb_size << 16) );
1126
1127     OUT_BCS_BATCH(batch, 0x0);    
1128
1129     ADVANCE_BCS_BATCH(batch);
1130
1131     return len_in_dwords;
1132 }
1133
1134 #define         AVC_INTRA_RDO_OFFSET    4
1135 #define         AVC_INTER_RDO_OFFSET    10
1136 #define         AVC_INTER_MSG_OFFSET    8       
1137 #define         AVC_INTER_MV_OFFSET             48
1138 #define         AVC_RDO_MASK            0xFFFF
1139
1140 static void 
1141 gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1142                                        struct encode_state *encode_state,
1143                                        struct intel_encoder_context *encoder_context,
1144                                        int slice_index,
1145                                        struct intel_batchbuffer *slice_batch)
1146 {
1147     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1148     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1149     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1150     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1151     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1152     unsigned int *msg = NULL, offset = 0;
1153     unsigned char *msg_ptr = NULL;
1154     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1155     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1156     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1157     int i,x,y;
1158     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1159     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1160     unsigned char *slice_header = NULL;
1161     int slice_header_length_in_bits = 0;
1162     unsigned int tail_data[] = { 0x0, 0x0 };
1163     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1164     int is_intra = slice_type == SLICE_TYPE_I;
1165
1166     if (rate_control_mode == VA_RC_CBR) {
1167         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1168         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1169     }
1170
1171     /* only support for 8-bit pixel bit-depth */
1172     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1173     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1174     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1175     assert(qp >= 0 && qp < 52);
1176
1177     gen75_mfc_avc_slice_state(ctx, 
1178                              pPicParameter,
1179                              pSliceParameter,
1180                              encode_state, encoder_context,
1181                              (rate_control_mode == VA_RC_CBR), qp, slice_batch);
1182
1183     if ( slice_index == 0) 
1184         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1185
1186     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1187
1188     // slice hander
1189     mfc_context->insert_object(ctx, encoder_context,
1190                                (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1191                                5,  /* first 5 bytes are start code + nal unit type */
1192                                1, 0, 1, slice_batch);
1193
1194     dri_bo_map(vme_context->vme_output.bo , 1);
1195     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1196
1197     if (is_intra) {
1198         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1199     } else {
1200         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1201     }
1202    
1203     for (i = pSliceParameter->macroblock_address; 
1204          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1205         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1206         x = i % width_in_mbs;
1207         y = i / width_in_mbs;
1208         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1209
1210         if (is_intra) {
1211             assert(msg);
1212             gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1213         } else {
1214             int inter_rdo, intra_rdo;
1215             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1216             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1217             offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1218             if (intra_rdo < inter_rdo) { 
1219                 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1220             } else {
1221                 msg += AVC_INTER_MSG_OFFSET;
1222                 gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, slice_type, slice_batch);
1223             }
1224         }
1225     }
1226    
1227     dri_bo_unmap(vme_context->vme_output.bo);
1228
1229     if ( last_slice ) {    
1230         mfc_context->insert_object(ctx, encoder_context,
1231                                    tail_data, 2, 8,
1232                                    2, 1, 1, 0, slice_batch);
1233     } else {
1234         mfc_context->insert_object(ctx, encoder_context,
1235                                    tail_data, 1, 8,
1236                                    1, 1, 1, 0, slice_batch);
1237     }
1238
1239     free(slice_header);
1240
1241 }
1242
1243 static dri_bo *
1244 gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1245                                   struct encode_state *encode_state,
1246                                   struct intel_encoder_context *encoder_context)
1247 {
1248     struct i965_driver_data *i965 = i965_driver_data(ctx);
1249     struct intel_batchbuffer *batch;
1250     dri_bo *batch_bo;
1251     int i;
1252     int buffer_size;
1253     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1254     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1255     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1256
1257     buffer_size = width_in_mbs * height_in_mbs * 64;
1258     batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
1259     batch_bo = batch->buffer;
1260     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1261         gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1262     }
1263
1264     intel_batchbuffer_align(batch, 8);
1265     
1266     BEGIN_BCS_BATCH(batch, 2);
1267     OUT_BCS_BATCH(batch, 0);
1268     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1269     ADVANCE_BCS_BATCH(batch);
1270
1271     dri_bo_reference(batch_bo);
1272     intel_batchbuffer_free(batch);
1273
1274     return batch_bo;
1275 }
1276
1277 #else
1278
1279 static void
1280 gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1281                                     struct encode_state *encode_state,
1282                                     struct intel_encoder_context *encoder_context)
1283
1284 {
1285     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1286     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1287
1288     assert(vme_context->vme_output.bo);
1289     mfc_context->buffer_suface_setup(ctx,
1290                                      &mfc_context->gpe_context,
1291                                      &vme_context->vme_output,
1292                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1293                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1294     assert(mfc_context->aux_batchbuffer_surface.bo);
1295     mfc_context->buffer_suface_setup(ctx,
1296                                      &mfc_context->gpe_context,
1297                                      &mfc_context->aux_batchbuffer_surface,
1298                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1299                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
1300 }
1301
1302 static void
1303 gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1304                                      struct encode_state *encode_state,
1305                                      struct intel_encoder_context *encoder_context)
1306
1307 {
1308     struct i965_driver_data *i965 = i965_driver_data(ctx);
1309     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1310     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1311     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1312     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1313     mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1314     mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1315     mfc_context->mfc_batchbuffer_surface.pitch = 16;
1316     mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, 
1317                                                            "MFC batchbuffer",
1318                                                            mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1319                                                            0x1000);
1320     mfc_context->buffer_suface_setup(ctx,
1321                                      &mfc_context->gpe_context,
1322                                      &mfc_context->mfc_batchbuffer_surface,
1323                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1324                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1325 }
1326
1327 static void
1328 gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
1329                                     struct encode_state *encode_state,
1330                                     struct intel_encoder_context *encoder_context)
1331 {
1332     gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1333     gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1334 }
1335
1336 static void
1337 gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
1338                                 struct encode_state *encode_state,
1339                                 struct intel_encoder_context *encoder_context)
1340 {
1341     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1342     struct gen6_interface_descriptor_data *desc;   
1343     int i;
1344     dri_bo *bo;
1345
1346     bo = mfc_context->gpe_context.idrt.bo;
1347     dri_bo_map(bo, 1);
1348     assert(bo->virtual);
1349     desc = bo->virtual;
1350
1351     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1352         struct i965_kernel *kernel;
1353
1354         kernel = &mfc_context->gpe_context.kernels[i];
1355         assert(sizeof(*desc) == 32);
1356
1357         /*Setup the descritor table*/
1358         memset(desc, 0, sizeof(*desc));
1359         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1360         desc->desc2.sampler_count = 0;
1361         desc->desc2.sampler_state_pointer = 0;
1362         desc->desc3.binding_table_entry_count = 2;
1363         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1364         desc->desc4.constant_urb_entry_read_offset = 0;
1365         desc->desc4.constant_urb_entry_read_length = 4;
1366                 
1367         /*kernel start*/
1368         dri_bo_emit_reloc(bo,   
1369                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1370                           0,
1371                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1372                           kernel->bo);
1373         desc++;
1374     }
1375
1376     dri_bo_unmap(bo);
1377 }
1378
1379 static void
1380 gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1381                                     struct encode_state *encode_state,
1382                                     struct intel_encoder_context *encoder_context)
1383 {
1384     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1385     
1386     (void)mfc_context;
1387 }
1388
1389 static void
1390 gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1391                                          int index,
1392                                          int head_offset,
1393                                          int batchbuffer_offset,
1394                                          int head_size,
1395                                          int tail_size,
1396                                          int number_mb_cmds,
1397                                          int first_object,
1398                                          int last_object,
1399                                          int last_slice,
1400                                          int mb_x,
1401                                          int mb_y,
1402                                          int width_in_mbs,
1403                                          int qp)
1404 {
1405     BEGIN_BATCH(batch, 12);
1406     
1407     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1408     OUT_BATCH(batch, index);
1409     OUT_BATCH(batch, 0);
1410     OUT_BATCH(batch, 0);
1411     OUT_BATCH(batch, 0);
1412     OUT_BATCH(batch, 0);
1413    
1414     /*inline data */
1415     OUT_BATCH(batch, head_offset);
1416     OUT_BATCH(batch, batchbuffer_offset);
1417     OUT_BATCH(batch, 
1418               head_size << 16 |
1419               tail_size);
1420     OUT_BATCH(batch,
1421               number_mb_cmds << 16 |
1422               first_object << 2 |
1423               last_object << 1 |
1424               last_slice);
1425     OUT_BATCH(batch,
1426               mb_y << 8 |
1427               mb_x);
1428     OUT_BATCH(batch,
1429               qp << 16 |
1430               width_in_mbs);
1431
1432     ADVANCE_BATCH(batch);
1433 }
1434
1435 static void
1436 gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1437                                        struct intel_encoder_context *encoder_context,
1438                                        VAEncSliceParameterBufferH264 *slice_param,
1439                                        int head_offset,
1440                                        unsigned short head_size,
1441                                        unsigned short tail_size,
1442                                        int batchbuffer_offset,
1443                                        int qp,
1444                                        int last_slice)
1445 {
1446     struct intel_batchbuffer *batch = encoder_context->base.batch;
1447     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1448     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1449     int total_mbs = slice_param->num_macroblocks;
1450     int number_mb_cmds = 128;
1451     int starting_mb = 0;
1452     int last_object = 0;
1453     int first_object = 1;
1454     int i;
1455     int mb_x, mb_y;
1456     int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1457
1458     for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1459         last_object = (total_mbs - starting_mb) == number_mb_cmds;
1460         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1461         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1462         assert(mb_x <= 255 && mb_y <= 255);
1463
1464         starting_mb += number_mb_cmds;
1465
1466         gen75_mfc_batchbuffer_emit_object_command(batch,
1467                                                  index,
1468                                                  head_offset,
1469                                                  batchbuffer_offset,
1470                                                  head_size,
1471                                                  tail_size,
1472                                                  number_mb_cmds,
1473                                                  first_object,
1474                                                  last_object,
1475                                                  last_slice,
1476                                                  mb_x,
1477                                                  mb_y,
1478                                                  width_in_mbs,
1479                                                  qp);
1480
1481         if (first_object) {
1482             head_offset += head_size;
1483             batchbuffer_offset += head_size;
1484         }
1485
1486         if (last_object) {
1487             head_offset += tail_size;
1488             batchbuffer_offset += tail_size;
1489         }
1490
1491         batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
1492
1493         first_object = 0;
1494     }
1495
1496     if (!last_object) {
1497         last_object = 1;
1498         number_mb_cmds = total_mbs % number_mb_cmds;
1499         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1500         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1501         assert(mb_x <= 255 && mb_y <= 255);
1502         starting_mb += number_mb_cmds;
1503
1504         gen75_mfc_batchbuffer_emit_object_command(batch,
1505                                                  index,
1506                                                  head_offset,
1507                                                  batchbuffer_offset,
1508                                                  head_size,
1509                                                  tail_size,
1510                                                  number_mb_cmds,
1511                                                  first_object,
1512                                                  last_object,
1513                                                  last_slice,
1514                                                  mb_x,
1515                                                  mb_y,
1516                                                  width_in_mbs,
1517                                                  qp);
1518     }
1519 }
1520                           
1521 /*
1522  * return size in Owords (16bytes)
1523  */         
1524 static int
1525 gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1526                                struct encode_state *encode_state,
1527                                struct intel_encoder_context *encoder_context,
1528                                int slice_index,
1529                                int batchbuffer_offset)
1530 {
1531     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1532     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1533     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1534     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1535     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1536     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1537     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1538     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1539     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1540     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1541     unsigned char *slice_header = NULL;
1542     int slice_header_length_in_bits = 0;
1543     unsigned int tail_data[] = { 0x0, 0x0 };
1544     long head_offset;
1545     int old_used = intel_batchbuffer_used_size(slice_batch), used;
1546     unsigned short head_size, tail_size;
1547     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1548
1549     if (rate_control_mode == VA_RC_CBR) {
1550         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1551         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1552     }
1553
1554     /* only support for 8-bit pixel bit-depth */
1555     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1556     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1557     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1558     assert(qp >= 0 && qp < 52);
1559
1560     head_offset = old_used / 16;
1561     gen75_mfc_avc_slice_state(ctx,
1562                              pPicParameter,
1563                              pSliceParameter,
1564                              encode_state,
1565                              encoder_context,
1566                              (rate_control_mode == VA_RC_CBR),
1567                              qp,
1568                              slice_batch);
1569
1570     if (slice_index == 0)
1571         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1572
1573     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1574
1575     // slice hander
1576     mfc_context->insert_object(ctx,
1577                                encoder_context,
1578                                (unsigned int *)slice_header,
1579                                ALIGN(slice_header_length_in_bits, 32) >> 5,
1580                                slice_header_length_in_bits & 0x1f,
1581                                5,  /* first 5 bytes are start code + nal unit type */
1582                                1,
1583                                0,
1584                                1,
1585                                slice_batch);
1586     free(slice_header);
1587
1588     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1589     used = intel_batchbuffer_used_size(slice_batch);
1590     head_size = (used - old_used) / 16;
1591     old_used = used;
1592
1593     /* tail */
1594     if (last_slice) {    
1595         mfc_context->insert_object(ctx,
1596                                    encoder_context,
1597                                    tail_data,
1598                                    2,
1599                                    8,
1600                                    2,
1601                                    1,
1602                                    1,
1603                                    0,
1604                                    slice_batch);
1605     } else {
1606         mfc_context->insert_object(ctx,
1607                                    encoder_context,
1608                                    tail_data,
1609                                    1,
1610                                    8,
1611                                    1,
1612                                    1,
1613                                    1,
1614                                    0,
1615                                    slice_batch);
1616     }
1617
1618     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1619     used = intel_batchbuffer_used_size(slice_batch);
1620     tail_size = (used - old_used) / 16;
1621
1622    
1623     gen75_mfc_avc_batchbuffer_slice_command(ctx,
1624                                            encoder_context,
1625                                            pSliceParameter,
1626                                            head_offset,
1627                                            head_size,
1628                                            tail_size,
1629                                            batchbuffer_offset,
1630                                            qp,
1631                                            last_slice);
1632
1633     return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
1634 }
1635
1636 static void
1637 gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1638                                   struct encode_state *encode_state,
1639                                   struct intel_encoder_context *encoder_context)
1640 {
1641     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1642     struct intel_batchbuffer *batch = encoder_context->base.batch;
1643     int i, size, offset = 0;
1644     intel_batchbuffer_start_atomic(batch, 0x4000); 
1645     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1646
1647     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1648         size = gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1649         offset += size;
1650     }
1651
1652     intel_batchbuffer_end_atomic(batch);
1653     intel_batchbuffer_flush(batch);
1654 }
1655
1656 static void
1657 gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
1658                                struct encode_state *encode_state,
1659                                struct intel_encoder_context *encoder_context)
1660 {
1661     gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1662     gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1663     gen75_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1664     gen75_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1665 }
1666
1667 static dri_bo *
1668 gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1669                                   struct encode_state *encode_state,
1670                                   struct intel_encoder_context *encoder_context)
1671 {
1672     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1673
1674     gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1675     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1676
1677     return mfc_context->mfc_batchbuffer_surface.bo;
1678 }
1679
1680 #endif
1681
1682 static void
1683 gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
1684                                  struct encode_state *encode_state,
1685                                  struct intel_encoder_context *encoder_context)
1686 {
1687     struct intel_batchbuffer *batch = encoder_context->base.batch;
1688     dri_bo *slice_batch_bo;
1689
1690     if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1691         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1692         assert(0);
1693         return; 
1694     }
1695
1696 #ifdef MFC_SOFTWARE_HASWELL
1697     slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1698 #else
1699     slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1700 #endif
1701
1702     // begin programing
1703     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1704     intel_batchbuffer_emit_mi_flush(batch);
1705     
1706     // picture level programing
1707     gen75_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1708
1709     BEGIN_BCS_BATCH(batch, 2);
1710     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1711     OUT_BCS_RELOC(batch,
1712                   slice_batch_bo,
1713                   I915_GEM_DOMAIN_COMMAND, 0, 
1714                   0);
1715     ADVANCE_BCS_BATCH(batch);
1716
1717     // end programing
1718     intel_batchbuffer_end_atomic(batch);
1719
1720     dri_bo_unreference(slice_batch_bo);
1721 }
1722
1723
1724 static VAStatus
1725 gen75_mfc_avc_encode_picture(VADriverContextP ctx, 
1726                             struct encode_state *encode_state,
1727                             struct intel_encoder_context *encoder_context)
1728 {
1729     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1730     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1731     int current_frame_bits_size;
1732     int sts;
1733  
1734     for (;;) {
1735         gen75_mfc_init(ctx, encode_state, encoder_context);
1736         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1737         /*Programing bcs pipeline*/
1738         gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);  //filling the pipeline
1739         gen75_mfc_run(ctx, encode_state, encoder_context);
1740         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1741             gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1742             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1743             if (sts == BRC_NO_HRD_VIOLATION) {
1744                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1745                 break;
1746             }
1747             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1748                 if (!mfc_context->hrd.violation_noted) {
1749                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1750                     mfc_context->hrd.violation_noted = 1;
1751                 }
1752                 return VA_STATUS_SUCCESS;
1753             }
1754         } else {
1755             break;
1756         }
1757     }
1758
1759     return VA_STATUS_SUCCESS;
1760 }
1761
1762 /*
1763  * MPEG-2
1764  */
1765
/*
 * Lookup table indexed by the picture_type field of the MPEG-2 picture
 * parameter buffer; the value is the picture-type code placed in
 * MFX_MPEG2_PIC_STATE.
 */
static const int va_to_gen75_mpeg2_picture_type[3] = {
    1,  /* index 0: I picture */
    2,  /* index 1: P picture */
    3   /* index 2: B picture */
};
1772
1773 static void
1774 gen75_mfc_mpeg2_pic_state(VADriverContextP ctx,
1775                           struct intel_encoder_context *encoder_context,
1776                           struct encode_state *encode_state)
1777 {
1778     struct intel_batchbuffer *batch = encoder_context->base.batch;
1779     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1780     VAEncPictureParameterBufferMPEG2 *pic_param;
1781     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1782     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1783
1784     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1785     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1786
1787     BEGIN_BCS_BATCH(batch, 13);
1788     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1789     OUT_BCS_BATCH(batch,
1790                   (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1791                   (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1792                   (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1793                   (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1794                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1795                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1796                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1797                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1798                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1799                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1800                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1801                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1802     OUT_BCS_BATCH(batch,
1803                   0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1804                   va_to_gen75_mpeg2_picture_type[pic_param->picture_type] << 9 |
1805                   0);
1806     OUT_BCS_BATCH(batch,
1807                   1 << 31 |     /* slice concealment */
1808                   (height_in_mbs - 1) << 16 |
1809                   (width_in_mbs - 1));
1810     OUT_BCS_BATCH(batch, 0);
1811     OUT_BCS_BATCH(batch, 0);
1812     OUT_BCS_BATCH(batch,
1813                   0xFFF << 16 | /* InterMBMaxSize */
1814                   0xFFF << 0 |  /* IntraMBMaxSize */
1815                   0);
1816     OUT_BCS_BATCH(batch, 0);
1817     OUT_BCS_BATCH(batch, 0);
1818     OUT_BCS_BATCH(batch, 0);
1819     OUT_BCS_BATCH(batch, 0);
1820     OUT_BCS_BATCH(batch, 0);
1821     OUT_BCS_BATCH(batch, 0);
1822     ADVANCE_BCS_BATCH(batch);
1823 }
1824
1825 static void
1826 gen75_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1827 {
1828     unsigned char intra_qm[64] = {
1829          8, 16, 19, 22, 26, 27, 29, 34,
1830         16, 16, 22, 24, 27, 29, 34, 37,
1831         19, 22, 26, 27, 29, 34, 34, 38,
1832         22, 22, 26, 27, 29, 34, 37, 40,
1833         22, 26, 27, 29, 32, 35, 40, 48,
1834         26, 27, 29, 32, 35, 40, 48, 58,
1835         26, 27, 29, 34, 38, 46, 56, 69,
1836         27, 29, 35, 38, 46, 56, 69, 83
1837     };
1838
1839     unsigned char non_intra_qm[64] = {
1840         16, 16, 16, 16, 16, 16, 16, 16,
1841         16, 16, 16, 16, 16, 16, 16, 16,
1842         16, 16, 16, 16, 16, 16, 16, 16,
1843         16, 16, 16, 16, 16, 16, 16, 16,
1844         16, 16, 16, 16, 16, 16, 16, 16,
1845         16, 16, 16, 16, 16, 16, 16, 16,
1846         16, 16, 16, 16, 16, 16, 16, 16,
1847         16, 16, 16, 16, 16, 16, 16, 16
1848     };
1849
1850     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1851     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
1852 }
1853
1854 static void
1855 gen75_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1856 {
1857     unsigned short intra_fqm[64] = {
1858          65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1859          65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1860          65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1861          65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1862          65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1863          65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1864          65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1865          65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1866     };
1867
1868     unsigned short non_intra_fqm[64] = {
1869         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1870         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1871         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1872         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1873         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1874         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1875         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1876         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1877     };
1878
1879     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1880     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
1881 }
1882
1883 static void
1884 gen75_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1885                                  struct intel_encoder_context *encoder_context,
1886                                  int x, int y,
1887                                  int next_x, int next_y,
1888                                  int is_fisrt_slice_group,
1889                                  int is_last_slice_group,
1890                                  int intra_slice,
1891                                  int qp,
1892                                  struct intel_batchbuffer *batch)
1893 {
1894     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1895
1896     if (batch == NULL)
1897         batch = encoder_context->base.batch;
1898
1899     BEGIN_BCS_BATCH(batch, 8);
1900
1901     OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1902     OUT_BCS_BATCH(batch,
1903                   0 << 31 |                             /* MbRateCtrlFlag */
1904                   !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
1905                   1 << 17 |                             /* Insert Header before the first slice group data */
1906                   1 << 16 |                             /* SliceData PresentFlag: always 1 */
1907                   1 << 15 |                             /* TailPresentFlag: always 1 */
1908                   0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
1909                   !!intra_slice << 13 |                 /* IntraSlice */
1910                   !!intra_slice << 12 |                 /* IntraSliceFlag */
1911                   0);
1912     OUT_BCS_BATCH(batch,
1913                   next_y << 24 |
1914                   next_x << 16 |
1915                   y << 8 |
1916                   x << 0 |
1917                   0);
1918     OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
1919     /* bitstream pointer is only loaded once for the first slice of a frame when 
1920      * LoadSlicePointerFlag is 0
1921      */
1922     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1923     OUT_BCS_BATCH(batch, 0);    /* FIXME: */
1924     OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
1925     OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
1926
1927     ADVANCE_BCS_BATCH(batch);
1928 }
1929
1930 static int
1931 gen75_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1932                                  struct intel_encoder_context *encoder_context,
1933                                  int x, int y,
1934                                  int first_mb_in_slice,
1935                                  int last_mb_in_slice,
1936                                  int first_mb_in_slice_group,
1937                                  int last_mb_in_slice_group,
1938                                  int mb_type,
1939                                  int qp_scale_code,
1940                                  int coded_block_pattern,
1941                                  unsigned char target_size_in_word,
1942                                  unsigned char max_size_in_word,
1943                                  struct intel_batchbuffer *batch)
1944 {
1945     int len_in_dwords = 9;
1946
1947     if (batch == NULL)
1948         batch = encoder_context->base.batch;
1949
1950     BEGIN_BCS_BATCH(batch, len_in_dwords);
1951
1952     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1953     OUT_BCS_BATCH(batch,
1954                   0 << 24 |     /* PackedMvNum */
1955                   0 << 20 |     /* MvFormat */
1956                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1957                   0 << 15 |     /* TransformFlag: frame DCT */
1958                   0 << 14 |     /* FieldMbFlag */
1959                   1 << 13 |     /* IntraMbFlag */
1960                   mb_type << 8 |   /* MbType: Intra */
1961                   0 << 2 |      /* SkipMbFlag */
1962                   0 << 0 |      /* InterMbMode */
1963                   0);
1964     OUT_BCS_BATCH(batch, y << 16 | x);
1965     OUT_BCS_BATCH(batch,
1966                   max_size_in_word << 24 |
1967                   target_size_in_word << 16 |
1968                   coded_block_pattern << 6 |      /* CBP */
1969                   0);
1970     OUT_BCS_BATCH(batch,
1971                   last_mb_in_slice << 31 |
1972                   first_mb_in_slice << 30 |
1973                   0 << 27 |     /* EnableCoeffClamp */
1974                   last_mb_in_slice_group << 26 |
1975                   0 << 25 |     /* MbSkipConvDisable */
1976                   first_mb_in_slice_group << 24 |
1977                   0 << 16 |     /* MvFieldSelect */
1978                   qp_scale_code << 0 |
1979                   0);
1980     OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
1981     OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
1982     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1983     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1984
1985     ADVANCE_BCS_BATCH(batch);
1986
1987     return len_in_dwords;
1988 }
1989
1990 #define MPEG2_INTER_MV_OFFSET   12 
1991
/*
 * Motion-vector component limits, indexed by f_code (1..9). Entry 0 is a
 * placeholder and is never used for clamping.
 */
static struct _mv_ranges
{
    int low;    /* in the unit of 1/2 pixel */
    int high;   /* in the unit of 1/2 pixel */
} mv_ranges[] = {
    {     0,    0 },    /* f_code 0: unused */
    {   -16,   15 },
    {   -32,   31 },
    {   -64,   63 },
    {  -128,  127 },
    {  -256,  255 },
    {  -512,  511 },
    { -1024, 1023 },
    { -2048, 2047 },
    { -4096, 4095 }
};

/*
 * Sanitise one motion-vector component.
 *
 *   mv          - vector component, in half-pixel units
 *   pos         - macroblock index along the same axis
 *   display_max - picture extent along that axis, in pixels
 *   f_code      - f_code governing this component
 *
 * The component is reset to 0 when it would move the 16-pixel macroblock
 * outside [0, display_max]. The result is then clamped to the range for
 * f_code when f_code is in 1..9; other f_code values leave it unclamped.
 */
static int
mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
{
    const int mb_start = pos * 16 * 2;          /* near edge, half-pixels */
    const int mb_end = (pos + 1) * 16 * 2;      /* far edge, half-pixels */
    const struct _mv_ranges *limits;
    int result = mv;

    if (mv + mb_start < 0 || mv + mb_end > display_max * 2)
        result = 0;

    if (f_code <= 0 || f_code >= 10)
        return result;

    limits = &mv_ranges[f_code];

    if (result < limits->low)
        result = limits->low;

    if (result > limits->high)
        result = limits->high;

    return result;
}
2026
2027 static int
2028 gen75_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
2029                                  struct encode_state *encode_state,
2030                                  struct intel_encoder_context *encoder_context,
2031                                  unsigned int *msg,
2032                                  int width_in_mbs, int height_in_mbs,
2033                                  int x, int y,
2034                                  int first_mb_in_slice,
2035                                  int last_mb_in_slice,
2036                                  int first_mb_in_slice_group,
2037                                  int last_mb_in_slice_group,
2038                                  int qp_scale_code,
2039                                  unsigned char target_size_in_word,
2040                                  unsigned char max_size_in_word,
2041                                  struct intel_batchbuffer *batch)
2042 {
2043     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
2044     int len_in_dwords = 9;
2045     short *mvptr, mvx0, mvy0, mvx1, mvy1;
2046     
2047     if (batch == NULL)
2048         batch = encoder_context->base.batch;
2049
2050     mvptr = (short *)(msg + MPEG2_INTER_MV_OFFSET);
2051     mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
2052     mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
2053     mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
2054     mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
2055
2056     BEGIN_BCS_BATCH(batch, len_in_dwords);
2057
2058     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
2059     OUT_BCS_BATCH(batch,
2060                   2 << 24 |     /* PackedMvNum */
2061                   7 << 20 |     /* MvFormat */
2062                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
2063                   0 << 15 |     /* TransformFlag: frame DCT */
2064                   0 << 14 |     /* FieldMbFlag */
2065                   0 << 13 |     /* IntraMbFlag */
2066                   1 << 8 |      /* MbType: Frame-based */
2067                   0 << 2 |      /* SkipMbFlag */
2068                   0 << 0 |      /* InterMbMode */
2069                   0);
2070     OUT_BCS_BATCH(batch, y << 16 | x);
2071     OUT_BCS_BATCH(batch,
2072                   max_size_in_word << 24 |
2073                   target_size_in_word << 16 |
2074                   0x3f << 6 |   /* CBP */
2075                   0);
2076     OUT_BCS_BATCH(batch,
2077                   last_mb_in_slice << 31 |
2078                   first_mb_in_slice << 30 |
2079                   0 << 27 |     /* EnableCoeffClamp */
2080                   last_mb_in_slice_group << 26 |
2081                   0 << 25 |     /* MbSkipConvDisable */
2082                   first_mb_in_slice_group << 24 |
2083                   0 << 16 |     /* MvFieldSelect */
2084                   qp_scale_code << 0 |
2085                   0);
2086
2087     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
2088     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
2089     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
2090     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
2091
2092     ADVANCE_BCS_BATCH(batch);
2093
2094     return len_in_dwords;
2095 }
2096
2097 static void
2098 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
2099                                            struct encode_state *encode_state,
2100                                            struct intel_encoder_context *encoder_context,
2101                                            struct intel_batchbuffer *slice_batch)
2102 {
2103     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2104     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
2105
2106     if (encode_state->packed_header_data[idx]) {
2107         VAEncPackedHeaderParameterBuffer *param = NULL;
2108         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2109         unsigned int length_in_bits;
2110
2111         assert(encode_state->packed_header_param[idx]);
2112         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2113         length_in_bits = param->bit_length;
2114
2115         mfc_context->insert_object(ctx,
2116                                    encoder_context,
2117                                    header_data,
2118                                    ALIGN(length_in_bits, 32) >> 5,
2119                                    length_in_bits & 0x1f,
2120                                    5,   /* FIXME: check it */
2121                                    0,
2122                                    0,
2123                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2124                                    slice_batch);
2125     }
2126
2127     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2128
2129     if (encode_state->packed_header_data[idx]) {
2130         VAEncPackedHeaderParameterBuffer *param = NULL;
2131         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2132         unsigned int length_in_bits;
2133
2134         assert(encode_state->packed_header_param[idx]);
2135         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2136         length_in_bits = param->bit_length;
2137
2138         mfc_context->insert_object(ctx,
2139                                    encoder_context,
2140                                    header_data,
2141                                    ALIGN(length_in_bits, 32) >> 5,
2142                                    length_in_bits & 0x1f,
2143                                    5,   /* FIXME: check it */
2144                                    0,
2145                                    0,
2146                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2147                                    slice_batch);
2148     }
2149 }
2150
2151 static void 
2152 gen75_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2153                                      struct encode_state *encode_state,
2154                                      struct intel_encoder_context *encoder_context,
2155                                      int slice_index,
2156                                      VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2157                                      struct intel_batchbuffer *slice_batch)
2158 {
2159     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2160     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2161     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2162     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2163     unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2164     unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2165     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2166     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2167     int i, j;
2168     int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2169     unsigned int *msg = NULL;
2170     unsigned char *msg_ptr = NULL;
2171
2172     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
2173     h_start_pos = slice_param->macroblock_address % width_in_mbs;
2174     v_start_pos = slice_param->macroblock_address / width_in_mbs;
2175     assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
2176
2177     dri_bo_map(vme_context->vme_output.bo , 0);
2178     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
2179
2180     if (next_slice_group_param) {
2181         h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2182         v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
2183     } else {
2184         h_next_start_pos = 0;
2185         v_next_start_pos = height_in_mbs;
2186     }
2187
2188     gen75_mfc_mpeg2_slicegroup_state(ctx,
2189                                      encoder_context,
2190                                      h_start_pos,
2191                                      v_start_pos,
2192                                      h_next_start_pos,
2193                                      v_next_start_pos,
2194                                      slice_index == 0,
2195                                      next_slice_group_param == NULL,
2196                                      slice_param->is_intra_slice,
2197                                      slice_param->quantiser_scale_code,
2198                                      slice_batch);
2199
2200     if (slice_index == 0) 
2201         intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2202
2203     /* Insert '00' to make sure the header is valid */
2204     mfc_context->insert_object(ctx,
2205                                encoder_context,
2206                                (unsigned int*)section_delimiter,
2207                                1,
2208                                8,   /* 8bits in the last DWORD */
2209                                1,   /* 1 byte */
2210                                1,
2211                                0,
2212                                0,
2213                                slice_batch);
2214
2215     for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2216         /* PAK for each macroblocks */
2217         for (j = 0; j < slice_param->num_macroblocks; j++) {
2218             int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2219             int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2220             int first_mb_in_slice = (j == 0);
2221             int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2222             int first_mb_in_slice_group = (i == 0 && j == 0);
2223             int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2224                                           j == slice_param->num_macroblocks - 1);
2225
2226             msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
2227
2228             if (slice_param->is_intra_slice) {
2229                 gen75_mfc_mpeg2_pak_object_intra(ctx,
2230                                                  encoder_context,
2231                                                  h_pos, v_pos,
2232                                                  first_mb_in_slice,
2233                                                  last_mb_in_slice,
2234                                                  first_mb_in_slice_group,
2235                                                  last_mb_in_slice_group,
2236                                                  0x1a,
2237                                                  slice_param->quantiser_scale_code,
2238                                                  0x3f,
2239                                                  0,
2240                                                  0xff,
2241                                                  slice_batch);
2242             } else {
2243                 gen75_mfc_mpeg2_pak_object_inter(ctx,
2244                                                  encode_state,
2245                                                  encoder_context,
2246                                                  msg,
2247                                                  width_in_mbs, height_in_mbs,
2248                                                  h_pos, v_pos,
2249                                                  first_mb_in_slice,
2250                                                  last_mb_in_slice,
2251                                                  first_mb_in_slice_group,
2252                                                  last_mb_in_slice_group,
2253                                                  slice_param->quantiser_scale_code,
2254                                                  0,
2255                                                  0xff,
2256                                                  slice_batch);
2257             }
2258         }
2259
2260         slice_param++;
2261     }
2262
2263     dri_bo_unmap(vme_context->vme_output.bo);
2264
2265     /* tail data */
2266     if (next_slice_group_param == NULL) { /* end of a picture */
2267         mfc_context->insert_object(ctx,
2268                                    encoder_context,
2269                                    (unsigned int *)tail_delimiter,
2270                                    2,
2271                                    8,   /* 8bits in the last DWORD */
2272                                    5,   /* 5 bytes */
2273                                    1,
2274                                    1,
2275                                    0,
2276                                    slice_batch);
2277     } else {        /* end of a lsice group */
2278         mfc_context->insert_object(ctx,
2279                                    encoder_context,
2280                                    (unsigned int *)section_delimiter,
2281                                    1,
2282                                    8,   /* 8bits in the last DWORD */
2283                                    1,   /* 1 byte */
2284                                    1,
2285                                    1,
2286                                    0,
2287                                    slice_batch);
2288     }
2289 }
2290
2291 /* 
2292  * A batch buffer for all slices, including slice state, 
2293  * slice insert object and slice pak object commands
2294  *
2295  */
2296 static dri_bo *
2297 gen75_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2298                                            struct encode_state *encode_state,
2299                                            struct intel_encoder_context *encoder_context)
2300 {
2301     struct i965_driver_data *i965 = i965_driver_data(ctx);
2302     struct intel_batchbuffer *batch;
2303     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2304     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2305     dri_bo *batch_bo;
2306     int i;
2307     int buffer_size;
2308     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2309     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2310
2311     buffer_size = width_in_mbs * height_in_mbs * 64;
2312     batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
2313     batch_bo = batch->buffer;
2314
2315     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2316         if (i == encode_state->num_slice_params_ext - 1)
2317             next_slice_group_param = NULL;
2318         else
2319             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2320
2321         gen75_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2322     }
2323
2324     intel_batchbuffer_align(batch, 8);
2325     
2326     BEGIN_BCS_BATCH(batch, 2);
2327     OUT_BCS_BATCH(batch, 0);
2328     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2329     ADVANCE_BCS_BATCH(batch);
2330
2331     dri_bo_reference(batch_bo);
2332     intel_batchbuffer_free(batch);
2333
2334     return batch_bo;
2335 }
2336
2337 static void
2338 gen75_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2339                                             struct encode_state *encode_state,
2340                                             struct intel_encoder_context *encoder_context)
2341 {
2342     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2343
2344     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2345     mfc_context->set_surface_state(ctx, encoder_context);
2346     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2347     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
2348     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2349     gen75_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2350     gen75_mfc_mpeg2_qm_state(ctx, encoder_context);
2351     gen75_mfc_mpeg2_fqm_state(ctx, encoder_context);
2352 }
2353
2354 static void
2355 gen75_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2356                                     struct encode_state *encode_state,
2357                                     struct intel_encoder_context *encoder_context)
2358 {
2359     struct intel_batchbuffer *batch = encoder_context->base.batch;
2360     dri_bo *slice_batch_bo;
2361
2362     slice_batch_bo = gen75_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2363
2364     // begin programing
2365     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
2366     intel_batchbuffer_emit_mi_flush(batch);
2367     
2368     // picture level programing
2369     gen75_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
2370
2371     BEGIN_BCS_BATCH(batch, 2);
2372     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
2373     OUT_BCS_RELOC(batch,
2374                   slice_batch_bo,
2375                   I915_GEM_DOMAIN_COMMAND, 0, 
2376                   0);
2377     ADVANCE_BCS_BATCH(batch);
2378
2379     // end programing
2380     intel_batchbuffer_end_atomic(batch);
2381
2382     dri_bo_unreference(slice_batch_bo);
2383 }
2384
2385 static VAStatus
2386 intel_mfc_mpeg2_prepare(VADriverContextP ctx, 
2387                         struct encode_state *encode_state,
2388                         struct intel_encoder_context *encoder_context)
2389 {
2390     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2391     struct object_surface *obj_surface; 
2392     struct object_buffer *obj_buffer;
2393     struct i965_coded_buffer_segment *coded_buffer_segment;
2394     VAStatus vaStatus = VA_STATUS_SUCCESS;
2395     dri_bo *bo;
2396     int i;
2397
2398     /* reconstructed surface */
2399     obj_surface = encode_state->reconstructed_object;
2400     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2401     mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2402     dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2403     mfc_context->surface_state.width = obj_surface->orig_width;
2404     mfc_context->surface_state.height = obj_surface->orig_height;
2405     mfc_context->surface_state.w_pitch = obj_surface->width;
2406     mfc_context->surface_state.h_pitch = obj_surface->height;
2407
2408     /* forward reference */
2409     obj_surface = encode_state->reference_objects[0];
2410
2411     if (obj_surface && obj_surface->bo) {
2412         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2413         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2414     } else
2415         mfc_context->reference_surfaces[0].bo = NULL;
2416
2417     /* backward reference */
2418     obj_surface = encode_state->reference_objects[1];
2419
2420     if (obj_surface && obj_surface->bo) {
2421         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2422         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2423     } else {
2424         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2425
2426         if (mfc_context->reference_surfaces[1].bo)
2427             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2428     }
2429
2430     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2431         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2432
2433         if (mfc_context->reference_surfaces[i].bo)
2434             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2435     }
2436     
2437     /* input YUV surface */
2438     obj_surface = encode_state->input_yuv_object;
2439     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2440     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2441
2442     /* coded buffer */
2443     obj_buffer = encode_state->coded_buf_object;
2444     bo = obj_buffer->buffer_store->bo;
2445     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2446     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2447     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2448     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2449
2450     /* set the internal flag to 0 to indicate the coded size is unknown */
2451     dri_bo_map(bo, 1);
2452     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2453     coded_buffer_segment->mapped = 0;
2454     coded_buffer_segment->codec = encoder_context->codec;
2455     dri_bo_unmap(bo);
2456
2457     return vaStatus;
2458 }
2459
2460 static VAStatus
2461 gen75_mfc_mpeg2_encode_picture(VADriverContextP ctx, 
2462                                struct encode_state *encode_state,
2463                                struct intel_encoder_context *encoder_context)
2464 {
2465     gen75_mfc_init(ctx, encode_state, encoder_context);
2466     intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2467     /*Programing bcs pipeline*/
2468     gen75_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2469     gen75_mfc_run(ctx, encode_state, encoder_context);
2470
2471     return VA_STATUS_SUCCESS;
2472 }
2473
2474 static void
2475 gen75_mfc_context_destroy(void *context)
2476 {
2477     struct gen6_mfc_context *mfc_context = context;
2478     int i;
2479
2480     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2481     mfc_context->post_deblocking_output.bo = NULL;
2482
2483     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2484     mfc_context->pre_deblocking_output.bo = NULL;
2485
2486     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2487     mfc_context->uncompressed_picture_source.bo = NULL;
2488
2489     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
2490     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2491
2492     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2493         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2494         mfc_context->direct_mv_buffers[i].bo = NULL;
2495     }
2496
2497     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2498     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
2499
2500     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2501     mfc_context->macroblock_status_buffer.bo = NULL;
2502
2503     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2504     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2505
2506     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2507     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2508
2509     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2510         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2511         mfc_context->reference_surfaces[i].bo = NULL;  
2512     }
2513
2514     i965_gpe_context_destroy(&mfc_context->gpe_context);
2515
2516     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2517     mfc_context->mfc_batchbuffer_surface.bo = NULL;
2518
2519     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2520     mfc_context->aux_batchbuffer_surface.bo = NULL;
2521
2522     if (mfc_context->aux_batchbuffer)
2523         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2524
2525     mfc_context->aux_batchbuffer = NULL;
2526
2527     free(mfc_context);
2528 }
2529
2530 static VAStatus gen75_mfc_pipeline(VADriverContextP ctx,
2531                   VAProfile profile,
2532                   struct encode_state *encode_state,
2533                   struct intel_encoder_context *encoder_context)
2534 {
2535     VAStatus vaStatus;
2536
2537     switch (profile) {
2538     case VAProfileH264Baseline:
2539     case VAProfileH264Main:
2540     case VAProfileH264High:
2541         vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
2542         break;
2543
2544         /* FIXME: add for other profile */
2545     case VAProfileMPEG2Simple:
2546     case VAProfileMPEG2Main:
2547         vaStatus = gen75_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
2548         break;
2549
2550     default:
2551         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2552         break;
2553     }
2554
2555     return vaStatus;
2556 }
2557
2558 Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2559 {
2560     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
2561
2562     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2563
2564     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
2565     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
2566
2567     mfc_context->gpe_context.curbe.length = 32 * 4;
2568
2569     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2570     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
2571     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
2572     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
2573     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
2574
2575     i965_gpe_load_kernels(ctx,
2576                           &mfc_context->gpe_context,
2577                           gen75_mfc_kernels,
2578                           NUM_MFC_KERNEL);
2579
2580     mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
2581     mfc_context->set_surface_state = gen75_mfc_surface_state;
2582     mfc_context->ind_obj_base_addr_state = gen75_mfc_ind_obj_base_addr_state;
2583     mfc_context->avc_img_state = gen75_mfc_avc_img_state;
2584     mfc_context->avc_qm_state = gen75_mfc_avc_qm_state;
2585     mfc_context->avc_fqm_state = gen75_mfc_avc_fqm_state;
2586     mfc_context->insert_object = gen75_mfc_avc_insert_object;
2587     mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;
2588
2589     encoder_context->mfc_context = mfc_context;
2590     encoder_context->mfc_context_destroy = gen75_mfc_context_destroy;
2591     encoder_context->mfc_pipeline = gen75_mfc_pipeline;
2592     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
2593
2594     return True;
2595 }