PAK encoding uses the reference list parsed from slice_param instead of hacked DPB
[platform/upstream/libva-intel-driver.git] / src / gen75_mfc.c
1 /*
2  * Copyright © 2010-2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45
/* Build-time switch; the code that tests it lies outside this excerpt. */
#define MFC_SOFTWARE_HASWELL    1

/* Lowest device revision that counts as "B0 stepping or later". */
#define B0_STEP_REV             2
/*
 * Non-zero when the device revision is at least B0_STEP_REV.
 * The argument is parenthesized so that pointer expressions such as
 * `&dev` or `base + 1` expand to the intended member access.
 */
#define IS_STEPPING_BPLUS(i965) (((i965)->intel.revision) >= B0_STEP_REV)
50
51 static const uint32_t gen75_mfc_batchbuffer_avc_intra[][4] = {
52 #include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
53 };
54
55 static const uint32_t gen75_mfc_batchbuffer_avc_inter[][4] = {
56 #include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
57 };
58
59 static struct i965_kernel gen75_mfc_kernels[] = {
60     {
61         "MFC AVC INTRA BATCHBUFFER ",
62         MFC_BATCHBUFFER_AVC_INTRA,
63         gen75_mfc_batchbuffer_avc_intra,
64         sizeof(gen75_mfc_batchbuffer_avc_intra),
65         NULL
66     },
67
68     {
69         "MFC AVC INTER BATCHBUFFER ",
70         MFC_BATCHBUFFER_AVC_INTER,
71         gen75_mfc_batchbuffer_avc_inter,
72         sizeof(gen75_mfc_batchbuffer_avc_inter),
73         NULL
74     },
75 };
76
/* Inter prediction mode field: mask plus the partition-shape values it can hold. */
#define INTER_MODE_MASK  0x03
#define INTER_8X8        0x03
#define INTER_16X8       0x01
#define INTER_8X16       0x02
/* Mask selecting the sub-macroblock shape bits. */
#define SUBMB_SHAPE_MASK 0x00FF00

/* Motion-vector count encodings, positioned at bit 20. */
#define INTER_MV8        (4 << 20)
#define INTER_MV32       (6 << 20)

85
86
87 static void
88 gen75_mfc_pipe_mode_select(VADriverContextP ctx,
89                           int standard_select,
90                           struct intel_encoder_context *encoder_context)
91 {
92     struct intel_batchbuffer *batch = encoder_context->base.batch;
93     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
94     assert(standard_select == MFX_FORMAT_MPEG2 ||
95            standard_select == MFX_FORMAT_AVC);
96
97     BEGIN_BCS_BATCH(batch, 5);
98
99     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
100     OUT_BCS_BATCH(batch,
101                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
102                   (MFD_MODE_VLD << 15) | /* VLD mode */
103                   (0 << 10) | /* Stream-Out Enable */
104                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
105                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
106                   (0 << 5)  | /* not in stitch mode */
107                   (1 << 4)  | /* encoding mode */
108                   (standard_select << 0));  /* standard select: avc or mpeg2 */
109     OUT_BCS_BATCH(batch,
110                   (0 << 7)  | /* expand NOA bus flag */
111                   (0 << 6)  | /* disable slice-level clock gating */
112                   (0 << 5)  | /* disable clock gating for NOA */
113                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
114                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
115                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
116                   (0 << 1)  |
117                   (0 << 0));
118     OUT_BCS_BATCH(batch, 0);
119     OUT_BCS_BATCH(batch, 0);
120
121     ADVANCE_BCS_BATCH(batch);
122 }
123
124 static void
125 gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
126 {
127     struct intel_batchbuffer *batch = encoder_context->base.batch;
128     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
129
130     BEGIN_BCS_BATCH(batch, 6);
131
132     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
133     OUT_BCS_BATCH(batch, 0);
134     OUT_BCS_BATCH(batch,
135                   ((mfc_context->surface_state.height - 1) << 18) |
136                   ((mfc_context->surface_state.width - 1) << 4));
137     OUT_BCS_BATCH(batch,
138                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
139                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
140                   (0 << 22) | /* surface object control state, FIXME??? */
141                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
142                   (0 << 2)  | /* must be 0 for interleave U/V */
143                   (1 << 1)  | /* must be tiled */
144                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
145     OUT_BCS_BATCH(batch,
146                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
147                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
148     OUT_BCS_BATCH(batch, 0);
149
150     ADVANCE_BCS_BATCH(batch);
151 }
152
153 static void
154 gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
155                                 struct intel_encoder_context *encoder_context)
156 {
157     struct intel_batchbuffer *batch = encoder_context->base.batch;
158     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
159     struct gen6_vme_context *vme_context = encoder_context->vme_context;
160
161     BEGIN_BCS_BATCH(batch, 26);
162
163     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
164         /* the DW1-3 is for the MFX indirect bistream offset */
165     OUT_BCS_BATCH(batch, 0);
166     OUT_BCS_BATCH(batch, 0);
167     OUT_BCS_BATCH(batch, 0);
168         /* the DW4-5 is the MFX upper bound */
169     OUT_BCS_BATCH(batch, 0);
170     OUT_BCS_BATCH(batch, 0);
171
172     /* the DW6-10 is for MFX Indirect MV Object Base Address */
173     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
174     OUT_BCS_BATCH(batch, 0);
175     OUT_BCS_BATCH(batch, 0);
176     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
177     OUT_BCS_BATCH(batch, 0);
178
179      /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
180     OUT_BCS_BATCH(batch, 0);
181     OUT_BCS_BATCH(batch, 0);
182     OUT_BCS_BATCH(batch, 0);
183     OUT_BCS_BATCH(batch, 0);
184     OUT_BCS_BATCH(batch, 0);
185
186      /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */    
187     OUT_BCS_BATCH(batch, 0);
188     OUT_BCS_BATCH(batch, 0);
189     OUT_BCS_BATCH(batch, 0);
190     OUT_BCS_BATCH(batch, 0);
191     OUT_BCS_BATCH(batch, 0);
192
193     /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/        
194     OUT_BCS_RELOC(batch,
195                   mfc_context->mfc_indirect_pak_bse_object.bo,
196                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
197                   0);
198     OUT_BCS_BATCH(batch, 0);
199     OUT_BCS_BATCH(batch, 0);
200         
201     OUT_BCS_RELOC(batch,
202                   mfc_context->mfc_indirect_pak_bse_object.bo,
203                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
204                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
205     OUT_BCS_BATCH(batch, 0);
206
207     ADVANCE_BCS_BATCH(batch);
208 }
209
210 static void
211 gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
212 {
213     struct intel_batchbuffer *batch = encoder_context->base.batch;
214     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
215     struct gen6_vme_context *vme_context = encoder_context->vme_context;
216     struct i965_driver_data *i965 = i965_driver_data(ctx);
217
218     if (IS_STEPPING_BPLUS(i965)) {
219         gen75_mfc_ind_obj_base_addr_state_bplus(ctx, encoder_context);
220         return;
221     }
222
223     BEGIN_BCS_BATCH(batch, 11);
224
225     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
226     OUT_BCS_BATCH(batch, 0);
227     OUT_BCS_BATCH(batch, 0);
228     /* MFX Indirect MV Object Base Address */
229     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
230     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
231     OUT_BCS_BATCH(batch, 0);
232     OUT_BCS_BATCH(batch, 0);
233     OUT_BCS_BATCH(batch, 0);
234     OUT_BCS_BATCH(batch, 0);
235     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
236     OUT_BCS_RELOC(batch,
237                   mfc_context->mfc_indirect_pak_bse_object.bo,
238                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
239                   0);
240     OUT_BCS_RELOC(batch,
241                   mfc_context->mfc_indirect_pak_bse_object.bo,
242                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
243                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
244
245     ADVANCE_BCS_BATCH(batch);
246 }
247
248 static void
249 gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,  
250                        struct intel_encoder_context *encoder_context)
251 {
252     struct intel_batchbuffer *batch = encoder_context->base.batch;
253     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
254     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
255
256     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
257     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
258
259     BEGIN_BCS_BATCH(batch, 16);
260
261     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
262     /*DW1. MB setting of frame */
263     OUT_BCS_BATCH(batch,
264                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
265     OUT_BCS_BATCH(batch, 
266                   ((height_in_mbs - 1) << 16) | 
267                   ((width_in_mbs - 1) << 0));
268     /* DW3 QP setting */
269     OUT_BCS_BATCH(batch, 
270                   (0 << 24) |   /* Second Chroma QP Offset */
271                   (0 << 16) |   /* Chroma QP Offset */
272                   (0 << 14) |   /* Max-bit conformance Intra flag */
273                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
274                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
275                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
276                   (0 << 8)  |   /* FIXME: Image Structure */
277                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
278     OUT_BCS_BATCH(batch,
279                   (0 << 16) |   /* Mininum Frame size */
280                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
281                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
282                   (0 << 13) |   /* CABAC 0 word insertion test enable */
283                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
284                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
285                   (0 << 8)  |   /* FIXME: MbMvFormatFlag */
286                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
287                   (0 << 6)  |   /* Only valid for VLD decoding mode */
288                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
289                   (0 << 4)  |   /* Direct 8x8 inference flag */
290                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
291                   (1 << 2)  |   /* Frame MB only flag */
292                   (0 << 1)  |   /* MBAFF mode is in active */
293                   (0 << 0));    /* Field picture flag */
294         /* DW5 Trellis quantization */
295     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
296     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
297                   (0xBB8 << 16) |       /* InterMbMaxSz */
298                   (0xEE8) );            /* IntraMbMaxSz */
299     OUT_BCS_BATCH(batch, 0);            /* Reserved */
300         /* DW8. QP delta */
301     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
302     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
303         /* DW10. Bit setting for MB */  
304     OUT_BCS_BATCH(batch, 0x8C000000);
305     OUT_BCS_BATCH(batch, 0x00010000);
306         /* DW12. */
307     OUT_BCS_BATCH(batch, 0);
308     OUT_BCS_BATCH(batch, 0x02010100);
309         /* DW14. For short format */
310     OUT_BCS_BATCH(batch, 0);
311     OUT_BCS_BATCH(batch, 0);
312
313     ADVANCE_BCS_BATCH(batch);
314 }
315
316 static void
317 gen75_mfc_qm_state(VADriverContextP ctx,
318                   int qm_type,
319                   unsigned int *qm,
320                   int qm_length,
321                   struct intel_encoder_context *encoder_context)
322 {
323     struct intel_batchbuffer *batch = encoder_context->base.batch;
324     unsigned int qm_buffer[16];
325
326     assert(qm_length <= 16);
327     assert(sizeof(*qm) == 4);
328     memcpy(qm_buffer, qm, qm_length * 4);
329
330     BEGIN_BCS_BATCH(batch, 18);
331     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
332     OUT_BCS_BATCH(batch, qm_type << 0);
333     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
334     ADVANCE_BCS_BATCH(batch);
335 }
336
337 static void
338 gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
339 {
340     unsigned int qm[16] = {
341         0x10101010, 0x10101010, 0x10101010, 0x10101010,
342         0x10101010, 0x10101010, 0x10101010, 0x10101010,
343         0x10101010, 0x10101010, 0x10101010, 0x10101010,
344         0x10101010, 0x10101010, 0x10101010, 0x10101010
345     };
346
347     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
348     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
349     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
350     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
351 }
352
353 static void
354 gen75_mfc_fqm_state(VADriverContextP ctx,
355                    int fqm_type,
356                    unsigned int *fqm,
357                    int fqm_length,
358                    struct intel_encoder_context *encoder_context)
359 {
360     struct intel_batchbuffer *batch = encoder_context->base.batch;
361     unsigned int fqm_buffer[32];
362
363     assert(fqm_length <= 32);
364     assert(sizeof(*fqm) == 4);
365     memcpy(fqm_buffer, fqm, fqm_length * 4);
366
367     BEGIN_BCS_BATCH(batch, 34);
368     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
369     OUT_BCS_BATCH(batch, fqm_type << 0);
370     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
371     ADVANCE_BCS_BATCH(batch);
372 }
373
374 static void
375 gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
376 {
377     unsigned int qm[32] = {
378         0x10001000, 0x10001000, 0x10001000, 0x10001000,
379         0x10001000, 0x10001000, 0x10001000, 0x10001000,
380         0x10001000, 0x10001000, 0x10001000, 0x10001000,
381         0x10001000, 0x10001000, 0x10001000, 0x10001000,
382         0x10001000, 0x10001000, 0x10001000, 0x10001000,
383         0x10001000, 0x10001000, 0x10001000, 0x10001000,
384         0x10001000, 0x10001000, 0x10001000, 0x10001000,
385         0x10001000, 0x10001000, 0x10001000, 0x10001000
386     };
387
388     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
389     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
390     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
391     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
392 }
393
394 static void
395 gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
396                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
397                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
398                            struct intel_batchbuffer *batch)
399 {
400     if (batch == NULL)
401         batch = encoder_context->base.batch;
402
403     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
404
405     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
406     OUT_BCS_BATCH(batch,
407                   (0 << 16) |   /* always start at offset 0 */
408                   (data_bits_in_last_dw << 8) |
409                   (skip_emul_byte_count << 4) |
410                   (!!emulation_flag << 3) |
411                   ((!!is_last_header) << 2) |
412                   ((!!is_end_of_slice) << 1) |
413                   (0 << 0));    /* FIXME: ??? */
414     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
415
416     ADVANCE_BCS_BATCH(batch);
417 }
418
419
420 static void gen75_mfc_init(VADriverContextP ctx,
421                         struct encode_state *encode_state,
422                         struct intel_encoder_context *encoder_context)
423 {
424     struct i965_driver_data *i965 = i965_driver_data(ctx);
425     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
426     dri_bo *bo;
427     int i;
428     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
429     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
430     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
431
432     /*Encode common setup for MFC*/
433     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
434     mfc_context->post_deblocking_output.bo = NULL;
435
436     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
437     mfc_context->pre_deblocking_output.bo = NULL;
438
439     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
440     mfc_context->uncompressed_picture_source.bo = NULL;
441
442     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
443     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
444
445     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
446         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
447         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
448         mfc_context->direct_mv_buffers[i].bo = NULL;
449     }
450
451     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
452         if (mfc_context->reference_surfaces[i].bo != NULL)
453             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
454         mfc_context->reference_surfaces[i].bo = NULL;  
455     }
456
457     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
458     bo = dri_bo_alloc(i965->intel.bufmgr,
459                       "Buffer",
460                       width_in_mbs * 64,
461                       64);
462     assert(bo);
463     mfc_context->intra_row_store_scratch_buffer.bo = bo;
464
465     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
466     bo = dri_bo_alloc(i965->intel.bufmgr,
467                       "Buffer",
468                       width_in_mbs * height_in_mbs * 16,
469                       64);
470     assert(bo);
471     mfc_context->macroblock_status_buffer.bo = bo;
472
473     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
474     bo = dri_bo_alloc(i965->intel.bufmgr,
475                       "Buffer",
476                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
477                       64);
478     assert(bo);
479     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
480
481     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
482     bo = dri_bo_alloc(i965->intel.bufmgr,
483                       "Buffer",
484                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
485                       0x1000);
486     assert(bo);
487     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
488
489     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
490     mfc_context->mfc_batchbuffer_surface.bo = NULL;
491
492     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
493     mfc_context->aux_batchbuffer_surface.bo = NULL;
494
495     if (mfc_context->aux_batchbuffer)
496         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
497
498     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0);
499     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
500     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
501     mfc_context->aux_batchbuffer_surface.pitch = 16;
502     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
503     mfc_context->aux_batchbuffer_surface.size_block = 16;
504
505     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
506 }
507
508 static void
509 gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
510                                 struct intel_encoder_context *encoder_context)
511 {
512     struct intel_batchbuffer *batch = encoder_context->base.batch;
513     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
514     int i;
515
516     BEGIN_BCS_BATCH(batch, 61);
517
518     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
519
520     /* the DW1-3 is for pre_deblocking */
521     if (mfc_context->pre_deblocking_output.bo)
522         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
523                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
524                       0);
525     else
526         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
527
528         OUT_BCS_BATCH(batch, 0);
529         OUT_BCS_BATCH(batch, 0);
530      /* the DW4-6 is for the post_deblocking */
531
532     if (mfc_context->post_deblocking_output.bo)
533         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
534                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
535                       0);                                                                                       /* post output addr  */ 
536     else
537         OUT_BCS_BATCH(batch, 0);
538         OUT_BCS_BATCH(batch, 0);
539         OUT_BCS_BATCH(batch, 0);
540
541      /* the DW7-9 is for the uncompressed_picture */
542     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
543                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
544                   0); /* uncompressed data */
545
546         OUT_BCS_BATCH(batch, 0);
547         OUT_BCS_BATCH(batch, 0);
548
549      /* the DW10-12 is for the mb status */
550     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
551                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
552                   0); /* StreamOut data*/
553         OUT_BCS_BATCH(batch, 0);
554         OUT_BCS_BATCH(batch, 0);
555
556      /* the DW13-15 is for the intra_row_store_scratch */
557     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
558                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
559                   0);   
560         OUT_BCS_BATCH(batch, 0);
561         OUT_BCS_BATCH(batch, 0);
562
563      /* the DW16-18 is for the deblocking filter */
564     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
565                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
566                   0);
567         OUT_BCS_BATCH(batch, 0);
568         OUT_BCS_BATCH(batch, 0);
569
570     /* the DW 19-50 is for Reference pictures*/
571     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
572         if ( mfc_context->reference_surfaces[i].bo != NULL) {
573             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
574                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
575                           0);                   
576         } else {
577             OUT_BCS_BATCH(batch, 0);
578         }
579         OUT_BCS_BATCH(batch, 0);
580     }
581         OUT_BCS_BATCH(batch, 0);
582
583         /* The DW 52-54 is for the MB status buffer */
584     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
585                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
586                   0);                                                                                   /* Macroblock status buffer*/
587         
588         OUT_BCS_BATCH(batch, 0);
589         OUT_BCS_BATCH(batch, 0);
590
591         /* the DW 55-57 is the ILDB buffer */
592         OUT_BCS_BATCH(batch, 0);
593         OUT_BCS_BATCH(batch, 0);
594         OUT_BCS_BATCH(batch, 0);
595
596         /* the DW 58-60 is the second ILDB buffer */
597         OUT_BCS_BATCH(batch, 0);
598         OUT_BCS_BATCH(batch, 0);
599         OUT_BCS_BATCH(batch, 0);
600     ADVANCE_BCS_BATCH(batch);
601 }
602
603 static void
604 gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
605 {
606     struct intel_batchbuffer *batch = encoder_context->base.batch;
607     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
608     struct i965_driver_data *i965 = i965_driver_data(ctx);
609     int i;
610
611     if (IS_STEPPING_BPLUS(i965)) {
612         gen75_mfc_pipe_buf_addr_state_bplus(ctx, encoder_context);
613         return;
614     }
615
616     BEGIN_BCS_BATCH(batch, 25);
617
618     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
619
620     if (mfc_context->pre_deblocking_output.bo)
621         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
622                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
623                       0);
624     else
625         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
626
627     if (mfc_context->post_deblocking_output.bo)
628         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
629                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
630                       0);                                                                                       /* post output addr  */ 
631     else
632         OUT_BCS_BATCH(batch, 0);
633
634     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
635                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
636                   0);                                                                                   /* uncompressed data */
637     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
638                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
639                   0);                                                                                   /* StreamOut data*/
640     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
641                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
642                   0);   
643     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
644                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
645                   0);
646     /* 7..22 Reference pictures*/
647     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
648         if ( mfc_context->reference_surfaces[i].bo != NULL) {
649             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
650                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
651                           0);                   
652         } else {
653             OUT_BCS_BATCH(batch, 0);
654         }
655     }
656     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
657                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
658                   0);                                                                                   /* Macroblock status buffer*/
659
660         OUT_BCS_BATCH(batch, 0);
661
662     ADVANCE_BCS_BATCH(batch);
663 }
664
665 static void
666 gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
667                                 struct intel_encoder_context *encoder_context)
668 {
669     struct intel_batchbuffer *batch = encoder_context->base.batch;
670     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
671
672     int i;
673
674     BEGIN_BCS_BATCH(batch, 71);
675
676     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
677
678     /* Reference frames and Current frames */
679     /* the DW1-32 is for the direct MV for reference */
680     for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
681         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
682             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
683                           I915_GEM_DOMAIN_INSTRUCTION, 0,
684                           0);
685             OUT_BCS_BATCH(batch, 0);
686         } else {
687             OUT_BCS_BATCH(batch, 0);
688             OUT_BCS_BATCH(batch, 0);
689         }
690     }
691         OUT_BCS_BATCH(batch, 0);
692
693         /* the DW34-36 is the MV for the current reference */
694         OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
695                           I915_GEM_DOMAIN_INSTRUCTION, 0,
696                           0);
697
698         OUT_BCS_BATCH(batch, 0);
699         OUT_BCS_BATCH(batch, 0);
700
701     /* POL list */
702     for(i = 0; i < 32; i++) {
703         OUT_BCS_BATCH(batch, i/2);
704     }
705     OUT_BCS_BATCH(batch, 0);
706     OUT_BCS_BATCH(batch, 0);
707
708     ADVANCE_BCS_BATCH(batch);
709 }
710
711 static void
712 gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
713 {
714     struct intel_batchbuffer *batch = encoder_context->base.batch;
715     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
716     struct i965_driver_data *i965 = i965_driver_data(ctx);
717     int i;
718
719     if (IS_STEPPING_BPLUS(i965)) {
720         gen75_mfc_avc_directmode_state_bplus(ctx, encoder_context);
721         return;
722     }
723
724     BEGIN_BCS_BATCH(batch, 69);
725
726     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
727
728     /* Reference frames and Current frames */
729     for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
730         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
731             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
732                           I915_GEM_DOMAIN_INSTRUCTION, 0,
733                           0);
734         } else {
735             OUT_BCS_BATCH(batch, 0);
736         }
737     }
738
739     /* POL list */
740     for(i = 0; i < 32; i++) {
741         OUT_BCS_BATCH(batch, i/2);
742     }
743     OUT_BCS_BATCH(batch, 0);
744     OUT_BCS_BATCH(batch, 0);
745
746     ADVANCE_BCS_BATCH(batch);
747 }
748
749
750 static void
751 gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
752                                 struct intel_encoder_context *encoder_context)
753 {
754     struct intel_batchbuffer *batch = encoder_context->base.batch;
755     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
756
757     BEGIN_BCS_BATCH(batch, 10);
758
759     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
760     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
761                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
762                   0);
763     OUT_BCS_BATCH(batch, 0);
764     OUT_BCS_BATCH(batch, 0);
765         
766         /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
767     OUT_BCS_BATCH(batch, 0);
768     OUT_BCS_BATCH(batch, 0);
769     OUT_BCS_BATCH(batch, 0);
770
771         /* the DW7-9 is for Bitplane Read Buffer Base Address */
772     OUT_BCS_BATCH(batch, 0);
773     OUT_BCS_BATCH(batch, 0);
774     OUT_BCS_BATCH(batch, 0);
775
776     ADVANCE_BCS_BATCH(batch);
777 }
778
779 static void
780 gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
781 {
782     struct intel_batchbuffer *batch = encoder_context->base.batch;
783     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
784     struct i965_driver_data *i965 = i965_driver_data(ctx);
785
786     if (IS_STEPPING_BPLUS(i965)) {
787         gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, encoder_context);
788         return;
789     }
790
791     BEGIN_BCS_BATCH(batch, 4);
792
793     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
794     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
795                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
796                   0);
797     OUT_BCS_BATCH(batch, 0);
798     OUT_BCS_BATCH(batch, 0);
799
800     ADVANCE_BCS_BATCH(batch);
801 }
802
803
804 static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
805                                       struct encode_state *encode_state,
806                                       struct intel_encoder_context *encoder_context)
807 {
808     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
809
810     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
811     mfc_context->set_surface_state(ctx, encoder_context);
812     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
813     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
814     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
815     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
816     mfc_context->avc_qm_state(ctx, encoder_context);
817     mfc_context->avc_fqm_state(ctx, encoder_context);
818     gen75_mfc_avc_directmode_state(ctx, encoder_context); 
819     intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
820 }
821
822
823 static VAStatus gen75_mfc_run(VADriverContextP ctx, 
824                              struct encode_state *encode_state,
825                              struct intel_encoder_context *encoder_context)
826 {
827     struct intel_batchbuffer *batch = encoder_context->base.batch;
828
829     intel_batchbuffer_flush(batch);             //run the pipeline
830
831     return VA_STATUS_SUCCESS;
832 }
833
834
835 static VAStatus
836 gen75_mfc_stop(VADriverContextP ctx, 
837               struct encode_state *encode_state,
838               struct intel_encoder_context *encoder_context,
839               int *encoded_bits_size)
840 {
841     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
842     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
843     VACodedBufferSegment *coded_buffer_segment;
844     
845     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
846     assert(vaStatus == VA_STATUS_SUCCESS);
847     *encoded_bits_size = coded_buffer_segment->size * 8;
848     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
849
850     return VA_STATUS_SUCCESS;
851 }
852
853
854 static void
855 gen75_mfc_avc_slice_state(VADriverContextP ctx,
856                          VAEncPictureParameterBufferH264 *pic_param,
857                          VAEncSliceParameterBufferH264 *slice_param,
858                          struct encode_state *encode_state,
859                          struct intel_encoder_context *encoder_context,
860                          int rate_control_enable,
861                          int qp,
862                          struct intel_batchbuffer *batch)
863 {
864     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
865     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
866     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
867     int beginmb = slice_param->macroblock_address;
868     int endmb = beginmb + slice_param->num_macroblocks;
869     int beginx = beginmb % width_in_mbs;
870     int beginy = beginmb / width_in_mbs;
871     int nextx =  endmb % width_in_mbs;
872     int nexty = endmb / width_in_mbs;
873     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
874     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
875     int maxQpN, maxQpP;
876     unsigned char correct[6], grow, shrink;
877     int i;
878     int bslice = 0;
879     int weighted_pred_idc = 0;
880     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
881     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
882
883     if (batch == NULL)
884         batch = encoder_context->base.batch;
885
886     if (slice_type == SLICE_TYPE_P) {
887         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
888     } else if (slice_type == SLICE_TYPE_B) {
889         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
890         bslice = 1;
891
892         if (weighted_pred_idc == 2) {
893             /* 8.4.3 - Derivation process for prediction weights (8-279) */
894             luma_log2_weight_denom = 5;
895             chroma_log2_weight_denom = 5;
896         }
897     }
898
899     maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
900     maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
901
902     for (i = 0; i < 6; i++)
903         correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
904
905     grow = mfc_context->bit_rate_control_context[slice_type].GrowInit + 
906         (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
907     shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit + 
908         (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
909
910     BEGIN_BCS_BATCH(batch, 11);;
911
912     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
913     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
914
915     if (slice_type == SLICE_TYPE_I) {
916         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
917     } else {
918         OUT_BCS_BATCH(batch,
919                       (1 << 16) | (bslice << 24) |                      /*1 reference frame*/
920                       (chroma_log2_weight_denom << 8) |
921                       (luma_log2_weight_denom << 0));
922     }
923
924     OUT_BCS_BATCH(batch, 
925                   (weighted_pred_idc << 30) |
926                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
927                   (slice_param->disable_deblocking_filter_idc << 27) |
928                   (slice_param->cabac_init_idc << 24) |
929                   (qp<<16) |                    /*Slice Quantization Parameter*/
930                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
931                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
932     OUT_BCS_BATCH(batch,
933                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
934                   (beginx << 16) |
935                   slice_param->macroblock_address );
936     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
937     OUT_BCS_BATCH(batch, 
938                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
939                   (1 << 30) |           /*ResetRateControlCounter*/
940                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
941                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
942                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
943                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
944                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
945                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
946                   (last_slice << 19) |     /*IsLastSlice*/
947                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
948                   (1 << 17) |       /*HeaderPresentFlag*/       
949                   (1 << 16) |       /*SliceData PresentFlag*/
950                   (1 << 15) |       /*TailPresentFlag*/
951                   (1 << 13) |       /*RBSP NAL TYPE*/   
952                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
953     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
954     OUT_BCS_BATCH(batch,
955                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
956                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
957                   (shrink << 8)  |
958                   (grow << 0));   
959     OUT_BCS_BATCH(batch,
960                   (correct[5] << 20) |
961                   (correct[4] << 16) |
962                   (correct[3] << 12) |
963                   (correct[2] << 8) |
964                   (correct[1] << 4) |
965                   (correct[0] << 0));
966     OUT_BCS_BATCH(batch, 0);
967
968     ADVANCE_BCS_BATCH(batch);
969 }
970
971
972 #ifdef MFC_SOFTWARE_HASWELL
973
974 static int
975 gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
976                                 int qp,unsigned int *msg,
977                               struct intel_encoder_context *encoder_context,
978                               unsigned char target_mb_size, unsigned char max_mb_size,
979                               struct intel_batchbuffer *batch)
980 {
981     int len_in_dwords = 12;
982     unsigned int intra_msg;
983 #define         INTRA_MSG_FLAG          (1 << 13)
984 #define         INTRA_MBTYPE_MASK       (0x1F0000)
985     if (batch == NULL)
986         batch = encoder_context->base.batch;
987
988     BEGIN_BCS_BATCH(batch, len_in_dwords);
989
990     intra_msg = msg[0] & 0xC0FF;
991     intra_msg |= INTRA_MSG_FLAG;
992     intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
993     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
994     OUT_BCS_BATCH(batch, 0);
995     OUT_BCS_BATCH(batch, 0);
996     OUT_BCS_BATCH(batch, 
997                   (0 << 24) |           /* PackedMvNum, Debug*/
998                   (0 << 20) |           /* No motion vector */
999                   (1 << 19) |           /* CbpDcY */
1000                   (1 << 18) |           /* CbpDcU */
1001                   (1 << 17) |           /* CbpDcV */
1002                   intra_msg);
1003
1004     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
1005     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
1006     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
1007
1008     /*Stuff for Intra MB*/
1009     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
1010     OUT_BCS_BATCH(batch, msg[2]);       
1011     OUT_BCS_BATCH(batch, msg[3]&0xFF);  
1012     
1013     /*MaxSizeInWord and TargetSzieInWord*/
1014     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1015                   (target_mb_size << 16) );
1016
1017     OUT_BCS_BATCH(batch, 0);
1018
1019     ADVANCE_BCS_BATCH(batch);
1020
1021     return len_in_dwords;
1022 }
1023
1024 static int
1025 gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1026                               unsigned int *msg, unsigned int offset,
1027                               struct intel_encoder_context *encoder_context,
1028                               unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1029                               struct intel_batchbuffer *batch)
1030 {
1031     int len_in_dwords = 12;
1032         unsigned int inter_msg = 0;
1033     if (batch == NULL)
1034         batch = encoder_context->base.batch;
1035     {
1036 #define MSG_MV_OFFSET   4
1037         unsigned int *mv_ptr;
1038         mv_ptr = msg + MSG_MV_OFFSET;
1039         /* MV of VME output is based on 16 sub-blocks. So it is necessary
1040          * to convert them to be compatible with the format of AVC_PAK
1041          * command.
1042          */
1043         if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
1044                 /* MV[0] and MV[2] are replicated */
1045                 mv_ptr[4] = mv_ptr[0];
1046                 mv_ptr[5] = mv_ptr[1];
1047                 mv_ptr[2] = mv_ptr[8];
1048                 mv_ptr[3] = mv_ptr[9];
1049                 mv_ptr[6] = mv_ptr[8]; 
1050                 mv_ptr[7] = mv_ptr[9]; 
1051         } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
1052                 /* MV[0] and MV[1] are replicated */
1053                 mv_ptr[2] = mv_ptr[0];  
1054                 mv_ptr[3] = mv_ptr[1];
1055                 mv_ptr[4] = mv_ptr[16]; 
1056                 mv_ptr[5] = mv_ptr[17]; 
1057                 mv_ptr[6] = mv_ptr[24];
1058                 mv_ptr[7] = mv_ptr[25];
1059         } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1060                         !(msg[1] & SUBMB_SHAPE_MASK)) {
1061                 /* Don't touch MV[0] or MV[1] */
1062                 mv_ptr[2] = mv_ptr[8];
1063                 mv_ptr[3] = mv_ptr[9];
1064                 mv_ptr[4] = mv_ptr[16];
1065                 mv_ptr[5] = mv_ptr[17];
1066                 mv_ptr[6] = mv_ptr[24];
1067                 mv_ptr[7] = mv_ptr[25];
1068         }
1069     }
1070
1071     BEGIN_BCS_BATCH(batch, len_in_dwords);
1072
1073     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1074
1075         inter_msg = 32;
1076         /* MV quantity */
1077         if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
1078                 if (msg[1] & SUBMB_SHAPE_MASK)
1079                         inter_msg = 128;
1080         }
1081     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
1082     OUT_BCS_BATCH(batch, offset);
1083         inter_msg = msg[0] & (0x1F00FFFF);
1084         inter_msg |= INTER_MV8;
1085         inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
1086         if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1087                         (msg[1] & SUBMB_SHAPE_MASK)) {
1088                 inter_msg |= INTER_MV32;
1089         }
1090
1091     OUT_BCS_BATCH(batch, inter_msg);
1092
1093     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
1094     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
1095 #if 0 
1096     if ( slice_type == SLICE_TYPE_B) {
1097         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
1098     } else {
1099         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
1100     }
1101 #else
1102     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
1103 #endif
1104
1105         inter_msg = msg[1] >> 8;
1106     /*Stuff for Inter MB*/
1107     OUT_BCS_BATCH(batch, inter_msg);        
1108     OUT_BCS_BATCH(batch, 0x0);    
1109     OUT_BCS_BATCH(batch, 0x0);        
1110
1111     /*MaxSizeInWord and TargetSzieInWord*/
1112     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1113                   (target_mb_size << 16) );
1114
1115     OUT_BCS_BATCH(batch, 0x0);    
1116
1117     ADVANCE_BCS_BATCH(batch);
1118
1119     return len_in_dwords;
1120 }
1121
1122 #define         AVC_INTRA_RDO_OFFSET    4
1123 #define         AVC_INTER_RDO_OFFSET    10
1124 #define         AVC_INTER_MSG_OFFSET    8       
1125 #define         AVC_INTER_MV_OFFSET             48
1126 #define         AVC_RDO_MASK            0xFFFF
1127
1128 static void 
1129 gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1130                                        struct encode_state *encode_state,
1131                                        struct intel_encoder_context *encoder_context,
1132                                        int slice_index,
1133                                        struct intel_batchbuffer *slice_batch)
1134 {
1135     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1136     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1137     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1138     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1139     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1140     unsigned int *msg = NULL, offset = 0;
1141     unsigned char *msg_ptr = NULL;
1142     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1143     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1144     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1145     int i,x,y;
1146     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1147     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1148     unsigned char *slice_header = NULL;
1149     int slice_header_length_in_bits = 0;
1150     unsigned int tail_data[] = { 0x0, 0x0 };
1151     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1152     int is_intra = slice_type == SLICE_TYPE_I;
1153
1154     if (rate_control_mode == VA_RC_CBR) {
1155         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1156         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1157     }
1158
1159     /* only support for 8-bit pixel bit-depth */
1160     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1161     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1162     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1163     assert(qp >= 0 && qp < 52);
1164
1165     gen75_mfc_avc_slice_state(ctx, 
1166                              pPicParameter,
1167                              pSliceParameter,
1168                              encode_state, encoder_context,
1169                              (rate_control_mode == VA_RC_CBR), qp, slice_batch);
1170
1171     if ( slice_index == 0) 
1172         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1173
1174     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1175
1176     // slice hander
1177     mfc_context->insert_object(ctx, encoder_context,
1178                                (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1179                                5,  /* first 5 bytes are start code + nal unit type */
1180                                1, 0, 1, slice_batch);
1181
1182     dri_bo_map(vme_context->vme_output.bo , 1);
1183     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1184
1185     if (is_intra) {
1186         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1187     } else {
1188         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1189     }
1190    
1191     for (i = pSliceParameter->macroblock_address; 
1192          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1193         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1194         x = i % width_in_mbs;
1195         y = i / width_in_mbs;
1196         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1197
1198         if (is_intra) {
1199             assert(msg);
1200             gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1201         } else {
1202             int inter_rdo, intra_rdo;
1203             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1204             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1205             offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1206             if (intra_rdo < inter_rdo) { 
1207                 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1208             } else {
1209                 msg += AVC_INTER_MSG_OFFSET;
1210                 gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, slice_type, slice_batch);
1211             }
1212         }
1213     }
1214    
1215     dri_bo_unmap(vme_context->vme_output.bo);
1216
1217     if ( last_slice ) {    
1218         mfc_context->insert_object(ctx, encoder_context,
1219                                    tail_data, 2, 8,
1220                                    2, 1, 1, 0, slice_batch);
1221     } else {
1222         mfc_context->insert_object(ctx, encoder_context,
1223                                    tail_data, 1, 8,
1224                                    1, 1, 1, 0, slice_batch);
1225     }
1226
1227     free(slice_header);
1228
1229 }
1230
1231 static dri_bo *
1232 gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1233                                   struct encode_state *encode_state,
1234                                   struct intel_encoder_context *encoder_context)
1235 {
1236     struct i965_driver_data *i965 = i965_driver_data(ctx);
1237     struct intel_batchbuffer *batch;
1238     dri_bo *batch_bo;
1239     int i;
1240     int buffer_size;
1241     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1242     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1243     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1244
1245     buffer_size = width_in_mbs * height_in_mbs * 64;
1246     batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
1247     batch_bo = batch->buffer;
1248     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1249         gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1250     }
1251
1252     intel_batchbuffer_align(batch, 8);
1253     
1254     BEGIN_BCS_BATCH(batch, 2);
1255     OUT_BCS_BATCH(batch, 0);
1256     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1257     ADVANCE_BCS_BATCH(batch);
1258
1259     dri_bo_reference(batch_bo);
1260     intel_batchbuffer_free(batch);
1261
1262     return batch_bo;
1263 }
1264
1265 #else
1266
1267 static void
1268 gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1269                                     struct encode_state *encode_state,
1270                                     struct intel_encoder_context *encoder_context)
1271
1272 {
1273     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1274     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1275
1276     assert(vme_context->vme_output.bo);
1277     mfc_context->buffer_suface_setup(ctx,
1278                                      &mfc_context->gpe_context,
1279                                      &vme_context->vme_output,
1280                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1281                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1282     assert(mfc_context->aux_batchbuffer_surface.bo);
1283     mfc_context->buffer_suface_setup(ctx,
1284                                      &mfc_context->gpe_context,
1285                                      &mfc_context->aux_batchbuffer_surface,
1286                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1287                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
1288 }
1289
1290 static void
1291 gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1292                                      struct encode_state *encode_state,
1293                                      struct intel_encoder_context *encoder_context)
1294
1295 {
1296     struct i965_driver_data *i965 = i965_driver_data(ctx);
1297     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1298     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1299     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1300     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1301     mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1302     mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1303     mfc_context->mfc_batchbuffer_surface.pitch = 16;
1304     mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, 
1305                                                            "MFC batchbuffer",
1306                                                            mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1307                                                            0x1000);
1308     mfc_context->buffer_suface_setup(ctx,
1309                                      &mfc_context->gpe_context,
1310                                      &mfc_context->mfc_batchbuffer_surface,
1311                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1312                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1313 }
1314
1315 static void
1316 gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
1317                                     struct encode_state *encode_state,
1318                                     struct intel_encoder_context *encoder_context)
1319 {
1320     gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1321     gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1322 }
1323
1324 static void
1325 gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
1326                                 struct encode_state *encode_state,
1327                                 struct intel_encoder_context *encoder_context)
1328 {
1329     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1330     struct gen6_interface_descriptor_data *desc;   
1331     int i;
1332     dri_bo *bo;
1333
1334     bo = mfc_context->gpe_context.idrt.bo;
1335     dri_bo_map(bo, 1);
1336     assert(bo->virtual);
1337     desc = bo->virtual;
1338
1339     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1340         struct i965_kernel *kernel;
1341
1342         kernel = &mfc_context->gpe_context.kernels[i];
1343         assert(sizeof(*desc) == 32);
1344
1345         /*Setup the descritor table*/
1346         memset(desc, 0, sizeof(*desc));
1347         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1348         desc->desc2.sampler_count = 0;
1349         desc->desc2.sampler_state_pointer = 0;
1350         desc->desc3.binding_table_entry_count = 2;
1351         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1352         desc->desc4.constant_urb_entry_read_offset = 0;
1353         desc->desc4.constant_urb_entry_read_length = 4;
1354                 
1355         /*kernel start*/
1356         dri_bo_emit_reloc(bo,   
1357                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1358                           0,
1359                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1360                           kernel->bo);
1361         desc++;
1362     }
1363
1364     dri_bo_unmap(bo);
1365 }
1366
1367 static void
1368 gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1369                                     struct encode_state *encode_state,
1370                                     struct intel_encoder_context *encoder_context)
1371 {
1372     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1373     
1374     (void)mfc_context;
1375 }
1376
1377 static void
1378 gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1379                                          int index,
1380                                          int head_offset,
1381                                          int batchbuffer_offset,
1382                                          int head_size,
1383                                          int tail_size,
1384                                          int number_mb_cmds,
1385                                          int first_object,
1386                                          int last_object,
1387                                          int last_slice,
1388                                          int mb_x,
1389                                          int mb_y,
1390                                          int width_in_mbs,
1391                                          int qp)
1392 {
1393     BEGIN_BATCH(batch, 12);
1394     
1395     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1396     OUT_BATCH(batch, index);
1397     OUT_BATCH(batch, 0);
1398     OUT_BATCH(batch, 0);
1399     OUT_BATCH(batch, 0);
1400     OUT_BATCH(batch, 0);
1401    
1402     /*inline data */
1403     OUT_BATCH(batch, head_offset);
1404     OUT_BATCH(batch, batchbuffer_offset);
1405     OUT_BATCH(batch, 
1406               head_size << 16 |
1407               tail_size);
1408     OUT_BATCH(batch,
1409               number_mb_cmds << 16 |
1410               first_object << 2 |
1411               last_object << 1 |
1412               last_slice);
1413     OUT_BATCH(batch,
1414               mb_y << 8 |
1415               mb_x);
1416     OUT_BATCH(batch,
1417               qp << 16 |
1418               width_in_mbs);
1419
1420     ADVANCE_BATCH(batch);
1421 }
1422
1423 static void
1424 gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1425                                        struct intel_encoder_context *encoder_context,
1426                                        VAEncSliceParameterBufferH264 *slice_param,
1427                                        int head_offset,
1428                                        unsigned short head_size,
1429                                        unsigned short tail_size,
1430                                        int batchbuffer_offset,
1431                                        int qp,
1432                                        int last_slice)
1433 {
1434     struct intel_batchbuffer *batch = encoder_context->base.batch;
1435     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1436     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1437     int total_mbs = slice_param->num_macroblocks;
1438     int number_mb_cmds = 128;
1439     int starting_mb = 0;
1440     int last_object = 0;
1441     int first_object = 1;
1442     int i;
1443     int mb_x, mb_y;
1444     int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1445
1446     for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1447         last_object = (total_mbs - starting_mb) == number_mb_cmds;
1448         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1449         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1450         assert(mb_x <= 255 && mb_y <= 255);
1451
1452         starting_mb += number_mb_cmds;
1453
1454         gen75_mfc_batchbuffer_emit_object_command(batch,
1455                                                  index,
1456                                                  head_offset,
1457                                                  batchbuffer_offset,
1458                                                  head_size,
1459                                                  tail_size,
1460                                                  number_mb_cmds,
1461                                                  first_object,
1462                                                  last_object,
1463                                                  last_slice,
1464                                                  mb_x,
1465                                                  mb_y,
1466                                                  width_in_mbs,
1467                                                  qp);
1468
1469         if (first_object) {
1470             head_offset += head_size;
1471             batchbuffer_offset += head_size;
1472         }
1473
1474         if (last_object) {
1475             head_offset += tail_size;
1476             batchbuffer_offset += tail_size;
1477         }
1478
1479         batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
1480
1481         first_object = 0;
1482     }
1483
1484     if (!last_object) {
1485         last_object = 1;
1486         number_mb_cmds = total_mbs % number_mb_cmds;
1487         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1488         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1489         assert(mb_x <= 255 && mb_y <= 255);
1490         starting_mb += number_mb_cmds;
1491
1492         gen75_mfc_batchbuffer_emit_object_command(batch,
1493                                                  index,
1494                                                  head_offset,
1495                                                  batchbuffer_offset,
1496                                                  head_size,
1497                                                  tail_size,
1498                                                  number_mb_cmds,
1499                                                  first_object,
1500                                                  last_object,
1501                                                  last_slice,
1502                                                  mb_x,
1503                                                  mb_y,
1504                                                  width_in_mbs,
1505                                                  qp);
1506     }
1507 }
1508                           
1509 /*
1510  * return size in Owords (16bytes)
1511  */         
1512 static int
1513 gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1514                                struct encode_state *encode_state,
1515                                struct intel_encoder_context *encoder_context,
1516                                int slice_index,
1517                                int batchbuffer_offset)
1518 {
1519     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1520     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1521     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1522     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1523     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1524     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1525     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1526     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1527     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1528     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1529     unsigned char *slice_header = NULL;
1530     int slice_header_length_in_bits = 0;
1531     unsigned int tail_data[] = { 0x0, 0x0 };
1532     long head_offset;
1533     int old_used = intel_batchbuffer_used_size(slice_batch), used;
1534     unsigned short head_size, tail_size;
1535     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1536
1537     if (rate_control_mode == VA_RC_CBR) {
1538         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1539         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1540     }
1541
1542     /* only support for 8-bit pixel bit-depth */
1543     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1544     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1545     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1546     assert(qp >= 0 && qp < 52);
1547
1548     head_offset = old_used / 16;
1549     gen75_mfc_avc_slice_state(ctx,
1550                              pPicParameter,
1551                              pSliceParameter,
1552                              encode_state,
1553                              encoder_context,
1554                              (rate_control_mode == VA_RC_CBR),
1555                              qp,
1556                              slice_batch);
1557
1558     if (slice_index == 0)
1559         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1560
1561     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1562
1563     // slice hander
1564     mfc_context->insert_object(ctx,
1565                                encoder_context,
1566                                (unsigned int *)slice_header,
1567                                ALIGN(slice_header_length_in_bits, 32) >> 5,
1568                                slice_header_length_in_bits & 0x1f,
1569                                5,  /* first 5 bytes are start code + nal unit type */
1570                                1,
1571                                0,
1572                                1,
1573                                slice_batch);
1574     free(slice_header);
1575
1576     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1577     used = intel_batchbuffer_used_size(slice_batch);
1578     head_size = (used - old_used) / 16;
1579     old_used = used;
1580
1581     /* tail */
1582     if (last_slice) {    
1583         mfc_context->insert_object(ctx,
1584                                    encoder_context,
1585                                    tail_data,
1586                                    2,
1587                                    8,
1588                                    2,
1589                                    1,
1590                                    1,
1591                                    0,
1592                                    slice_batch);
1593     } else {
1594         mfc_context->insert_object(ctx,
1595                                    encoder_context,
1596                                    tail_data,
1597                                    1,
1598                                    8,
1599                                    1,
1600                                    1,
1601                                    1,
1602                                    0,
1603                                    slice_batch);
1604     }
1605
1606     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1607     used = intel_batchbuffer_used_size(slice_batch);
1608     tail_size = (used - old_used) / 16;
1609
1610    
1611     gen75_mfc_avc_batchbuffer_slice_command(ctx,
1612                                            encoder_context,
1613                                            pSliceParameter,
1614                                            head_offset,
1615                                            head_size,
1616                                            tail_size,
1617                                            batchbuffer_offset,
1618                                            qp,
1619                                            last_slice);
1620
1621     return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
1622 }
1623
1624 static void
1625 gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1626                                   struct encode_state *encode_state,
1627                                   struct intel_encoder_context *encoder_context)
1628 {
1629     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1630     struct intel_batchbuffer *batch = encoder_context->base.batch;
1631     int i, size, offset = 0;
1632     intel_batchbuffer_start_atomic(batch, 0x4000); 
1633     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1634
1635     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1636         size = gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1637         offset += size;
1638     }
1639
1640     intel_batchbuffer_end_atomic(batch);
1641     intel_batchbuffer_flush(batch);
1642 }
1643
1644 static void
1645 gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
1646                                struct encode_state *encode_state,
1647                                struct intel_encoder_context *encoder_context)
1648 {
1649     gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1650     gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1651     gen75_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1652     gen75_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1653 }
1654
1655 static dri_bo *
1656 gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1657                                   struct encode_state *encode_state,
1658                                   struct intel_encoder_context *encoder_context)
1659 {
1660     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1661
1662     gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1663     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1664
1665     return mfc_context->mfc_batchbuffer_surface.bo;
1666 }
1667
1668 #endif
1669
1670 static void
1671 gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
1672                                  struct encode_state *encode_state,
1673                                  struct intel_encoder_context *encoder_context)
1674 {
1675     struct intel_batchbuffer *batch = encoder_context->base.batch;
1676     dri_bo *slice_batch_bo;
1677
1678     if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1679         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1680         assert(0);
1681         return; 
1682     }
1683
1684 #ifdef MFC_SOFTWARE_HASWELL
1685     slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1686 #else
1687     slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1688 #endif
1689
1690     // begin programing
1691     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1692     intel_batchbuffer_emit_mi_flush(batch);
1693     
1694     // picture level programing
1695     gen75_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1696
1697     BEGIN_BCS_BATCH(batch, 2);
1698     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1699     OUT_BCS_RELOC(batch,
1700                   slice_batch_bo,
1701                   I915_GEM_DOMAIN_COMMAND, 0, 
1702                   0);
1703     ADVANCE_BCS_BATCH(batch);
1704
1705     // end programing
1706     intel_batchbuffer_end_atomic(batch);
1707
1708     dri_bo_unreference(slice_batch_bo);
1709 }
1710
1711
1712 static VAStatus
1713 gen75_mfc_avc_encode_picture(VADriverContextP ctx, 
1714                             struct encode_state *encode_state,
1715                             struct intel_encoder_context *encoder_context)
1716 {
1717     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1718     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1719     int current_frame_bits_size;
1720     int sts;
1721  
1722     for (;;) {
1723         gen75_mfc_init(ctx, encode_state, encoder_context);
1724         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1725         /*Programing bcs pipeline*/
1726         gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);  //filling the pipeline
1727         gen75_mfc_run(ctx, encode_state, encoder_context);
1728         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1729             gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1730             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1731             if (sts == BRC_NO_HRD_VIOLATION) {
1732                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1733                 break;
1734             }
1735             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1736                 if (!mfc_context->hrd.violation_noted) {
1737                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1738                     mfc_context->hrd.violation_noted = 1;
1739                 }
1740                 return VA_STATUS_SUCCESS;
1741             }
1742         } else {
1743             break;
1744         }
1745     }
1746
1747     return VA_STATUS_SUCCESS;
1748 }
1749
1750 /*
1751  * MPEG-2
1752  */
1753
/*
 * Lookup table indexed by the picture_type field of the MPEG-2 picture
 * parameter buffer; the result is the picture coding type value written
 * into MFX_MPEG2_PIC_STATE.
 */
static const int
va_to_gen75_mpeg2_picture_type[3] = {
    [0] = 1,    /* I */
    [1] = 2,    /* P */
    [2] = 3,    /* B */
};
1760
1761 static void
1762 gen75_mfc_mpeg2_pic_state(VADriverContextP ctx,
1763                           struct intel_encoder_context *encoder_context,
1764                           struct encode_state *encode_state)
1765 {
1766     struct intel_batchbuffer *batch = encoder_context->base.batch;
1767     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1768     VAEncPictureParameterBufferMPEG2 *pic_param;
1769     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1770     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1771
1772     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1773     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1774
1775     BEGIN_BCS_BATCH(batch, 13);
1776     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1777     OUT_BCS_BATCH(batch,
1778                   (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1779                   (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1780                   (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1781                   (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1782                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1783                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1784                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1785                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1786                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1787                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1788                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1789                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1790     OUT_BCS_BATCH(batch,
1791                   0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1792                   va_to_gen75_mpeg2_picture_type[pic_param->picture_type] << 9 |
1793                   0);
1794     OUT_BCS_BATCH(batch,
1795                   1 << 31 |     /* slice concealment */
1796                   (height_in_mbs - 1) << 16 |
1797                   (width_in_mbs - 1));
1798     OUT_BCS_BATCH(batch, 0);
1799     OUT_BCS_BATCH(batch, 0);
1800     OUT_BCS_BATCH(batch,
1801                   0xFFF << 16 | /* InterMBMaxSize */
1802                   0xFFF << 0 |  /* IntraMBMaxSize */
1803                   0);
1804     OUT_BCS_BATCH(batch, 0);
1805     OUT_BCS_BATCH(batch, 0);
1806     OUT_BCS_BATCH(batch, 0);
1807     OUT_BCS_BATCH(batch, 0);
1808     OUT_BCS_BATCH(batch, 0);
1809     OUT_BCS_BATCH(batch, 0);
1810     ADVANCE_BCS_BATCH(batch);
1811 }
1812
1813 static void
1814 gen75_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1815 {
1816     unsigned char intra_qm[64] = {
1817          8, 16, 19, 22, 26, 27, 29, 34,
1818         16, 16, 22, 24, 27, 29, 34, 37,
1819         19, 22, 26, 27, 29, 34, 34, 38,
1820         22, 22, 26, 27, 29, 34, 37, 40,
1821         22, 26, 27, 29, 32, 35, 40, 48,
1822         26, 27, 29, 32, 35, 40, 48, 58,
1823         26, 27, 29, 34, 38, 46, 56, 69,
1824         27, 29, 35, 38, 46, 56, 69, 83
1825     };
1826
1827     unsigned char non_intra_qm[64] = {
1828         16, 16, 16, 16, 16, 16, 16, 16,
1829         16, 16, 16, 16, 16, 16, 16, 16,
1830         16, 16, 16, 16, 16, 16, 16, 16,
1831         16, 16, 16, 16, 16, 16, 16, 16,
1832         16, 16, 16, 16, 16, 16, 16, 16,
1833         16, 16, 16, 16, 16, 16, 16, 16,
1834         16, 16, 16, 16, 16, 16, 16, 16,
1835         16, 16, 16, 16, 16, 16, 16, 16
1836     };
1837
1838     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1839     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
1840 }
1841
1842 static void
1843 gen75_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1844 {
1845     unsigned short intra_fqm[64] = {
1846          65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1847          65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1848          65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1849          65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1850          65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1851          65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1852          65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1853          65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1854     };
1855
1856     unsigned short non_intra_fqm[64] = {
1857         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1858         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1859         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1860         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1861         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1862         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1863         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1864         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1865     };
1866
1867     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1868     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
1869 }
1870
1871 static void
1872 gen75_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1873                                  struct intel_encoder_context *encoder_context,
1874                                  int x, int y,
1875                                  int next_x, int next_y,
1876                                  int is_fisrt_slice_group,
1877                                  int is_last_slice_group,
1878                                  int intra_slice,
1879                                  int qp,
1880                                  struct intel_batchbuffer *batch)
1881 {
1882     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1883
1884     if (batch == NULL)
1885         batch = encoder_context->base.batch;
1886
1887     BEGIN_BCS_BATCH(batch, 8);
1888
1889     OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1890     OUT_BCS_BATCH(batch,
1891                   0 << 31 |                             /* MbRateCtrlFlag */
1892                   !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
1893                   1 << 17 |                             /* Insert Header before the first slice group data */
1894                   1 << 16 |                             /* SliceData PresentFlag: always 1 */
1895                   1 << 15 |                             /* TailPresentFlag: always 1 */
1896                   0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
1897                   !!intra_slice << 13 |                 /* IntraSlice */
1898                   !!intra_slice << 12 |                 /* IntraSliceFlag */
1899                   0);
1900     OUT_BCS_BATCH(batch,
1901                   next_y << 24 |
1902                   next_x << 16 |
1903                   y << 8 |
1904                   x << 0 |
1905                   0);
1906     OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
1907     /* bitstream pointer is only loaded once for the first slice of a frame when 
1908      * LoadSlicePointerFlag is 0
1909      */
1910     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1911     OUT_BCS_BATCH(batch, 0);    /* FIXME: */
1912     OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
1913     OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
1914
1915     ADVANCE_BCS_BATCH(batch);
1916 }
1917
1918 static int
1919 gen75_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1920                                  struct intel_encoder_context *encoder_context,
1921                                  int x, int y,
1922                                  int first_mb_in_slice,
1923                                  int last_mb_in_slice,
1924                                  int first_mb_in_slice_group,
1925                                  int last_mb_in_slice_group,
1926                                  int mb_type,
1927                                  int qp_scale_code,
1928                                  int coded_block_pattern,
1929                                  unsigned char target_size_in_word,
1930                                  unsigned char max_size_in_word,
1931                                  struct intel_batchbuffer *batch)
1932 {
1933     int len_in_dwords = 9;
1934
1935     if (batch == NULL)
1936         batch = encoder_context->base.batch;
1937
1938     BEGIN_BCS_BATCH(batch, len_in_dwords);
1939
1940     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1941     OUT_BCS_BATCH(batch,
1942                   0 << 24 |     /* PackedMvNum */
1943                   0 << 20 |     /* MvFormat */
1944                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1945                   0 << 15 |     /* TransformFlag: frame DCT */
1946                   0 << 14 |     /* FieldMbFlag */
1947                   1 << 13 |     /* IntraMbFlag */
1948                   mb_type << 8 |   /* MbType: Intra */
1949                   0 << 2 |      /* SkipMbFlag */
1950                   0 << 0 |      /* InterMbMode */
1951                   0);
1952     OUT_BCS_BATCH(batch, y << 16 | x);
1953     OUT_BCS_BATCH(batch,
1954                   max_size_in_word << 24 |
1955                   target_size_in_word << 16 |
1956                   coded_block_pattern << 6 |      /* CBP */
1957                   0);
1958     OUT_BCS_BATCH(batch,
1959                   last_mb_in_slice << 31 |
1960                   first_mb_in_slice << 30 |
1961                   0 << 27 |     /* EnableCoeffClamp */
1962                   last_mb_in_slice_group << 26 |
1963                   0 << 25 |     /* MbSkipConvDisable */
1964                   first_mb_in_slice_group << 24 |
1965                   0 << 16 |     /* MvFieldSelect */
1966                   qp_scale_code << 0 |
1967                   0);
1968     OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
1969     OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
1970     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1971     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1972
1973     ADVANCE_BCS_BATCH(batch);
1974
1975     return len_in_dwords;
1976 }
1977
/* Offset, in dwords, from the start of a macroblock's message to its motion vectors */
#define MPEG2_INTER_MV_OFFSET   12

/*
 * Motion vector range, in half-pixel units, indexed by f_code.
 * Only f_code values 1..9 are ever looked up; entry 0 is a placeholder.
 */
static struct _mv_ranges
{
    int low;    /* smallest allowed value, in the unit of 1/2 pixel */
    int high;   /* largest allowed value, in the unit of 1/2 pixel */
} mv_ranges[] = {
    {     0,    0 },
    {   -16,   15 },
    {   -32,   31 },
    {   -64,   63 },
    {  -128,  127 },
    {  -256,  255 },
    {  -512,  511 },
    { -1024, 1023 },
    { -2048, 2047 },
    { -4096, 4095 }
};

/*
 * Sanitize one motion vector component (half-pixel units).
 *
 * mv          component to check
 * pos         macroblock index along the same axis
 * display_max picture size in pixels along the same axis
 * f_code      selects the allowed range from mv_ranges
 *
 * The vector is reset to 0 when the displaced 16-pixel block would start
 * before 0 or end beyond display_max.  The result is then clamped to
 * mv_ranges[f_code] when f_code is in 1..9; any other f_code leaves it
 * unclamped.
 */
static int
mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
{
    /* macroblock extent along this axis, expressed in half pixels */
    const int block_start = pos * 16 * 2;
    const int block_end = (pos + 1) * 16 * 2;
    int result = mv;

    if (mv + block_start < 0 || mv + block_end > display_max * 2)
        result = 0;

    /* f_code outside the table: nothing to clamp against */
    if (f_code <= 0 || f_code >= 10)
        return result;

    if (result < mv_ranges[f_code].low)
        result = mv_ranges[f_code].low;

    if (result > mv_ranges[f_code].high)
        result = mv_ranges[f_code].high;

    return result;
}
2014
2015 static int
2016 gen75_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
2017                                  struct encode_state *encode_state,
2018                                  struct intel_encoder_context *encoder_context,
2019                                  unsigned int *msg,
2020                                  int width_in_mbs, int height_in_mbs,
2021                                  int x, int y,
2022                                  int first_mb_in_slice,
2023                                  int last_mb_in_slice,
2024                                  int first_mb_in_slice_group,
2025                                  int last_mb_in_slice_group,
2026                                  int qp_scale_code,
2027                                  unsigned char target_size_in_word,
2028                                  unsigned char max_size_in_word,
2029                                  struct intel_batchbuffer *batch)
2030 {
2031     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
2032     int len_in_dwords = 9;
2033     short *mvptr, mvx0, mvy0, mvx1, mvy1;
2034     
2035     if (batch == NULL)
2036         batch = encoder_context->base.batch;
2037
2038     mvptr = (short *)(msg + MPEG2_INTER_MV_OFFSET);
2039     mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
2040     mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
2041     mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
2042     mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
2043
2044     BEGIN_BCS_BATCH(batch, len_in_dwords);
2045
2046     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
2047     OUT_BCS_BATCH(batch,
2048                   2 << 24 |     /* PackedMvNum */
2049                   7 << 20 |     /* MvFormat */
2050                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
2051                   0 << 15 |     /* TransformFlag: frame DCT */
2052                   0 << 14 |     /* FieldMbFlag */
2053                   0 << 13 |     /* IntraMbFlag */
2054                   1 << 8 |      /* MbType: Frame-based */
2055                   0 << 2 |      /* SkipMbFlag */
2056                   0 << 0 |      /* InterMbMode */
2057                   0);
2058     OUT_BCS_BATCH(batch, y << 16 | x);
2059     OUT_BCS_BATCH(batch,
2060                   max_size_in_word << 24 |
2061                   target_size_in_word << 16 |
2062                   0x3f << 6 |   /* CBP */
2063                   0);
2064     OUT_BCS_BATCH(batch,
2065                   last_mb_in_slice << 31 |
2066                   first_mb_in_slice << 30 |
2067                   0 << 27 |     /* EnableCoeffClamp */
2068                   last_mb_in_slice_group << 26 |
2069                   0 << 25 |     /* MbSkipConvDisable */
2070                   first_mb_in_slice_group << 24 |
2071                   0 << 16 |     /* MvFieldSelect */
2072                   qp_scale_code << 0 |
2073                   0);
2074
2075     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
2076     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
2077     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
2078     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
2079
2080     ADVANCE_BCS_BATCH(batch);
2081
2082     return len_in_dwords;
2083 }
2084
2085 static void
2086 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
2087                                            struct encode_state *encode_state,
2088                                            struct intel_encoder_context *encoder_context,
2089                                            struct intel_batchbuffer *slice_batch)
2090 {
2091     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2092     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
2093
2094     if (encode_state->packed_header_data[idx]) {
2095         VAEncPackedHeaderParameterBuffer *param = NULL;
2096         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2097         unsigned int length_in_bits;
2098
2099         assert(encode_state->packed_header_param[idx]);
2100         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2101         length_in_bits = param->bit_length;
2102
2103         mfc_context->insert_object(ctx,
2104                                    encoder_context,
2105                                    header_data,
2106                                    ALIGN(length_in_bits, 32) >> 5,
2107                                    length_in_bits & 0x1f,
2108                                    5,   /* FIXME: check it */
2109                                    0,
2110                                    0,
2111                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2112                                    slice_batch);
2113     }
2114
2115     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2116
2117     if (encode_state->packed_header_data[idx]) {
2118         VAEncPackedHeaderParameterBuffer *param = NULL;
2119         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2120         unsigned int length_in_bits;
2121
2122         assert(encode_state->packed_header_param[idx]);
2123         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2124         length_in_bits = param->bit_length;
2125
2126         mfc_context->insert_object(ctx,
2127                                    encoder_context,
2128                                    header_data,
2129                                    ALIGN(length_in_bits, 32) >> 5,
2130                                    length_in_bits & 0x1f,
2131                                    5,   /* FIXME: check it */
2132                                    0,
2133                                    0,
2134                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2135                                    slice_batch);
2136     }
2137 }
2138
2139 static void 
2140 gen75_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2141                                      struct encode_state *encode_state,
2142                                      struct intel_encoder_context *encoder_context,
2143                                      int slice_index,
2144                                      VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2145                                      struct intel_batchbuffer *slice_batch)
2146 {
2147     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2148     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2149     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2150     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2151     unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2152     unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2153     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2154     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2155     int i, j;
2156     int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2157     unsigned int *msg = NULL;
2158     unsigned char *msg_ptr = NULL;
2159
2160     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
2161     h_start_pos = slice_param->macroblock_address % width_in_mbs;
2162     v_start_pos = slice_param->macroblock_address / width_in_mbs;
2163     assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
2164
2165     dri_bo_map(vme_context->vme_output.bo , 0);
2166     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
2167
2168     if (next_slice_group_param) {
2169         h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2170         v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
2171     } else {
2172         h_next_start_pos = 0;
2173         v_next_start_pos = height_in_mbs;
2174     }
2175
2176     gen75_mfc_mpeg2_slicegroup_state(ctx,
2177                                      encoder_context,
2178                                      h_start_pos,
2179                                      v_start_pos,
2180                                      h_next_start_pos,
2181                                      v_next_start_pos,
2182                                      slice_index == 0,
2183                                      next_slice_group_param == NULL,
2184                                      slice_param->is_intra_slice,
2185                                      slice_param->quantiser_scale_code,
2186                                      slice_batch);
2187
2188     if (slice_index == 0) 
2189         intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2190
2191     /* Insert '00' to make sure the header is valid */
2192     mfc_context->insert_object(ctx,
2193                                encoder_context,
2194                                (unsigned int*)section_delimiter,
2195                                1,
2196                                8,   /* 8bits in the last DWORD */
2197                                1,   /* 1 byte */
2198                                1,
2199                                0,
2200                                0,
2201                                slice_batch);
2202
2203     for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2204         /* PAK for each macroblocks */
2205         for (j = 0; j < slice_param->num_macroblocks; j++) {
2206             int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2207             int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2208             int first_mb_in_slice = (j == 0);
2209             int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2210             int first_mb_in_slice_group = (i == 0 && j == 0);
2211             int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2212                                           j == slice_param->num_macroblocks - 1);
2213
2214             msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
2215
2216             if (slice_param->is_intra_slice) {
2217                 gen75_mfc_mpeg2_pak_object_intra(ctx,
2218                                                  encoder_context,
2219                                                  h_pos, v_pos,
2220                                                  first_mb_in_slice,
2221                                                  last_mb_in_slice,
2222                                                  first_mb_in_slice_group,
2223                                                  last_mb_in_slice_group,
2224                                                  0x1a,
2225                                                  slice_param->quantiser_scale_code,
2226                                                  0x3f,
2227                                                  0,
2228                                                  0xff,
2229                                                  slice_batch);
2230             } else {
2231                 gen75_mfc_mpeg2_pak_object_inter(ctx,
2232                                                  encode_state,
2233                                                  encoder_context,
2234                                                  msg,
2235                                                  width_in_mbs, height_in_mbs,
2236                                                  h_pos, v_pos,
2237                                                  first_mb_in_slice,
2238                                                  last_mb_in_slice,
2239                                                  first_mb_in_slice_group,
2240                                                  last_mb_in_slice_group,
2241                                                  slice_param->quantiser_scale_code,
2242                                                  0,
2243                                                  0xff,
2244                                                  slice_batch);
2245             }
2246         }
2247
2248         slice_param++;
2249     }
2250
2251     dri_bo_unmap(vme_context->vme_output.bo);
2252
2253     /* tail data */
2254     if (next_slice_group_param == NULL) { /* end of a picture */
2255         mfc_context->insert_object(ctx,
2256                                    encoder_context,
2257                                    (unsigned int *)tail_delimiter,
2258                                    2,
2259                                    8,   /* 8bits in the last DWORD */
2260                                    5,   /* 5 bytes */
2261                                    1,
2262                                    1,
2263                                    0,
2264                                    slice_batch);
2265     } else {        /* end of a lsice group */
2266         mfc_context->insert_object(ctx,
2267                                    encoder_context,
2268                                    (unsigned int *)section_delimiter,
2269                                    1,
2270                                    8,   /* 8bits in the last DWORD */
2271                                    1,   /* 1 byte */
2272                                    1,
2273                                    1,
2274                                    0,
2275                                    slice_batch);
2276     }
2277 }
2278
2279 /* 
2280  * A batch buffer for all slices, including slice state, 
2281  * slice insert object and slice pak object commands
2282  *
2283  */
2284 static dri_bo *
2285 gen75_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2286                                            struct encode_state *encode_state,
2287                                            struct intel_encoder_context *encoder_context)
2288 {
2289     struct i965_driver_data *i965 = i965_driver_data(ctx);
2290     struct intel_batchbuffer *batch;
2291     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2292     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2293     dri_bo *batch_bo;
2294     int i;
2295     int buffer_size;
2296     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2297     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2298
2299     buffer_size = width_in_mbs * height_in_mbs * 64;
2300     batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
2301     batch_bo = batch->buffer;
2302
2303     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2304         if (i == encode_state->num_slice_params_ext - 1)
2305             next_slice_group_param = NULL;
2306         else
2307             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2308
2309         gen75_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2310     }
2311
2312     intel_batchbuffer_align(batch, 8);
2313     
2314     BEGIN_BCS_BATCH(batch, 2);
2315     OUT_BCS_BATCH(batch, 0);
2316     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2317     ADVANCE_BCS_BATCH(batch);
2318
2319     dri_bo_reference(batch_bo);
2320     intel_batchbuffer_free(batch);
2321
2322     return batch_bo;
2323 }
2324
2325 static void
2326 gen75_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2327                                             struct encode_state *encode_state,
2328                                             struct intel_encoder_context *encoder_context)
2329 {
2330     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2331
2332     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2333     mfc_context->set_surface_state(ctx, encoder_context);
2334     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2335     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
2336     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2337     gen75_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2338     gen75_mfc_mpeg2_qm_state(ctx, encoder_context);
2339     gen75_mfc_mpeg2_fqm_state(ctx, encoder_context);
2340 }
2341
2342 static void
2343 gen75_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2344                                     struct encode_state *encode_state,
2345                                     struct intel_encoder_context *encoder_context)
2346 {
2347     struct intel_batchbuffer *batch = encoder_context->base.batch;
2348     dri_bo *slice_batch_bo;
2349
2350     slice_batch_bo = gen75_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2351
2352     // begin programing
2353     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
2354     intel_batchbuffer_emit_mi_flush(batch);
2355     
2356     // picture level programing
2357     gen75_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
2358
2359     BEGIN_BCS_BATCH(batch, 2);
2360     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
2361     OUT_BCS_RELOC(batch,
2362                   slice_batch_bo,
2363                   I915_GEM_DOMAIN_COMMAND, 0, 
2364                   0);
2365     ADVANCE_BCS_BATCH(batch);
2366
2367     // end programing
2368     intel_batchbuffer_end_atomic(batch);
2369
2370     dri_bo_unreference(slice_batch_bo);
2371 }
2372
2373 static VAStatus
2374 intel_mfc_mpeg2_prepare(VADriverContextP ctx, 
2375                         struct encode_state *encode_state,
2376                         struct intel_encoder_context *encoder_context)
2377 {
2378     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2379     struct object_surface *obj_surface; 
2380     struct object_buffer *obj_buffer;
2381     struct i965_coded_buffer_segment *coded_buffer_segment;
2382     VAStatus vaStatus = VA_STATUS_SUCCESS;
2383     dri_bo *bo;
2384     int i;
2385
2386     /* reconstructed surface */
2387     obj_surface = encode_state->reconstructed_object;
2388     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2389     mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2390     dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2391     mfc_context->surface_state.width = obj_surface->orig_width;
2392     mfc_context->surface_state.height = obj_surface->orig_height;
2393     mfc_context->surface_state.w_pitch = obj_surface->width;
2394     mfc_context->surface_state.h_pitch = obj_surface->height;
2395
2396     /* forward reference */
2397     obj_surface = encode_state->reference_objects[0];
2398
2399     if (obj_surface && obj_surface->bo) {
2400         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2401         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2402     } else
2403         mfc_context->reference_surfaces[0].bo = NULL;
2404
2405     /* backward reference */
2406     obj_surface = encode_state->reference_objects[1];
2407
2408     if (obj_surface && obj_surface->bo) {
2409         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2410         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2411     } else {
2412         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2413
2414         if (mfc_context->reference_surfaces[1].bo)
2415             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2416     }
2417
2418     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2419         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2420
2421         if (mfc_context->reference_surfaces[i].bo)
2422             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2423     }
2424     
2425     /* input YUV surface */
2426     obj_surface = encode_state->input_yuv_object;
2427     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2428     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2429
2430     /* coded buffer */
2431     obj_buffer = encode_state->coded_buf_object;
2432     bo = obj_buffer->buffer_store->bo;
2433     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2434     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2435     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2436     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2437
2438     /* set the internal flag to 0 to indicate the coded size is unknown */
2439     dri_bo_map(bo, 1);
2440     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2441     coded_buffer_segment->mapped = 0;
2442     coded_buffer_segment->codec = CODED_MPEG2;
2443     dri_bo_unmap(bo);
2444
2445     return vaStatus;
2446 }
2447
2448 static VAStatus
2449 gen75_mfc_mpeg2_encode_picture(VADriverContextP ctx, 
2450                                struct encode_state *encode_state,
2451                                struct intel_encoder_context *encoder_context)
2452 {
2453     gen75_mfc_init(ctx, encode_state, encoder_context);
2454     intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2455     /*Programing bcs pipeline*/
2456     gen75_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2457     gen75_mfc_run(ctx, encode_state, encoder_context);
2458
2459     return VA_STATUS_SUCCESS;
2460 }
2461
2462 static void
2463 gen75_mfc_context_destroy(void *context)
2464 {
2465     struct gen6_mfc_context *mfc_context = context;
2466     int i;
2467
2468     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2469     mfc_context->post_deblocking_output.bo = NULL;
2470
2471     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2472     mfc_context->pre_deblocking_output.bo = NULL;
2473
2474     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2475     mfc_context->uncompressed_picture_source.bo = NULL;
2476
2477     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
2478     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2479
2480     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2481         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2482         mfc_context->direct_mv_buffers[i].bo = NULL;
2483     }
2484
2485     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2486     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
2487
2488     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2489     mfc_context->macroblock_status_buffer.bo = NULL;
2490
2491     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2492     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2493
2494     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2495     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2496
2497     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2498         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2499         mfc_context->reference_surfaces[i].bo = NULL;  
2500     }
2501
2502     i965_gpe_context_destroy(&mfc_context->gpe_context);
2503
2504     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2505     mfc_context->mfc_batchbuffer_surface.bo = NULL;
2506
2507     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2508     mfc_context->aux_batchbuffer_surface.bo = NULL;
2509
2510     if (mfc_context->aux_batchbuffer)
2511         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2512
2513     mfc_context->aux_batchbuffer = NULL;
2514
2515     free(mfc_context);
2516 }
2517
2518 static VAStatus gen75_mfc_pipeline(VADriverContextP ctx,
2519                   VAProfile profile,
2520                   struct encode_state *encode_state,
2521                   struct intel_encoder_context *encoder_context)
2522 {
2523     VAStatus vaStatus;
2524
2525     switch (profile) {
2526     case VAProfileH264Baseline:
2527     case VAProfileH264Main:
2528     case VAProfileH264High:
2529         vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
2530         break;
2531
2532         /* FIXME: add for other profile */
2533     case VAProfileMPEG2Simple:
2534     case VAProfileMPEG2Main:
2535         vaStatus = gen75_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
2536         break;
2537
2538     default:
2539         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2540         break;
2541     }
2542
2543     return vaStatus;
2544 }
2545
2546 Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2547 {
2548     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
2549
2550     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2551
2552     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
2553     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
2554
2555     mfc_context->gpe_context.curbe.length = 32 * 4;
2556
2557     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2558     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
2559     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
2560     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
2561     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
2562
2563     i965_gpe_load_kernels(ctx,
2564                           &mfc_context->gpe_context,
2565                           gen75_mfc_kernels,
2566                           NUM_MFC_KERNEL);
2567
2568     mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
2569     mfc_context->set_surface_state = gen75_mfc_surface_state;
2570     mfc_context->ind_obj_base_addr_state = gen75_mfc_ind_obj_base_addr_state;
2571     mfc_context->avc_img_state = gen75_mfc_avc_img_state;
2572     mfc_context->avc_qm_state = gen75_mfc_avc_qm_state;
2573     mfc_context->avc_fqm_state = gen75_mfc_avc_fqm_state;
2574     mfc_context->insert_object = gen75_mfc_avc_insert_object;
2575     mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;
2576
2577     encoder_context->mfc_context = mfc_context;
2578     encoder_context->mfc_context_destroy = gen75_mfc_context_destroy;
2579     encoder_context->mfc_pipeline = gen75_mfc_pipeline;
2580     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
2581
2582     return True;
2583 }