Use GPU to construct MFX command buffer for H264 encoding on Haswell
[platform/upstream/libva-intel-driver.git] / src / gen75_mfc.c
1 /*
2  * Copyright © 2010-2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45
46 #define AVC_INTRA_RDO_OFFSET    4
47 #define AVC_INTER_RDO_OFFSET    10
48 #define AVC_INTER_MSG_OFFSET    8
49 #define AVC_INTER_MV_OFFSET     48
50 #define AVC_RDO_MASK            0xFFFF
51
52 #define MFC_SOFTWARE_HASWELL    0
53
/* Lowest device revision value treated as stepping B0. */
#define B0_STEP_REV             2

/*
 * Non-zero when the driver data's device revision is B0 or newer.  Callers in
 * this file use it to choose the "_bplus" variant of a command.
 *
 * The argument is parenthesized so that any pointer-valued expression
 * (e.g. "&data" or "base + 1") expands correctly, not just a plain identifier.
 */
#define IS_STEPPING_BPLUS(i965) (((i965)->intel.revision) >= B0_STEP_REV)
56
57 static const uint32_t gen75_mfc_batchbuffer_avc[][4] = {
58 #include "shaders/utils/mfc_batchbuffer_hsw.g75b"
59 };
60
61 static struct i965_kernel gen75_mfc_kernels[] = {
62     {
63         "MFC AVC INTRA BATCHBUFFER ",
64         MFC_BATCHBUFFER_AVC_INTRA,
65         gen75_mfc_batchbuffer_avc,
66         sizeof(gen75_mfc_batchbuffer_avc),
67         NULL
68     },
69 };
70
71 #define         INTER_MODE_MASK         0x03
72 #define         INTER_8X8               0x03
73 #define         INTER_16X8              0x01
74 #define         INTER_8X16              0x02
75 #define         SUBMB_SHAPE_MASK        0x00FF00
76
77 #define         INTER_MV8               (4 << 20)
78 #define         INTER_MV32              (6 << 20)
79
80
81 static void
82 gen75_mfc_pipe_mode_select(VADriverContextP ctx,
83                            int standard_select,
84                            struct intel_encoder_context *encoder_context)
85 {
86     struct intel_batchbuffer *batch = encoder_context->base.batch;
87     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
88     assert(standard_select == MFX_FORMAT_MPEG2 ||
89            standard_select == MFX_FORMAT_AVC);
90
91     BEGIN_BCS_BATCH(batch, 5);
92
93     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
94     OUT_BCS_BATCH(batch,
95                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
96                   (MFD_MODE_VLD << 15) | /* VLD mode */
97                   (0 << 10) | /* Stream-Out Enable */
98                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
99                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
100                   (0 << 5)  | /* not in stitch mode */
101                   (1 << 4)  | /* encoding mode */
102                   (standard_select << 0));  /* standard select: avc or mpeg2 */
103     OUT_BCS_BATCH(batch,
104                   (0 << 7)  | /* expand NOA bus flag */
105                   (0 << 6)  | /* disable slice-level clock gating */
106                   (0 << 5)  | /* disable clock gating for NOA */
107                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
108                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
109                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
110                   (0 << 1)  |
111                   (0 << 0));
112     OUT_BCS_BATCH(batch, 0);
113     OUT_BCS_BATCH(batch, 0);
114
115     ADVANCE_BCS_BATCH(batch);
116 }
117
118 static void
119 gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
120 {
121     struct intel_batchbuffer *batch = encoder_context->base.batch;
122     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
123
124     BEGIN_BCS_BATCH(batch, 6);
125
126     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
127     OUT_BCS_BATCH(batch, 0);
128     OUT_BCS_BATCH(batch,
129                   ((mfc_context->surface_state.height - 1) << 18) |
130                   ((mfc_context->surface_state.width - 1) << 4));
131     OUT_BCS_BATCH(batch,
132                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
133                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
134                   (0 << 22) | /* surface object control state, FIXME??? */
135                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
136                   (0 << 2)  | /* must be 0 for interleave U/V */
137                   (1 << 1)  | /* must be tiled */
138                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
139     OUT_BCS_BATCH(batch,
140                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
141                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
142     OUT_BCS_BATCH(batch, 0);
143
144     ADVANCE_BCS_BATCH(batch);
145 }
146
147 static void
148 gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
149                                         struct intel_encoder_context *encoder_context)
150 {
151     struct intel_batchbuffer *batch = encoder_context->base.batch;
152     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
153     struct gen6_vme_context *vme_context = encoder_context->vme_context;
154
155     BEGIN_BCS_BATCH(batch, 26);
156
157     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
158     /* the DW1-3 is for the MFX indirect bistream offset */
159     OUT_BCS_BATCH(batch, 0);
160     OUT_BCS_BATCH(batch, 0);
161     OUT_BCS_BATCH(batch, 0);
162     /* the DW4-5 is the MFX upper bound */
163     OUT_BCS_BATCH(batch, 0);
164     OUT_BCS_BATCH(batch, 0);
165
166     /* the DW6-10 is for MFX Indirect MV Object Base Address */
167     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
168     OUT_BCS_BATCH(batch, 0);
169     OUT_BCS_BATCH(batch, 0);
170     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
171     OUT_BCS_BATCH(batch, 0);
172
173     /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
174     OUT_BCS_BATCH(batch, 0);
175     OUT_BCS_BATCH(batch, 0);
176     OUT_BCS_BATCH(batch, 0);
177     OUT_BCS_BATCH(batch, 0);
178     OUT_BCS_BATCH(batch, 0);
179
180     /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
181     OUT_BCS_BATCH(batch, 0);
182     OUT_BCS_BATCH(batch, 0);
183     OUT_BCS_BATCH(batch, 0);
184     OUT_BCS_BATCH(batch, 0);
185     OUT_BCS_BATCH(batch, 0);
186
187     /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/        
188     OUT_BCS_RELOC(batch,
189                   mfc_context->mfc_indirect_pak_bse_object.bo,
190                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
191                   0);
192     OUT_BCS_BATCH(batch, 0);
193     OUT_BCS_BATCH(batch, 0);
194         
195     OUT_BCS_RELOC(batch,
196                   mfc_context->mfc_indirect_pak_bse_object.bo,
197                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
198                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
199     OUT_BCS_BATCH(batch, 0);
200
201     ADVANCE_BCS_BATCH(batch);
202 }
203
204 static void
205 gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
206 {
207     struct intel_batchbuffer *batch = encoder_context->base.batch;
208     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
209     struct gen6_vme_context *vme_context = encoder_context->vme_context;
210     struct i965_driver_data *i965 = i965_driver_data(ctx);
211
212     if (IS_STEPPING_BPLUS(i965)) {
213         gen75_mfc_ind_obj_base_addr_state_bplus(ctx, encoder_context);
214         return;
215     }
216
217     BEGIN_BCS_BATCH(batch, 11);
218
219     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
220     OUT_BCS_BATCH(batch, 0);
221     OUT_BCS_BATCH(batch, 0);
222     /* MFX Indirect MV Object Base Address */
223     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
224     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
225     OUT_BCS_BATCH(batch, 0);
226     OUT_BCS_BATCH(batch, 0);
227     OUT_BCS_BATCH(batch, 0);
228     OUT_BCS_BATCH(batch, 0);
229     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
230     OUT_BCS_RELOC(batch,
231                   mfc_context->mfc_indirect_pak_bse_object.bo,
232                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
233                   0);
234     OUT_BCS_RELOC(batch,
235                   mfc_context->mfc_indirect_pak_bse_object.bo,
236                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
237                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
238
239     ADVANCE_BCS_BATCH(batch);
240 }
241
242 static void
243 gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,  
244                         struct intel_encoder_context *encoder_context)
245 {
246     struct intel_batchbuffer *batch = encoder_context->base.batch;
247     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
248     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
249
250     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
251     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
252
253     BEGIN_BCS_BATCH(batch, 16);
254
255     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
256     /*DW1. MB setting of frame */
257     OUT_BCS_BATCH(batch,
258                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
259     OUT_BCS_BATCH(batch, 
260                   ((height_in_mbs - 1) << 16) | 
261                   ((width_in_mbs - 1) << 0));
262     /* DW3 QP setting */
263     OUT_BCS_BATCH(batch, 
264                   (0 << 24) |   /* Second Chroma QP Offset */
265                   (0 << 16) |   /* Chroma QP Offset */
266                   (0 << 14) |   /* Max-bit conformance Intra flag */
267                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
268                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
269                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
270                   (0 << 8)  |   /* FIXME: Image Structure */
271                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
272     OUT_BCS_BATCH(batch,
273                   (0 << 16) |   /* Mininum Frame size */
274                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
275                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
276                   (0 << 13) |   /* CABAC 0 word insertion test enable */
277                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
278                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
279                   (0 << 8)  |   /* FIXME: MbMvFormatFlag */
280                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
281                   (0 << 6)  |   /* Only valid for VLD decoding mode */
282                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
283                   (0 << 4)  |   /* Direct 8x8 inference flag */
284                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
285                   (1 << 2)  |   /* Frame MB only flag */
286                   (0 << 1)  |   /* MBAFF mode is in active */
287                   (0 << 0));    /* Field picture flag */
288     /* DW5 Trellis quantization */
289     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
290     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
291                   (0xBB8 << 16) |       /* InterMbMaxSz */
292                   (0xEE8) );            /* IntraMbMaxSz */
293     OUT_BCS_BATCH(batch, 0);            /* Reserved */
294     /* DW8. QP delta */
295     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
296     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
297     /* DW10. Bit setting for MB */
298     OUT_BCS_BATCH(batch, 0x8C000000);
299     OUT_BCS_BATCH(batch, 0x00010000);
300     /* DW12. */
301     OUT_BCS_BATCH(batch, 0);
302     OUT_BCS_BATCH(batch, 0x02010100);
303     /* DW14. For short format */
304     OUT_BCS_BATCH(batch, 0);
305     OUT_BCS_BATCH(batch, 0);
306
307     ADVANCE_BCS_BATCH(batch);
308 }
309
310 static void
311 gen75_mfc_qm_state(VADriverContextP ctx,
312                    int qm_type,
313                    unsigned int *qm,
314                    int qm_length,
315                    struct intel_encoder_context *encoder_context)
316 {
317     struct intel_batchbuffer *batch = encoder_context->base.batch;
318     unsigned int qm_buffer[16];
319
320     assert(qm_length <= 16);
321     assert(sizeof(*qm) == 4);
322     memcpy(qm_buffer, qm, qm_length * 4);
323
324     BEGIN_BCS_BATCH(batch, 18);
325     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
326     OUT_BCS_BATCH(batch, qm_type << 0);
327     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
328     ADVANCE_BCS_BATCH(batch);
329 }
330
331 static void
332 gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
333 {
334     unsigned int qm[16] = {
335         0x10101010, 0x10101010, 0x10101010, 0x10101010,
336         0x10101010, 0x10101010, 0x10101010, 0x10101010,
337         0x10101010, 0x10101010, 0x10101010, 0x10101010,
338         0x10101010, 0x10101010, 0x10101010, 0x10101010
339     };
340
341     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
342     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
343     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
344     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
345 }
346
347 static void
348 gen75_mfc_fqm_state(VADriverContextP ctx,
349                     int fqm_type,
350                     unsigned int *fqm,
351                     int fqm_length,
352                     struct intel_encoder_context *encoder_context)
353 {
354     struct intel_batchbuffer *batch = encoder_context->base.batch;
355     unsigned int fqm_buffer[32];
356
357     assert(fqm_length <= 32);
358     assert(sizeof(*fqm) == 4);
359     memcpy(fqm_buffer, fqm, fqm_length * 4);
360
361     BEGIN_BCS_BATCH(batch, 34);
362     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
363     OUT_BCS_BATCH(batch, fqm_type << 0);
364     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
365     ADVANCE_BCS_BATCH(batch);
366 }
367
368 static void
369 gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
370 {
371     unsigned int qm[32] = {
372         0x10001000, 0x10001000, 0x10001000, 0x10001000,
373         0x10001000, 0x10001000, 0x10001000, 0x10001000,
374         0x10001000, 0x10001000, 0x10001000, 0x10001000,
375         0x10001000, 0x10001000, 0x10001000, 0x10001000,
376         0x10001000, 0x10001000, 0x10001000, 0x10001000,
377         0x10001000, 0x10001000, 0x10001000, 0x10001000,
378         0x10001000, 0x10001000, 0x10001000, 0x10001000,
379         0x10001000, 0x10001000, 0x10001000, 0x10001000
380     };
381
382     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
383     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
384     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
385     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
386 }
387
388 static void
389 gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
390                             unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
391                             int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
392                             struct intel_batchbuffer *batch)
393 {
394     if (batch == NULL)
395         batch = encoder_context->base.batch;
396
397     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
398
399     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
400     OUT_BCS_BATCH(batch,
401                   (0 << 16) |   /* always start at offset 0 */
402                   (data_bits_in_last_dw << 8) |
403                   (skip_emul_byte_count << 4) |
404                   (!!emulation_flag << 3) |
405                   ((!!is_last_header) << 2) |
406                   ((!!is_end_of_slice) << 1) |
407                   (0 << 0));    /* FIXME: ??? */
408     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
409
410     ADVANCE_BCS_BATCH(batch);
411 }
412
413
414 static void gen75_mfc_init(VADriverContextP ctx,
415                            struct encode_state *encode_state,
416                            struct intel_encoder_context *encoder_context)
417 {
418     struct i965_driver_data *i965 = i965_driver_data(ctx);
419     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
420     dri_bo *bo;
421     int i;
422     int width_in_mbs = 0;
423     int height_in_mbs = 0;
424     int slice_batchbuffer_size;
425
426     if (encoder_context->codec == CODEC_H264) {
427         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
428         width_in_mbs = pSequenceParameter->picture_width_in_mbs;
429         height_in_mbs = pSequenceParameter->picture_height_in_mbs;
430     } else {
431         VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
432
433         assert(encoder_context->codec == CODEC_MPEG2);
434
435         width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
436         height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
437     }
438
439     slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
440                 (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
441
442     /*Encode common setup for MFC*/
443     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
444     mfc_context->post_deblocking_output.bo = NULL;
445
446     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
447     mfc_context->pre_deblocking_output.bo = NULL;
448
449     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
450     mfc_context->uncompressed_picture_source.bo = NULL;
451
452     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
453     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
454
455     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
456         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
457         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
458         mfc_context->direct_mv_buffers[i].bo = NULL;
459     }
460
461     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
462         if (mfc_context->reference_surfaces[i].bo != NULL)
463             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
464         mfc_context->reference_surfaces[i].bo = NULL;  
465     }
466
467     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
468     bo = dri_bo_alloc(i965->intel.bufmgr,
469                       "Buffer",
470                       width_in_mbs * 64,
471                       64);
472     assert(bo);
473     mfc_context->intra_row_store_scratch_buffer.bo = bo;
474
475     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
476     bo = dri_bo_alloc(i965->intel.bufmgr,
477                       "Buffer",
478                       width_in_mbs * height_in_mbs * 16,
479                       64);
480     assert(bo);
481     mfc_context->macroblock_status_buffer.bo = bo;
482
483     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
484     bo = dri_bo_alloc(i965->intel.bufmgr,
485                       "Buffer",
486                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
487                       64);
488     assert(bo);
489     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
490
491     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
492     bo = dri_bo_alloc(i965->intel.bufmgr,
493                       "Buffer",
494                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
495                       0x1000);
496     assert(bo);
497     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
498
499     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
500     mfc_context->mfc_batchbuffer_surface.bo = NULL;
501
502     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
503     mfc_context->aux_batchbuffer_surface.bo = NULL;
504
505     if (mfc_context->aux_batchbuffer)
506         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
507
508     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD,
509                                                         slice_batchbuffer_size);
510     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
511     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
512     mfc_context->aux_batchbuffer_surface.pitch = 16;
513     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
514     mfc_context->aux_batchbuffer_surface.size_block = 16;
515
516     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
517 }
518
519 static void
520 gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
521                                     struct intel_encoder_context *encoder_context)
522 {
523     struct intel_batchbuffer *batch = encoder_context->base.batch;
524     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
525     int i;
526
527     BEGIN_BCS_BATCH(batch, 61);
528
529     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
530
531     /* the DW1-3 is for pre_deblocking */
532     if (mfc_context->pre_deblocking_output.bo)
533         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
534                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
535                       0);
536     else
537         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
538
539     OUT_BCS_BATCH(batch, 0);
540     OUT_BCS_BATCH(batch, 0);
541     /* the DW4-6 is for the post_deblocking */
542
543     if (mfc_context->post_deblocking_output.bo)
544         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
545                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
546                       0);                                                                                       /* post output addr  */ 
547     else
548         OUT_BCS_BATCH(batch, 0);
549     OUT_BCS_BATCH(batch, 0);
550     OUT_BCS_BATCH(batch, 0);
551
552     /* the DW7-9 is for the uncompressed_picture */
553     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
554                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
555                   0); /* uncompressed data */
556
557     OUT_BCS_BATCH(batch, 0);
558     OUT_BCS_BATCH(batch, 0);
559
560     /* the DW10-12 is for the mb status */
561     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
562                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
563                   0); /* StreamOut data*/
564     OUT_BCS_BATCH(batch, 0);
565     OUT_BCS_BATCH(batch, 0);
566
567     /* the DW13-15 is for the intra_row_store_scratch */
568     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
569                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
570                   0);   
571     OUT_BCS_BATCH(batch, 0);
572     OUT_BCS_BATCH(batch, 0);
573
574     /* the DW16-18 is for the deblocking filter */
575     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
576                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
577                   0);
578     OUT_BCS_BATCH(batch, 0);
579     OUT_BCS_BATCH(batch, 0);
580
581     /* the DW 19-50 is for Reference pictures*/
582     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
583         if ( mfc_context->reference_surfaces[i].bo != NULL) {
584             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
585                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
586                           0);                   
587         } else {
588             OUT_BCS_BATCH(batch, 0);
589         }
590         OUT_BCS_BATCH(batch, 0);
591     }
592     OUT_BCS_BATCH(batch, 0);
593
594     /* The DW 52-54 is for the MB status buffer */
595     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
596                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
597                   0);                                                                                   /* Macroblock status buffer*/
598         
599     OUT_BCS_BATCH(batch, 0);
600     OUT_BCS_BATCH(batch, 0);
601
602     /* the DW 55-57 is the ILDB buffer */
603     OUT_BCS_BATCH(batch, 0);
604     OUT_BCS_BATCH(batch, 0);
605     OUT_BCS_BATCH(batch, 0);
606
607     /* the DW 58-60 is the second ILDB buffer */
608     OUT_BCS_BATCH(batch, 0);
609     OUT_BCS_BATCH(batch, 0);
610     OUT_BCS_BATCH(batch, 0);
611     ADVANCE_BCS_BATCH(batch);
612 }
613
614 static void
615 gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
616 {
617     struct intel_batchbuffer *batch = encoder_context->base.batch;
618     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
619     struct i965_driver_data *i965 = i965_driver_data(ctx);
620     int i;
621
622     if (IS_STEPPING_BPLUS(i965)) {
623         gen75_mfc_pipe_buf_addr_state_bplus(ctx, encoder_context);
624         return;
625     }
626
627     BEGIN_BCS_BATCH(batch, 25);
628
629     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
630
631     if (mfc_context->pre_deblocking_output.bo)
632         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
633                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
634                       0);
635     else
636         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
637
638     if (mfc_context->post_deblocking_output.bo)
639         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
640                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
641                       0);                                                                                       /* post output addr  */ 
642     else
643         OUT_BCS_BATCH(batch, 0);
644
645     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
646                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
647                   0);                                                                                   /* uncompressed data */
648     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
649                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
650                   0);                                                                                   /* StreamOut data*/
651     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
652                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
653                   0);   
654     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
655                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
656                   0);
657     /* 7..22 Reference pictures*/
658     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
659         if ( mfc_context->reference_surfaces[i].bo != NULL) {
660             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
661                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
662                           0);                   
663         } else {
664             OUT_BCS_BATCH(batch, 0);
665         }
666     }
667     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
668                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
669                   0);                                                                                   /* Macroblock status buffer*/
670
671     OUT_BCS_BATCH(batch, 0);
672
673     ADVANCE_BCS_BATCH(batch);
674 }
675
676 static void
677 gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
678                                      struct intel_encoder_context *encoder_context)
679 {
680     struct intel_batchbuffer *batch = encoder_context->base.batch;
681     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
682
683     int i;
684
685     BEGIN_BCS_BATCH(batch, 71);
686
687     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
688
689     /* Reference frames and Current frames */
690     /* the DW1-32 is for the direct MV for reference */
691     for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
692         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
693             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
694                           I915_GEM_DOMAIN_INSTRUCTION, 0,
695                           0);
696             OUT_BCS_BATCH(batch, 0);
697         } else {
698             OUT_BCS_BATCH(batch, 0);
699             OUT_BCS_BATCH(batch, 0);
700         }
701     }
702     OUT_BCS_BATCH(batch, 0);
703
704     /* the DW34-36 is the MV for the current reference */
705     OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
706                   I915_GEM_DOMAIN_INSTRUCTION, 0,
707                   0);
708
709     OUT_BCS_BATCH(batch, 0);
710     OUT_BCS_BATCH(batch, 0);
711
712     /* POL list */
713     for(i = 0; i < 32; i++) {
714         OUT_BCS_BATCH(batch, i/2);
715     }
716     OUT_BCS_BATCH(batch, 0);
717     OUT_BCS_BATCH(batch, 0);
718
719     ADVANCE_BCS_BATCH(batch);
720 }
721
722 static void
723 gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
724 {
725     struct intel_batchbuffer *batch = encoder_context->base.batch;
726     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
727     struct i965_driver_data *i965 = i965_driver_data(ctx);
728     int i;
729
730     if (IS_STEPPING_BPLUS(i965)) {
731         gen75_mfc_avc_directmode_state_bplus(ctx, encoder_context);
732         return;
733     }
734
735     BEGIN_BCS_BATCH(batch, 69);
736
737     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
738
739     /* Reference frames and Current frames */
740     for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
741         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
742             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
743                           I915_GEM_DOMAIN_INSTRUCTION, 0,
744                           0);
745         } else {
746             OUT_BCS_BATCH(batch, 0);
747         }
748     }
749
750     /* POL list */
751     for(i = 0; i < 32; i++) {
752         OUT_BCS_BATCH(batch, i/2);
753     }
754     OUT_BCS_BATCH(batch, 0);
755     OUT_BCS_BATCH(batch, 0);
756
757     ADVANCE_BCS_BATCH(batch);
758 }
759
760
761 static void
762 gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
763                                         struct intel_encoder_context *encoder_context)
764 {
765     struct intel_batchbuffer *batch = encoder_context->base.batch;
766     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
767
768     BEGIN_BCS_BATCH(batch, 10);
769
770     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
771     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
772                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
773                   0);
774     OUT_BCS_BATCH(batch, 0);
775     OUT_BCS_BATCH(batch, 0);
776         
777     /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
778     OUT_BCS_BATCH(batch, 0);
779     OUT_BCS_BATCH(batch, 0);
780     OUT_BCS_BATCH(batch, 0);
781
782     /* the DW7-9 is for Bitplane Read Buffer Base Address */
783     OUT_BCS_BATCH(batch, 0);
784     OUT_BCS_BATCH(batch, 0);
785     OUT_BCS_BATCH(batch, 0);
786
787     ADVANCE_BCS_BATCH(batch);
788 }
789
790 static void
791 gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
792 {
793     struct intel_batchbuffer *batch = encoder_context->base.batch;
794     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
795     struct i965_driver_data *i965 = i965_driver_data(ctx);
796
797     if (IS_STEPPING_BPLUS(i965)) {
798         gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, encoder_context);
799         return;
800     }
801
802     BEGIN_BCS_BATCH(batch, 4);
803
804     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
805     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
806                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
807                   0);
808     OUT_BCS_BATCH(batch, 0);
809     OUT_BCS_BATCH(batch, 0);
810
811     ADVANCE_BCS_BATCH(batch);
812 }
813
814
815 static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
816                                                        struct encode_state *encode_state,
817                                                        struct intel_encoder_context *encoder_context)
818 {
819     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
820
821     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
822     mfc_context->set_surface_state(ctx, encoder_context);
823     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
824     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
825     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
826     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
827     mfc_context->avc_qm_state(ctx, encoder_context);
828     mfc_context->avc_fqm_state(ctx, encoder_context);
829     gen75_mfc_avc_directmode_state(ctx, encoder_context); 
830     intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
831 }
832
833
834 static VAStatus gen75_mfc_run(VADriverContextP ctx, 
835                               struct encode_state *encode_state,
836                               struct intel_encoder_context *encoder_context)
837 {
838     struct intel_batchbuffer *batch = encoder_context->base.batch;
839
840     intel_batchbuffer_flush(batch);             //run the pipeline
841
842     return VA_STATUS_SUCCESS;
843 }
844
845
846 static VAStatus
847 gen75_mfc_stop(VADriverContextP ctx, 
848                struct encode_state *encode_state,
849                struct intel_encoder_context *encoder_context,
850                int *encoded_bits_size)
851 {
852     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
853     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
854     VACodedBufferSegment *coded_buffer_segment;
855     
856     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
857     assert(vaStatus == VA_STATUS_SUCCESS);
858     *encoded_bits_size = coded_buffer_segment->size * 8;
859     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
860
861     return VA_STATUS_SUCCESS;
862 }
863
864
865 static void
866 gen75_mfc_avc_slice_state(VADriverContextP ctx,
867                           VAEncPictureParameterBufferH264 *pic_param,
868                           VAEncSliceParameterBufferH264 *slice_param,
869                           struct encode_state *encode_state,
870                           struct intel_encoder_context *encoder_context,
871                           int rate_control_enable,
872                           int qp,
873                           struct intel_batchbuffer *batch)
874 {
875     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
876     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
877     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
878     int beginmb = slice_param->macroblock_address;
879     int endmb = beginmb + slice_param->num_macroblocks;
880     int beginx = beginmb % width_in_mbs;
881     int beginy = beginmb / width_in_mbs;
882     int nextx =  endmb % width_in_mbs;
883     int nexty = endmb / width_in_mbs;
884     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
885     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
886     int maxQpN, maxQpP;
887     unsigned char correct[6], grow, shrink;
888     int i;
889     int weighted_pred_idc = 0;
890     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
891     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
892     int num_ref_l0 = 0, num_ref_l1 = 0;
893
894     if (batch == NULL)
895         batch = encoder_context->base.batch;
896
897     if (slice_type == SLICE_TYPE_I) {
898         luma_log2_weight_denom = 0;
899         chroma_log2_weight_denom = 0;
900     } else if (slice_type == SLICE_TYPE_P) {
901         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
902         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
903
904         if (slice_param->num_ref_idx_active_override_flag)
905             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
906     } else if (slice_type == SLICE_TYPE_B) {
907         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
908         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
909         num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
910
911         if (slice_param->num_ref_idx_active_override_flag) {
912             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
913             num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
914         }
915
916         if (weighted_pred_idc == 2) {
917             /* 8.4.3 - Derivation process for prediction weights (8-279) */
918             luma_log2_weight_denom = 5;
919             chroma_log2_weight_denom = 5;
920         }
921     }
922
923     maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
924     maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
925
926     for (i = 0; i < 6; i++)
927         correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
928
929     grow = mfc_context->bit_rate_control_context[slice_type].GrowInit + 
930         (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
931     shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit + 
932         (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
933
934     BEGIN_BCS_BATCH(batch, 11);;
935
936     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
937     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
938
939     OUT_BCS_BATCH(batch,
940                   (num_ref_l0 << 16) |
941                   (num_ref_l1 << 24) |
942                   (chroma_log2_weight_denom << 8) |
943                   (luma_log2_weight_denom << 0));
944
945     OUT_BCS_BATCH(batch, 
946                   (weighted_pred_idc << 30) |
947                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
948                   (slice_param->disable_deblocking_filter_idc << 27) |
949                   (slice_param->cabac_init_idc << 24) |
950                   (qp<<16) |                    /*Slice Quantization Parameter*/
951                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
952                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
953     OUT_BCS_BATCH(batch,
954                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
955                   (beginx << 16) |
956                   slice_param->macroblock_address );
957     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
958     OUT_BCS_BATCH(batch, 
959                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
960                   (1 << 30) |           /*ResetRateControlCounter*/
961                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
962                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
963                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
964                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
965                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
966                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
967                   (last_slice << 19) |     /*IsLastSlice*/
968                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
969                   (1 << 17) |       /*HeaderPresentFlag*/       
970                   (1 << 16) |       /*SliceData PresentFlag*/
971                   (1 << 15) |       /*TailPresentFlag*/
972                   (1 << 13) |       /*RBSP NAL TYPE*/   
973                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
974     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
975     OUT_BCS_BATCH(batch,
976                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
977                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
978                   (shrink << 8)  |
979                   (grow << 0));   
980     OUT_BCS_BATCH(batch,
981                   (correct[5] << 20) |
982                   (correct[4] << 16) |
983                   (correct[3] << 12) |
984                   (correct[2] << 8) |
985                   (correct[1] << 4) |
986                   (correct[0] << 0));
987     OUT_BCS_BATCH(batch, 0);
988
989     ADVANCE_BCS_BATCH(batch);
990 }
991
992
993 #if MFC_SOFTWARE_HASWELL
994
995 static int
996 gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
997                                int qp,unsigned int *msg,
998                                struct intel_encoder_context *encoder_context,
999                                unsigned char target_mb_size, unsigned char max_mb_size,
1000                                struct intel_batchbuffer *batch)
1001 {
1002     int len_in_dwords = 12;
1003     unsigned int intra_msg;
1004 #define         INTRA_MSG_FLAG          (1 << 13)
1005 #define         INTRA_MBTYPE_MASK       (0x1F0000)
1006     if (batch == NULL)
1007         batch = encoder_context->base.batch;
1008
1009     BEGIN_BCS_BATCH(batch, len_in_dwords);
1010
1011     intra_msg = msg[0] & 0xC0FF;
1012     intra_msg |= INTRA_MSG_FLAG;
1013     intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
1014     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1015     OUT_BCS_BATCH(batch, 0);
1016     OUT_BCS_BATCH(batch, 0);
1017     OUT_BCS_BATCH(batch, 
1018                   (0 << 24) |           /* PackedMvNum, Debug*/
1019                   (0 << 20) |           /* No motion vector */
1020                   (1 << 19) |           /* CbpDcY */
1021                   (1 << 18) |           /* CbpDcU */
1022                   (1 << 17) |           /* CbpDcV */
1023                   intra_msg);
1024
1025     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
1026     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
1027     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
1028
1029     /*Stuff for Intra MB*/
1030     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
1031     OUT_BCS_BATCH(batch, msg[2]);       
1032     OUT_BCS_BATCH(batch, msg[3]&0xFF);  
1033     
1034     /*MaxSizeInWord and TargetSzieInWord*/
1035     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1036                   (target_mb_size << 16) );
1037
1038     OUT_BCS_BATCH(batch, 0);
1039
1040     ADVANCE_BCS_BATCH(batch);
1041
1042     return len_in_dwords;
1043 }
1044
1045 static int
1046 gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1047                                unsigned int *msg, unsigned int offset,
1048                                struct intel_encoder_context *encoder_context,
1049                                unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1050                                struct intel_batchbuffer *batch)
1051 {
1052     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1053     int len_in_dwords = 12;
1054     unsigned int inter_msg = 0;
1055     if (batch == NULL)
1056         batch = encoder_context->base.batch;
1057     {
1058 #define MSG_MV_OFFSET   4
1059         unsigned int *mv_ptr;
1060         mv_ptr = msg + MSG_MV_OFFSET;
1061         /* MV of VME output is based on 16 sub-blocks. So it is necessary
1062          * to convert them to be compatible with the format of AVC_PAK
1063          * command.
1064          */
1065         if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
1066             /* MV[0] and MV[2] are replicated */
1067             mv_ptr[4] = mv_ptr[0];
1068             mv_ptr[5] = mv_ptr[1];
1069             mv_ptr[2] = mv_ptr[8];
1070             mv_ptr[3] = mv_ptr[9];
1071             mv_ptr[6] = mv_ptr[8];
1072             mv_ptr[7] = mv_ptr[9];
1073         } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
1074             /* MV[0] and MV[1] are replicated */
1075             mv_ptr[2] = mv_ptr[0];
1076             mv_ptr[3] = mv_ptr[1];
1077             mv_ptr[4] = mv_ptr[16];
1078             mv_ptr[5] = mv_ptr[17];
1079             mv_ptr[6] = mv_ptr[24];
1080             mv_ptr[7] = mv_ptr[25];
1081         } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1082                    !(msg[1] & SUBMB_SHAPE_MASK)) {
1083             /* Don't touch MV[0] or MV[1] */
1084             mv_ptr[2] = mv_ptr[8];
1085             mv_ptr[3] = mv_ptr[9];
1086             mv_ptr[4] = mv_ptr[16];
1087             mv_ptr[5] = mv_ptr[17];
1088             mv_ptr[6] = mv_ptr[24];
1089             mv_ptr[7] = mv_ptr[25];
1090         }
1091     }
1092
1093     BEGIN_BCS_BATCH(batch, len_in_dwords);
1094
1095     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1096
1097     inter_msg = 32;
1098     /* MV quantity */
1099     if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
1100         if (msg[1] & SUBMB_SHAPE_MASK)
1101             inter_msg = 128;
1102     }
1103     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
1104     OUT_BCS_BATCH(batch, offset);
1105     inter_msg = msg[0] & (0x1F00FFFF);
1106     inter_msg |= INTER_MV8;
1107     inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
1108     if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1109         (msg[1] & SUBMB_SHAPE_MASK)) {
1110         inter_msg |= INTER_MV32;
1111     }
1112
1113     OUT_BCS_BATCH(batch, inter_msg);
1114
1115     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
1116     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
1117 #if 0 
1118     if ( slice_type == SLICE_TYPE_B) {
1119         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
1120     } else {
1121         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
1122     }
1123 #else
1124     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
1125 #endif
1126
1127     inter_msg = msg[1] >> 8;
1128     /*Stuff for Inter MB*/
1129     OUT_BCS_BATCH(batch, inter_msg);        
1130     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
1131     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);
1132
1133     /*MaxSizeInWord and TargetSzieInWord*/
1134     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1135                   (target_mb_size << 16) );
1136
1137     OUT_BCS_BATCH(batch, 0x0);    
1138
1139     ADVANCE_BCS_BATCH(batch);
1140
1141     return len_in_dwords;
1142 }
1143
1144 static void 
1145 gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1146                                         struct encode_state *encode_state,
1147                                         struct intel_encoder_context *encoder_context,
1148                                         int slice_index,
1149                                         struct intel_batchbuffer *slice_batch)
1150 {
1151     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1152     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1153     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1154     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1155     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1156     unsigned int *msg = NULL, offset = 0;
1157     unsigned char *msg_ptr = NULL;
1158     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1159     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1160     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1161     int i,x,y;
1162     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1163     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1164     unsigned char *slice_header = NULL;
1165     int slice_header_length_in_bits = 0;
1166     unsigned int tail_data[] = { 0x0, 0x0 };
1167     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1168     int is_intra = slice_type == SLICE_TYPE_I;
1169
1170     if (rate_control_mode == VA_RC_CBR) {
1171         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1172         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1173     }
1174
1175     /* only support for 8-bit pixel bit-depth */
1176     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1177     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1178     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1179     assert(qp >= 0 && qp < 52);
1180
1181     gen75_mfc_avc_slice_state(ctx, 
1182                               pPicParameter,
1183                               pSliceParameter,
1184                               encode_state, encoder_context,
1185                               (rate_control_mode == VA_RC_CBR), qp, slice_batch);
1186
1187     if ( slice_index == 0) 
1188         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1189
1190     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1191
1192     // slice hander
1193     mfc_context->insert_object(ctx, encoder_context,
1194                                (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1195                                5,  /* first 5 bytes are start code + nal unit type */
1196                                1, 0, 1, slice_batch);
1197
1198     dri_bo_map(vme_context->vme_output.bo , 1);
1199     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1200
1201     if (is_intra) {
1202         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1203     } else {
1204         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1205     }
1206    
1207     for (i = pSliceParameter->macroblock_address; 
1208          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1209         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1210         x = i % width_in_mbs;
1211         y = i / width_in_mbs;
1212         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1213
1214         if (is_intra) {
1215             assert(msg);
1216             gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1217         } else {
1218             int inter_rdo, intra_rdo;
1219             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1220             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1221             offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1222             if (intra_rdo < inter_rdo) { 
1223                 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1224             } else {
1225                 msg += AVC_INTER_MSG_OFFSET;
1226                 gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, slice_type, slice_batch);
1227             }
1228         }
1229     }
1230    
1231     dri_bo_unmap(vme_context->vme_output.bo);
1232
1233     if ( last_slice ) {    
1234         mfc_context->insert_object(ctx, encoder_context,
1235                                    tail_data, 2, 8,
1236                                    2, 1, 1, 0, slice_batch);
1237     } else {
1238         mfc_context->insert_object(ctx, encoder_context,
1239                                    tail_data, 1, 8,
1240                                    1, 1, 1, 0, slice_batch);
1241     }
1242
1243     free(slice_header);
1244
1245 }
1246
1247 static dri_bo *
1248 gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1249                                    struct encode_state *encode_state,
1250                                    struct intel_encoder_context *encoder_context)
1251 {
1252     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1253     struct i965_driver_data *i965 = i965_driver_data(ctx);
1254     struct intel_batchbuffer *batch;
1255     dri_bo *batch_bo;
1256     int i;
1257     int buffer_size;
1258
1259     batch = mfc_context->aux_batchbuffer;
1260     batch_bo = batch->buffer;
1261     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1262         gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1263     }
1264
1265     intel_batchbuffer_align(batch, 8);
1266     
1267     BEGIN_BCS_BATCH(batch, 2);
1268     OUT_BCS_BATCH(batch, 0);
1269     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1270     ADVANCE_BCS_BATCH(batch);
1271
1272     dri_bo_reference(batch_bo);
1273
1274     intel_batchbuffer_free(batch);
1275     mfc_context->aux_batchbuffer = NULL;
1276
1277     return batch_bo;
1278 }
1279
1280 #else
1281
1282 static void
1283 gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1284                                      struct encode_state *encode_state,
1285                                      struct intel_encoder_context *encoder_context)
1286
1287 {
1288     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1289     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1290
1291     assert(vme_context->vme_output.bo);
1292     mfc_context->buffer_suface_setup(ctx,
1293                                      &mfc_context->gpe_context,
1294                                      &vme_context->vme_output,
1295                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1296                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1297 }
1298
1299 static void
1300 gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1301                                       struct encode_state *encode_state,
1302                                       struct intel_encoder_context *encoder_context)
1303
1304 {
1305     struct i965_driver_data *i965 = i965_driver_data(ctx);
1306     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1307     assert(mfc_context->aux_batchbuffer_surface.bo);
1308     mfc_context->buffer_suface_setup(ctx,
1309                                      &mfc_context->gpe_context,
1310                                      &mfc_context->aux_batchbuffer_surface,
1311                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1312                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1313 }
1314
1315 static void
1316 gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
1317                                      struct encode_state *encode_state,
1318                                      struct intel_encoder_context *encoder_context)
1319 {
1320     gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1321     gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1322 }
1323
1324 static void
1325 gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
1326                                  struct encode_state *encode_state,
1327                                  struct intel_encoder_context *encoder_context)
1328 {
1329     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1330     struct gen6_interface_descriptor_data *desc;   
1331     int i;
1332     dri_bo *bo;
1333
1334     bo = mfc_context->gpe_context.idrt.bo;
1335     dri_bo_map(bo, 1);
1336     assert(bo->virtual);
1337     desc = bo->virtual;
1338
1339     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1340         struct i965_kernel *kernel;
1341
1342         kernel = &mfc_context->gpe_context.kernels[i];
1343         assert(sizeof(*desc) == 32);
1344
1345         /*Setup the descritor table*/
1346         memset(desc, 0, sizeof(*desc));
1347         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1348         desc->desc2.sampler_count = 0;
1349         desc->desc2.sampler_state_pointer = 0;
1350         desc->desc3.binding_table_entry_count = 2;
1351         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1352         desc->desc4.constant_urb_entry_read_offset = 0;
1353         desc->desc4.constant_urb_entry_read_length = 4;
1354                 
1355         /*kernel start*/
1356         dri_bo_emit_reloc(bo,   
1357                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1358                           0,
1359                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1360                           kernel->bo);
1361         desc++;
1362     }
1363
1364     dri_bo_unmap(bo);
1365 }
1366
1367 static void
1368 gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1369                                      struct encode_state *encode_state,
1370                                      struct intel_encoder_context *encoder_context)
1371 {
1372     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1373     
1374     (void)mfc_context;
1375 }
1376
1377 #define AVC_PAK_LEN_IN_BYTE     48
1378 #define AVC_PAK_LEN_IN_OWORD    3
1379
1380 static void
1381 gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1382                                           uint32_t intra_flag,
1383                                           int head_offset,
1384                                           int number_mb_cmds,
1385                                           int slice_end_x,
1386                                           int slice_end_y,
1387                                           int mb_x,
1388                                           int mb_y,
1389                                           int width_in_mbs,
1390                                           int qp,
1391                                           uint32_t fwd_ref,
1392                                           uint32_t bwd_ref)
1393 {
1394     uint32_t temp_value;
1395     BEGIN_BATCH(batch, 14);
1396     
1397     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (14 - 2));
1398     OUT_BATCH(batch, 0);
1399     OUT_BATCH(batch, 0);
1400     OUT_BATCH(batch, 0);
1401     OUT_BATCH(batch, 0);
1402     OUT_BATCH(batch, 0);
1403    
1404     /*inline data */
1405     OUT_BATCH(batch, head_offset / 16);
1406     OUT_BATCH(batch, (intra_flag) | (qp << 16));
1407     temp_value = (mb_x | (mb_y << 8) | (width_in_mbs << 16));
1408     OUT_BATCH(batch, temp_value);
1409
1410     OUT_BATCH(batch, number_mb_cmds);
1411
1412     OUT_BATCH(batch,
1413               ((slice_end_y << 8) | (slice_end_x)));
1414     OUT_BATCH(batch, fwd_ref);
1415     OUT_BATCH(batch, bwd_ref);
1416
1417     OUT_BATCH(batch, MI_NOOP);
1418
1419     ADVANCE_BATCH(batch);
1420 }
1421
1422 static void
1423 gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1424                                         struct intel_encoder_context *encoder_context,
1425                                         VAEncSliceParameterBufferH264 *slice_param,
1426                                         int head_offset,
1427                                         int qp,
1428                                         int last_slice)
1429 {
1430     struct intel_batchbuffer *batch = encoder_context->base.batch;
1431     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1432     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1433     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1434     int total_mbs = slice_param->num_macroblocks;
1435     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1436     int number_mb_cmds = 128;
1437     int starting_offset = 0;
1438     int i;
1439     int mb_x, mb_y;
1440     int last_mb, slice_end_x, slice_end_y;
1441     int remaining_mb = total_mbs;
1442     uint32_t fwd_ref , bwd_ref, mb_flag;
1443
1444     last_mb = slice_param->macroblock_address + total_mbs - 1;
1445     slice_end_x = last_mb % width_in_mbs;
1446     slice_end_y = last_mb / width_in_mbs;
1447
1448     if (slice_type == SLICE_TYPE_I) {
1449         fwd_ref = 0;
1450         bwd_ref = 0;
1451         mb_flag = 1;
1452     } else {
1453         fwd_ref = vme_context->ref_index_in_mb[0];
1454         bwd_ref = vme_context->ref_index_in_mb[1];
1455         mb_flag = 0;
1456     }
1457
1458     if (width_in_mbs >= 100) {
1459         number_mb_cmds = width_in_mbs / 5;
1460     } else if (width_in_mbs >= 80) {
1461         number_mb_cmds = width_in_mbs / 4;
1462     } else if (width_in_mbs >= 60) {
1463         number_mb_cmds = width_in_mbs / 3;
1464     } else if (width_in_mbs >= 40) {
1465         number_mb_cmds = width_in_mbs / 2;
1466     } else {
1467         number_mb_cmds = width_in_mbs;
1468     }
1469
1470     do {
1471         if (number_mb_cmds >= remaining_mb) {
1472                 number_mb_cmds = remaining_mb;
1473         }
1474         mb_x = (slice_param->macroblock_address + starting_offset) % width_in_mbs;
1475         mb_y = (slice_param->macroblock_address + starting_offset) / width_in_mbs;
1476
1477         gen75_mfc_batchbuffer_emit_object_command(batch,
1478                                                   mb_flag,
1479                                                   head_offset,
1480                                                   number_mb_cmds,
1481                                                   slice_end_x,
1482                                                   slice_end_y,
1483                                                   mb_x,
1484                                                   mb_y,
1485                                                   width_in_mbs,
1486                                                   qp,
1487                                                   fwd_ref,
1488                                                   bwd_ref);
1489
1490         head_offset += (number_mb_cmds * AVC_PAK_LEN_IN_BYTE);
1491         remaining_mb -= number_mb_cmds;
1492         starting_offset += number_mb_cmds;
1493     } while (remaining_mb > 0);
1494 }
1495                           
1496 /*
1497  * return size in Owords (16bytes)
1498  */         
1499 static void
1500 gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1501                                 struct encode_state *encode_state,
1502                                 struct intel_encoder_context *encoder_context,
1503                                 int slice_index)
1504 {
1505     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1506     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1507     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1508     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1509     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1510     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1511     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1512     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1513     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1514     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1515     unsigned char *slice_header = NULL;
1516     int slice_header_length_in_bits = 0;
1517     unsigned int tail_data[] = { 0x0, 0x0 };
1518     long head_offset;
1519     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1520
1521     if (rate_control_mode == VA_RC_CBR) {
1522         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1523         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1524     }
1525
1526     /* only support for 8-bit pixel bit-depth */
1527     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1528     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1529     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1530     assert(qp >= 0 && qp < 52);
1531
1532     gen75_mfc_avc_slice_state(ctx,
1533                               pPicParameter,
1534                               pSliceParameter,
1535                               encode_state,
1536                               encoder_context,
1537                               (rate_control_mode == VA_RC_CBR),
1538                               qp,
1539                               slice_batch);
1540
1541     if (slice_index == 0)
1542         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1543
1544     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1545
1546     // slice hander
1547     mfc_context->insert_object(ctx,
1548                                encoder_context,
1549                                (unsigned int *)slice_header,
1550                                ALIGN(slice_header_length_in_bits, 32) >> 5,
1551                                slice_header_length_in_bits & 0x1f,
1552                                5,  /* first 5 bytes are start code + nal unit type */
1553                                1,
1554                                0,
1555                                1,
1556                                slice_batch);
1557     free(slice_header);
1558
1559     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1560     head_offset = intel_batchbuffer_used_size(slice_batch);
1561
1562     slice_batch->ptr += pSliceParameter->num_macroblocks * AVC_PAK_LEN_IN_BYTE;
1563
1564     gen75_mfc_avc_batchbuffer_slice_command(ctx,
1565                                             encoder_context,
1566                                             pSliceParameter,
1567                                             head_offset,
1568                                             qp,
1569                                             last_slice);
1570
1571
1572     /* Aligned for tail */
1573     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1574     if (last_slice) {    
1575         mfc_context->insert_object(ctx,
1576                                    encoder_context,
1577                                    tail_data,
1578                                    2,
1579                                    8,
1580                                    2,
1581                                    1,
1582                                    1,
1583                                    0,
1584                                    slice_batch);
1585     } else {
1586         mfc_context->insert_object(ctx,
1587                                    encoder_context,
1588                                    tail_data,
1589                                    1,
1590                                    8,
1591                                    1,
1592                                    1,
1593                                    1,
1594                                    0,
1595                                    slice_batch);
1596     }
1597
1598     return;
1599 }
1600
1601 static void
1602 gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1603                                    struct encode_state *encode_state,
1604                                    struct intel_encoder_context *encoder_context)
1605 {
1606     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1607     struct intel_batchbuffer *batch = encoder_context->base.batch;
1608     int i, size, offset = 0;
1609     intel_batchbuffer_start_atomic(batch, 0x4000); 
1610     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1611
1612     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1613         gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i);
1614     }
1615     {
1616         struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1617         intel_batchbuffer_align(slice_batch, 8);
1618         BEGIN_BCS_BATCH(slice_batch, 2);
1619         OUT_BCS_BATCH(slice_batch, 0);
1620         OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
1621         ADVANCE_BCS_BATCH(slice_batch);
1622     }
1623     intel_batchbuffer_end_atomic(batch);
1624     intel_batchbuffer_flush(batch);
1625 }
1626
1627 static void
1628 gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
1629                                 struct encode_state *encode_state,
1630                                 struct intel_encoder_context *encoder_context)
1631 {
1632     gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1633     gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1634     gen75_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1635     gen75_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1636 }
1637
1638 static dri_bo *
1639 gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1640                                    struct encode_state *encode_state,
1641                                    struct intel_encoder_context *encoder_context)
1642 {
1643     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1644
1645     gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1646     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
1647
1648     return mfc_context->aux_batchbuffer_surface.bo;
1649 }
1650
1651 #endif
1652
1653 static void
1654 gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
1655                                   struct encode_state *encode_state,
1656                                   struct intel_encoder_context *encoder_context)
1657 {
1658     struct intel_batchbuffer *batch = encoder_context->base.batch;
1659     dri_bo *slice_batch_bo;
1660
1661     if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1662         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1663         assert(0);
1664         return; 
1665     }
1666
1667 #if MFC_SOFTWARE_HASWELL
1668     slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1669 #else
1670     slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1671 #endif
1672
1673     // begin programing
1674     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1675     intel_batchbuffer_emit_mi_flush(batch);
1676     
1677     // picture level programing
1678     gen75_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1679
1680     BEGIN_BCS_BATCH(batch, 2);
1681     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1682     OUT_BCS_RELOC(batch,
1683                   slice_batch_bo,
1684                   I915_GEM_DOMAIN_COMMAND, 0, 
1685                   0);
1686     ADVANCE_BCS_BATCH(batch);
1687
1688     // end programing
1689     intel_batchbuffer_end_atomic(batch);
1690
1691     dri_bo_unreference(slice_batch_bo);
1692 }
1693
1694
1695 static VAStatus
1696 gen75_mfc_avc_encode_picture(VADriverContextP ctx, 
1697                              struct encode_state *encode_state,
1698                              struct intel_encoder_context *encoder_context)
1699 {
1700     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1701     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1702     int current_frame_bits_size;
1703     int sts;
1704  
1705     for (;;) {
1706         gen75_mfc_init(ctx, encode_state, encoder_context);
1707         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1708         /*Programing bcs pipeline*/
1709         gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);  //filling the pipeline
1710         gen75_mfc_run(ctx, encode_state, encoder_context);
1711         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1712             gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1713             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1714             if (sts == BRC_NO_HRD_VIOLATION) {
1715                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1716                 break;
1717             }
1718             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1719                 if (!mfc_context->hrd.violation_noted) {
1720                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1721                     mfc_context->hrd.violation_noted = 1;
1722                 }
1723                 return VA_STATUS_SUCCESS;
1724             }
1725         } else {
1726             break;
1727         }
1728     }
1729
1730     return VA_STATUS_SUCCESS;
1731 }
1732
1733 /*
1734  * MPEG-2
1735  */
1736
/*
 * Picture-type code written into MFX_MPEG2_PIC_STATE, indexed by the
 * picture_type value of the VA picture parameter buffer.
 */
static const int
va_to_gen75_mpeg2_picture_type[3] = {
    [0] = 1,    /* I */
    [1] = 2,    /* P */
    [2] = 3,    /* B */
};
1743
1744 static void
1745 gen75_mfc_mpeg2_pic_state(VADriverContextP ctx,
1746                           struct intel_encoder_context *encoder_context,
1747                           struct encode_state *encode_state)
1748 {
1749     struct intel_batchbuffer *batch = encoder_context->base.batch;
1750     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1751     VAEncPictureParameterBufferMPEG2 *pic_param;
1752     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1753     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1754     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1755
1756     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1757     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1758     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1759
1760     BEGIN_BCS_BATCH(batch, 13);
1761     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1762     OUT_BCS_BATCH(batch,
1763                   (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1764                   (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1765                   (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1766                   (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1767                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1768                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1769                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1770                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1771                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1772                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1773                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1774                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1775     OUT_BCS_BATCH(batch,
1776                   0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1777                   va_to_gen75_mpeg2_picture_type[pic_param->picture_type] << 9 |
1778                   0);
1779     OUT_BCS_BATCH(batch,
1780                   1 << 31 |     /* slice concealment */
1781                   (height_in_mbs - 1) << 16 |
1782                   (width_in_mbs - 1));
1783     if (slice_param && slice_param->quantiser_scale_code >= 14) 
1784         OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
1785     else
1786         OUT_BCS_BATCH(batch, 0);
1787
1788     OUT_BCS_BATCH(batch, 0);
1789     OUT_BCS_BATCH(batch,
1790                   0xFFF << 16 | /* InterMBMaxSize */
1791                   0xFFF << 0 |  /* IntraMBMaxSize */
1792                   0);
1793     OUT_BCS_BATCH(batch, 0);
1794     OUT_BCS_BATCH(batch, 0);
1795     OUT_BCS_BATCH(batch, 0);
1796     OUT_BCS_BATCH(batch, 0);
1797     OUT_BCS_BATCH(batch, 0);
1798     OUT_BCS_BATCH(batch, 0);
1799     ADVANCE_BCS_BATCH(batch);
1800 }
1801
1802 static void
1803 gen75_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1804 {
1805     unsigned char intra_qm[64] = {
1806         8, 16, 19, 22, 26, 27, 29, 34,
1807         16, 16, 22, 24, 27, 29, 34, 37,
1808         19, 22, 26, 27, 29, 34, 34, 38,
1809         22, 22, 26, 27, 29, 34, 37, 40,
1810         22, 26, 27, 29, 32, 35, 40, 48,
1811         26, 27, 29, 32, 35, 40, 48, 58,
1812         26, 27, 29, 34, 38, 46, 56, 69,
1813         27, 29, 35, 38, 46, 56, 69, 83
1814     };
1815
1816     unsigned char non_intra_qm[64] = {
1817         16, 16, 16, 16, 16, 16, 16, 16,
1818         16, 16, 16, 16, 16, 16, 16, 16,
1819         16, 16, 16, 16, 16, 16, 16, 16,
1820         16, 16, 16, 16, 16, 16, 16, 16,
1821         16, 16, 16, 16, 16, 16, 16, 16,
1822         16, 16, 16, 16, 16, 16, 16, 16,
1823         16, 16, 16, 16, 16, 16, 16, 16,
1824         16, 16, 16, 16, 16, 16, 16, 16
1825     };
1826
1827     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1828     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
1829 }
1830
1831 static void
1832 gen75_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1833 {
1834     unsigned short intra_fqm[64] = {
1835         65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1836         65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1837         65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1838         65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1839         65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1840         65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1841         65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1842         65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1843     };
1844
1845     unsigned short non_intra_fqm[64] = {
1846         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1847         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1848         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1849         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1850         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1851         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1852         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1853         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1854     };
1855
1856     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1857     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
1858 }
1859
1860 static void
1861 gen75_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1862                                  struct intel_encoder_context *encoder_context,
1863                                  int x, int y,
1864                                  int next_x, int next_y,
1865                                  int is_fisrt_slice_group,
1866                                  int is_last_slice_group,
1867                                  int intra_slice,
1868                                  int qp,
1869                                  struct intel_batchbuffer *batch)
1870 {
1871     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1872
1873     if (batch == NULL)
1874         batch = encoder_context->base.batch;
1875
1876     BEGIN_BCS_BATCH(batch, 8);
1877
1878     OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1879     OUT_BCS_BATCH(batch,
1880                   0 << 31 |                             /* MbRateCtrlFlag */
1881                   !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
1882                   1 << 17 |                             /* Insert Header before the first slice group data */
1883                   1 << 16 |                             /* SliceData PresentFlag: always 1 */
1884                   1 << 15 |                             /* TailPresentFlag: always 1 */
1885                   0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
1886                   !!intra_slice << 13 |                 /* IntraSlice */
1887                   !!intra_slice << 12 |                 /* IntraSliceFlag */
1888                   0);
1889     OUT_BCS_BATCH(batch,
1890                   next_y << 24 |
1891                   next_x << 16 |
1892                   y << 8 |
1893                   x << 0 |
1894                   0);
1895     OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
1896     /* bitstream pointer is only loaded once for the first slice of a frame when 
1897      * LoadSlicePointerFlag is 0
1898      */
1899     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1900     OUT_BCS_BATCH(batch, 0);    /* FIXME: */
1901     OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
1902     OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
1903
1904     ADVANCE_BCS_BATCH(batch);
1905 }
1906
1907 static int
1908 gen75_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1909                                  struct intel_encoder_context *encoder_context,
1910                                  int x, int y,
1911                                  int first_mb_in_slice,
1912                                  int last_mb_in_slice,
1913                                  int first_mb_in_slice_group,
1914                                  int last_mb_in_slice_group,
1915                                  int mb_type,
1916                                  int qp_scale_code,
1917                                  int coded_block_pattern,
1918                                  unsigned char target_size_in_word,
1919                                  unsigned char max_size_in_word,
1920                                  struct intel_batchbuffer *batch)
1921 {
1922     int len_in_dwords = 9;
1923
1924     if (batch == NULL)
1925         batch = encoder_context->base.batch;
1926
1927     BEGIN_BCS_BATCH(batch, len_in_dwords);
1928
1929     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1930     OUT_BCS_BATCH(batch,
1931                   0 << 24 |     /* PackedMvNum */
1932                   0 << 20 |     /* MvFormat */
1933                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1934                   0 << 15 |     /* TransformFlag: frame DCT */
1935                   0 << 14 |     /* FieldMbFlag */
1936                   1 << 13 |     /* IntraMbFlag */
1937                   mb_type << 8 |   /* MbType: Intra */
1938                   0 << 2 |      /* SkipMbFlag */
1939                   0 << 0 |      /* InterMbMode */
1940                   0);
1941     OUT_BCS_BATCH(batch, y << 16 | x);
1942     OUT_BCS_BATCH(batch,
1943                   max_size_in_word << 24 |
1944                   target_size_in_word << 16 |
1945                   coded_block_pattern << 6 |      /* CBP */
1946                   0);
1947     OUT_BCS_BATCH(batch,
1948                   last_mb_in_slice << 31 |
1949                   first_mb_in_slice << 30 |
1950                   0 << 27 |     /* EnableCoeffClamp */
1951                   last_mb_in_slice_group << 26 |
1952                   0 << 25 |     /* MbSkipConvDisable */
1953                   first_mb_in_slice_group << 24 |
1954                   0 << 16 |     /* MvFieldSelect */
1955                   qp_scale_code << 0 |
1956                   0);
1957     OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
1958     OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
1959     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1960     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1961
1962     ADVANCE_BCS_BATCH(batch);
1963
1964     return len_in_dwords;
1965 }
1966
1967 #define MPEG2_INTER_MV_OFFSET   12 
1968
/*
 * Motion vector range table, indexed by f_code.  Entry 0 is a placeholder;
 * entries 1..9 hold the inclusive [low, high] range.
 */
static struct _mv_ranges
{
    int low;    /* in the unit of 1/2 pixel */
    int high;   /* in the unit of 1/2 pixel */
} mv_ranges[] = {
    {0, 0},
    {-16, 15},
    {-32, 31},
    {-64, 63},
    {-128, 127},
    {-256, 255},
    {-512, 511},
    {-1024, 1023},
    {-2048, 2047},
    {-4096, 4095}
};

/*
 * Sanitise one motion vector component.
 *
 * mv:          component value.
 * pos:         macroblock coordinate along the same axis.
 * display_max: picture size along that axis; compared after doubling.
 * f_code:      selects the range table entry; values outside 1..9 disable
 *              the range clamp.
 *
 * The component is reset to 0 when the displaced 16-sample macroblock would
 * start before 0 or end past the doubled display_max.  The result is then
 * clamped to the range that f_code selects.
 */
static int
mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
{
    int result = mv;

    if (result + pos * 16 * 2 < 0 ||
        result + (pos + 1) * 16 * 2 > display_max * 2)
        result = 0;

    /* No table entry applies: leave the value as it is. */
    if (f_code <= 0 || f_code >= 10)
        return result;

    if (result < mv_ranges[f_code].low)
        return mv_ranges[f_code].low;

    if (result > mv_ranges[f_code].high)
        return mv_ranges[f_code].high;

    return result;
}
2003
2004 static int
2005 gen75_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
2006                                  struct encode_state *encode_state,
2007                                  struct intel_encoder_context *encoder_context,
2008                                  unsigned int *msg,
2009                                  int width_in_mbs, int height_in_mbs,
2010                                  int x, int y,
2011                                  int first_mb_in_slice,
2012                                  int last_mb_in_slice,
2013                                  int first_mb_in_slice_group,
2014                                  int last_mb_in_slice_group,
2015                                  int qp_scale_code,
2016                                  unsigned char target_size_in_word,
2017                                  unsigned char max_size_in_word,
2018                                  struct intel_batchbuffer *batch)
2019 {
2020     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
2021     int len_in_dwords = 9;
2022     short *mvptr, mvx0, mvy0, mvx1, mvy1;
2023     
2024     if (batch == NULL)
2025         batch = encoder_context->base.batch;
2026
2027     mvptr = (short *)(msg + MPEG2_INTER_MV_OFFSET);
2028     mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
2029     mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
2030     mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
2031     mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
2032
2033     BEGIN_BCS_BATCH(batch, len_in_dwords);
2034
2035     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
2036     OUT_BCS_BATCH(batch,
2037                   2 << 24 |     /* PackedMvNum */
2038                   7 << 20 |     /* MvFormat */
2039                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
2040                   0 << 15 |     /* TransformFlag: frame DCT */
2041                   0 << 14 |     /* FieldMbFlag */
2042                   0 << 13 |     /* IntraMbFlag */
2043                   1 << 8 |      /* MbType: Frame-based */
2044                   0 << 2 |      /* SkipMbFlag */
2045                   0 << 0 |      /* InterMbMode */
2046                   0);
2047     OUT_BCS_BATCH(batch, y << 16 | x);
2048     OUT_BCS_BATCH(batch,
2049                   max_size_in_word << 24 |
2050                   target_size_in_word << 16 |
2051                   0x3f << 6 |   /* CBP */
2052                   0);
2053     OUT_BCS_BATCH(batch,
2054                   last_mb_in_slice << 31 |
2055                   first_mb_in_slice << 30 |
2056                   0 << 27 |     /* EnableCoeffClamp */
2057                   last_mb_in_slice_group << 26 |
2058                   0 << 25 |     /* MbSkipConvDisable */
2059                   first_mb_in_slice_group << 24 |
2060                   0 << 16 |     /* MvFieldSelect */
2061                   qp_scale_code << 0 |
2062                   0);
2063
2064     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
2065     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
2066     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
2067     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
2068
2069     ADVANCE_BCS_BATCH(batch);
2070
2071     return len_in_dwords;
2072 }
2073
2074 static void
2075 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
2076                                            struct encode_state *encode_state,
2077                                            struct intel_encoder_context *encoder_context,
2078                                            struct intel_batchbuffer *slice_batch)
2079 {
2080     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2081     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
2082
2083     if (encode_state->packed_header_data[idx]) {
2084         VAEncPackedHeaderParameterBuffer *param = NULL;
2085         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2086         unsigned int length_in_bits;
2087
2088         assert(encode_state->packed_header_param[idx]);
2089         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2090         length_in_bits = param->bit_length;
2091
2092         mfc_context->insert_object(ctx,
2093                                    encoder_context,
2094                                    header_data,
2095                                    ALIGN(length_in_bits, 32) >> 5,
2096                                    length_in_bits & 0x1f,
2097                                    5,   /* FIXME: check it */
2098                                    0,
2099                                    0,
2100                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2101                                    slice_batch);
2102     }
2103
2104     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2105
2106     if (encode_state->packed_header_data[idx]) {
2107         VAEncPackedHeaderParameterBuffer *param = NULL;
2108         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2109         unsigned int length_in_bits;
2110
2111         assert(encode_state->packed_header_param[idx]);
2112         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2113         length_in_bits = param->bit_length;
2114
2115         mfc_context->insert_object(ctx,
2116                                    encoder_context,
2117                                    header_data,
2118                                    ALIGN(length_in_bits, 32) >> 5,
2119                                    length_in_bits & 0x1f,
2120                                    5,   /* FIXME: check it */
2121                                    0,
2122                                    0,
2123                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2124                                    slice_batch);
2125     }
2126 }
2127
2128 static void 
2129 gen75_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2130                                      struct encode_state *encode_state,
2131                                      struct intel_encoder_context *encoder_context,
2132                                      int slice_index,
2133                                      VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2134                                      struct intel_batchbuffer *slice_batch)
2135 {
2136     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2137     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2138     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2139     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2140     unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2141     unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2142     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2143     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2144     int i, j;
2145     int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2146     unsigned int *msg = NULL;
2147     unsigned char *msg_ptr = NULL;
2148
2149     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
2150     h_start_pos = slice_param->macroblock_address % width_in_mbs;
2151     v_start_pos = slice_param->macroblock_address / width_in_mbs;
2152     assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
2153
2154     dri_bo_map(vme_context->vme_output.bo , 0);
2155     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
2156
2157     if (next_slice_group_param) {
2158         h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2159         v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
2160     } else {
2161         h_next_start_pos = 0;
2162         v_next_start_pos = height_in_mbs;
2163     }
2164
2165     gen75_mfc_mpeg2_slicegroup_state(ctx,
2166                                      encoder_context,
2167                                      h_start_pos,
2168                                      v_start_pos,
2169                                      h_next_start_pos,
2170                                      v_next_start_pos,
2171                                      slice_index == 0,
2172                                      next_slice_group_param == NULL,
2173                                      slice_param->is_intra_slice,
2174                                      slice_param->quantiser_scale_code,
2175                                      slice_batch);
2176
2177     if (slice_index == 0) 
2178         intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2179
2180     /* Insert '00' to make sure the header is valid */
2181     mfc_context->insert_object(ctx,
2182                                encoder_context,
2183                                (unsigned int*)section_delimiter,
2184                                1,
2185                                8,   /* 8bits in the last DWORD */
2186                                1,   /* 1 byte */
2187                                1,
2188                                0,
2189                                0,
2190                                slice_batch);
2191
2192     for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2193         /* PAK for each macroblocks */
2194         for (j = 0; j < slice_param->num_macroblocks; j++) {
2195             int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2196             int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2197             int first_mb_in_slice = (j == 0);
2198             int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2199             int first_mb_in_slice_group = (i == 0 && j == 0);
2200             int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2201                                           j == slice_param->num_macroblocks - 1);
2202
2203             msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
2204
2205             if (slice_param->is_intra_slice) {
2206                 gen75_mfc_mpeg2_pak_object_intra(ctx,
2207                                                  encoder_context,
2208                                                  h_pos, v_pos,
2209                                                  first_mb_in_slice,
2210                                                  last_mb_in_slice,
2211                                                  first_mb_in_slice_group,
2212                                                  last_mb_in_slice_group,
2213                                                  0x1a,
2214                                                  slice_param->quantiser_scale_code,
2215                                                  0x3f,
2216                                                  0,
2217                                                  0xff,
2218                                                  slice_batch);
2219             } else {
2220                 int inter_rdo, intra_rdo;
2221                 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
2222                 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
2223
2224                 if (intra_rdo < inter_rdo) 
2225                     gen75_mfc_mpeg2_pak_object_intra(ctx,
2226                                                      encoder_context,
2227                                                      h_pos, v_pos,
2228                                                      first_mb_in_slice,
2229                                                      last_mb_in_slice,
2230                                                      first_mb_in_slice_group,
2231                                                      last_mb_in_slice_group,
2232                                                      0x1a,
2233                                                      slice_param->quantiser_scale_code,
2234                                                      0x3f,
2235                                                      0,
2236                                                      0xff,
2237                                                      slice_batch);
2238                 else
2239                     gen75_mfc_mpeg2_pak_object_inter(ctx,
2240                                                      encode_state,
2241                                                      encoder_context,
2242                                                      msg,
2243                                                      width_in_mbs, height_in_mbs,
2244                                                      h_pos, v_pos,
2245                                                      first_mb_in_slice,
2246                                                      last_mb_in_slice,
2247                                                      first_mb_in_slice_group,
2248                                                      last_mb_in_slice_group,
2249                                                      slice_param->quantiser_scale_code,
2250                                                      0,
2251                                                      0xff,
2252                                                      slice_batch);
2253             }
2254         }
2255
2256         slice_param++;
2257     }
2258
2259     dri_bo_unmap(vme_context->vme_output.bo);
2260
2261     /* tail data */
2262     if (next_slice_group_param == NULL) { /* end of a picture */
2263         mfc_context->insert_object(ctx,
2264                                    encoder_context,
2265                                    (unsigned int *)tail_delimiter,
2266                                    2,
2267                                    8,   /* 8bits in the last DWORD */
2268                                    5,   /* 5 bytes */
2269                                    1,
2270                                    1,
2271                                    0,
2272                                    slice_batch);
2273     } else {        /* end of a lsice group */
2274         mfc_context->insert_object(ctx,
2275                                    encoder_context,
2276                                    (unsigned int *)section_delimiter,
2277                                    1,
2278                                    8,   /* 8bits in the last DWORD */
2279                                    1,   /* 1 byte */
2280                                    1,
2281                                    1,
2282                                    0,
2283                                    slice_batch);
2284     }
2285 }
2286
2287 /* 
2288  * A batch buffer for all slices, including slice state, 
2289  * slice insert object and slice pak object commands
2290  *
2291  */
2292 static dri_bo *
2293 gen75_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2294                                            struct encode_state *encode_state,
2295                                            struct intel_encoder_context *encoder_context)
2296 {
2297     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2298     struct i965_driver_data *i965 = i965_driver_data(ctx);
2299     struct intel_batchbuffer *batch;
2300     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2301     dri_bo *batch_bo;
2302     int i;
2303
2304     batch = mfc_context->aux_batchbuffer;
2305     batch_bo = batch->buffer;
2306
2307     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2308         if (i == encode_state->num_slice_params_ext - 1)
2309             next_slice_group_param = NULL;
2310         else
2311             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2312
2313         gen75_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2314     }
2315
2316     intel_batchbuffer_align(batch, 8);
2317     
2318     BEGIN_BCS_BATCH(batch, 2);
2319     OUT_BCS_BATCH(batch, 0);
2320     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2321     ADVANCE_BCS_BATCH(batch);
2322
2323     dri_bo_reference(batch_bo);
2324     intel_batchbuffer_free(batch);
2325     mfc_context->aux_batchbuffer = NULL;
2326
2327     return batch_bo;
2328 }
2329
2330 static void
2331 gen75_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2332                                             struct encode_state *encode_state,
2333                                             struct intel_encoder_context *encoder_context)
2334 {
2335     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2336
2337     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2338     mfc_context->set_surface_state(ctx, encoder_context);
2339     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2340     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
2341     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2342     gen75_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2343     gen75_mfc_mpeg2_qm_state(ctx, encoder_context);
2344     gen75_mfc_mpeg2_fqm_state(ctx, encoder_context);
2345 }
2346
2347 static void
2348 gen75_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2349                                     struct encode_state *encode_state,
2350                                     struct intel_encoder_context *encoder_context)
2351 {
2352     struct intel_batchbuffer *batch = encoder_context->base.batch;
2353     dri_bo *slice_batch_bo;
2354
2355     slice_batch_bo = gen75_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2356
2357     // begin programing
2358     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
2359     intel_batchbuffer_emit_mi_flush(batch);
2360     
2361     // picture level programing
2362     gen75_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
2363
2364     BEGIN_BCS_BATCH(batch, 2);
2365     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
2366     OUT_BCS_RELOC(batch,
2367                   slice_batch_bo,
2368                   I915_GEM_DOMAIN_COMMAND, 0, 
2369                   0);
2370     ADVANCE_BCS_BATCH(batch);
2371
2372     // end programing
2373     intel_batchbuffer_end_atomic(batch);
2374
2375     dri_bo_unreference(slice_batch_bo);
2376 }
2377
2378 static VAStatus
2379 intel_mfc_mpeg2_prepare(VADriverContextP ctx, 
2380                         struct encode_state *encode_state,
2381                         struct intel_encoder_context *encoder_context)
2382 {
2383     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2384     struct object_surface *obj_surface; 
2385     struct object_buffer *obj_buffer;
2386     struct i965_coded_buffer_segment *coded_buffer_segment;
2387     VAStatus vaStatus = VA_STATUS_SUCCESS;
2388     dri_bo *bo;
2389     int i;
2390
2391     /* reconstructed surface */
2392     obj_surface = encode_state->reconstructed_object;
2393     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2394     mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2395     dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2396     mfc_context->surface_state.width = obj_surface->orig_width;
2397     mfc_context->surface_state.height = obj_surface->orig_height;
2398     mfc_context->surface_state.w_pitch = obj_surface->width;
2399     mfc_context->surface_state.h_pitch = obj_surface->height;
2400
2401     /* forward reference */
2402     obj_surface = encode_state->reference_objects[0];
2403
2404     if (obj_surface && obj_surface->bo) {
2405         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2406         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2407     } else
2408         mfc_context->reference_surfaces[0].bo = NULL;
2409
2410     /* backward reference */
2411     obj_surface = encode_state->reference_objects[1];
2412
2413     if (obj_surface && obj_surface->bo) {
2414         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2415         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2416     } else {
2417         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2418
2419         if (mfc_context->reference_surfaces[1].bo)
2420             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2421     }
2422
2423     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2424         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2425
2426         if (mfc_context->reference_surfaces[i].bo)
2427             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2428     }
2429     
2430     /* input YUV surface */
2431     obj_surface = encode_state->input_yuv_object;
2432     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2433     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2434
2435     /* coded buffer */
2436     obj_buffer = encode_state->coded_buf_object;
2437     bo = obj_buffer->buffer_store->bo;
2438     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2439     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2440     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2441     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2442
2443     /* set the internal flag to 0 to indicate the coded size is unknown */
2444     dri_bo_map(bo, 1);
2445     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2446     coded_buffer_segment->mapped = 0;
2447     coded_buffer_segment->codec = encoder_context->codec;
2448     dri_bo_unmap(bo);
2449
2450     return vaStatus;
2451 }
2452
2453 static VAStatus
2454 gen75_mfc_mpeg2_encode_picture(VADriverContextP ctx, 
2455                                struct encode_state *encode_state,
2456                                struct intel_encoder_context *encoder_context)
2457 {
2458     gen75_mfc_init(ctx, encode_state, encoder_context);
2459     intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2460     /*Programing bcs pipeline*/
2461     gen75_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2462     gen75_mfc_run(ctx, encode_state, encoder_context);
2463
2464     return VA_STATUS_SUCCESS;
2465 }
2466
2467 static void
2468 gen75_mfc_context_destroy(void *context)
2469 {
2470     struct gen6_mfc_context *mfc_context = context;
2471     int i;
2472
2473     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2474     mfc_context->post_deblocking_output.bo = NULL;
2475
2476     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2477     mfc_context->pre_deblocking_output.bo = NULL;
2478
2479     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2480     mfc_context->uncompressed_picture_source.bo = NULL;
2481
2482     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
2483     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2484
2485     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2486         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2487         mfc_context->direct_mv_buffers[i].bo = NULL;
2488     }
2489
2490     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2491     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
2492
2493     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2494     mfc_context->macroblock_status_buffer.bo = NULL;
2495
2496     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2497     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2498
2499     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2500     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2501
2502     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2503         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2504         mfc_context->reference_surfaces[i].bo = NULL;  
2505     }
2506
2507     i965_gpe_context_destroy(&mfc_context->gpe_context);
2508
2509     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2510     mfc_context->mfc_batchbuffer_surface.bo = NULL;
2511
2512     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2513     mfc_context->aux_batchbuffer_surface.bo = NULL;
2514
2515     if (mfc_context->aux_batchbuffer)
2516         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2517
2518     mfc_context->aux_batchbuffer = NULL;
2519
2520     free(mfc_context);
2521 }
2522
2523 static VAStatus gen75_mfc_pipeline(VADriverContextP ctx,
2524                                    VAProfile profile,
2525                                    struct encode_state *encode_state,
2526                                    struct intel_encoder_context *encoder_context)
2527 {
2528     VAStatus vaStatus;
2529
2530     switch (profile) {
2531     case VAProfileH264Baseline:
2532     case VAProfileH264Main:
2533     case VAProfileH264High:
2534         vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
2535         break;
2536
2537         /* FIXME: add for other profile */
2538     case VAProfileMPEG2Simple:
2539     case VAProfileMPEG2Main:
2540         vaStatus = gen75_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
2541         break;
2542
2543     default:
2544         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2545         break;
2546     }
2547
2548     return vaStatus;
2549 }
2550
2551 Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2552 {
2553     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
2554
2555     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2556
2557     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
2558     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
2559
2560     mfc_context->gpe_context.curbe.length = 32 * 4;
2561
2562     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2563     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
2564     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
2565     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
2566     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
2567
2568     i965_gpe_load_kernels(ctx,
2569                           &mfc_context->gpe_context,
2570                           gen75_mfc_kernels,
2571                           1);
2572
2573     mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
2574     mfc_context->set_surface_state = gen75_mfc_surface_state;
2575     mfc_context->ind_obj_base_addr_state = gen75_mfc_ind_obj_base_addr_state;
2576     mfc_context->avc_img_state = gen75_mfc_avc_img_state;
2577     mfc_context->avc_qm_state = gen75_mfc_avc_qm_state;
2578     mfc_context->avc_fqm_state = gen75_mfc_avc_fqm_state;
2579     mfc_context->insert_object = gen75_mfc_avc_insert_object;
2580     mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;
2581
2582     encoder_context->mfc_context = mfc_context;
2583     encoder_context->mfc_context_destroy = gen75_mfc_context_destroy;
2584     encoder_context->mfc_pipeline = gen75_mfc_pipeline;
2585     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
2586
2587     return True;
2588 }