ffd38ea8ff7657e56b3095e1dd99cd857109ac94
[profile/ivi/vaapi-intel-driver.git] / src / gen75_mfc.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45
/* Build-time switch; it is not referenced in the visible part of this file
 * (NOTE(review): presumably selects the software-built slice batchbuffer
 * path -- confirm against the rest of the file). */
#define MFC_SOFTWARE_HASWELL    1

/* Lowest value of intel.revision that is treated as "B0 stepping or later". */
#define B0_STEP_REV             2

/*
 * Non-zero when the driver data pointed to by i965 reports a revision of at
 * least B0_STEP_REV.  The argument is fully parenthesized so that any pointer
 * expression (e.g. "&drv" or "p + 0") expands correctly.
 */
#define IS_STEPPING_BPLUS(i965) (((i965)->intel.revision) >= B0_STEP_REV)
50
51 static const uint32_t gen75_mfc_batchbuffer_avc_intra[][4] = {
52 #include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
53 };
54
55 static const uint32_t gen75_mfc_batchbuffer_avc_inter[][4] = {
56 #include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
57 };
58
59 static struct i965_kernel gen75_mfc_kernels[] = {
60     {
61         "MFC AVC INTRA BATCHBUFFER ",
62         MFC_BATCHBUFFER_AVC_INTRA,
63         gen75_mfc_batchbuffer_avc_intra,
64         sizeof(gen75_mfc_batchbuffer_avc_intra),
65         NULL
66     },
67
68     {
69         "MFC AVC INTER BATCHBUFFER ",
70         MFC_BATCHBUFFER_AVC_INTER,
71         gen75_mfc_batchbuffer_avc_inter,
72         sizeof(gen75_mfc_batchbuffer_avc_inter),
73         NULL
74     },
75 };
76
/*
 * Bit masks and field values for inter-prediction macroblock data.  None of
 * them is referenced in the visible part of this file (NOTE(review):
 * presumably applied to VME output messages -- confirm at the use sites).
 */

/* Inter partition mode: mask and the values found under it. */
#define INTER_MODE_MASK     0x03
#define INTER_8X8           0x03
#define INTER_16X8          0x01
#define INTER_8X16          0x02

/* Mask for the sub-macroblock shape byte (bits 8..15). */
#define SUBMB_SHAPE_MASK    0x00FF00

/* Motion-vector count selectors, placed at bit 20 and up. */
#define INTER_MV8           (4 << 20)
#define INTER_MV32          (6 << 20)
85
86
87 static void
88 gen75_mfc_pipe_mode_select(VADriverContextP ctx,
89                           int standard_select,
90                           struct intel_encoder_context *encoder_context)
91 {
92     struct intel_batchbuffer *batch = encoder_context->base.batch;
93     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
94
95     assert(standard_select == MFX_FORMAT_MPEG2 ||
96            standard_select == MFX_FORMAT_AVC);
97
98     BEGIN_BCS_BATCH(batch, 5);
99
100     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
101     OUT_BCS_BATCH(batch,
102                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
103                   (MFD_MODE_VLD << 15) | /* VLD mode */
104                   (0 << 10) | /* Stream-Out Enable */
105                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
106                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
107                   (0 << 5)  | /* not in stitch mode */
108                   (1 << 4)  | /* encoding mode */
109                   (standard_select << 0));  /* standard select: avc or mpeg2 */
110     OUT_BCS_BATCH(batch,
111                   (0 << 7)  | /* expand NOA bus flag */
112                   (0 << 6)  | /* disable slice-level clock gating */
113                   (0 << 5)  | /* disable clock gating for NOA */
114                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
115                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
116                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
117                   (0 << 1)  |
118                   (0 << 0));
119     OUT_BCS_BATCH(batch, 0);
120     OUT_BCS_BATCH(batch, 0);
121
122     ADVANCE_BCS_BATCH(batch);
123 }
124
125 static void
126 gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
127 {
128     struct intel_batchbuffer *batch = encoder_context->base.batch;
129     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
130
131     BEGIN_BCS_BATCH(batch, 6);
132
133     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
134     OUT_BCS_BATCH(batch, 0);
135     OUT_BCS_BATCH(batch,
136                   ((mfc_context->surface_state.height - 1) << 18) |
137                   ((mfc_context->surface_state.width - 1) << 4));
138     OUT_BCS_BATCH(batch,
139                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
140                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
141                   (0 << 22) | /* surface object control state, FIXME??? */
142                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
143                   (0 << 2)  | /* must be 0 for interleave U/V */
144                   (1 << 1)  | /* must be tiled */
145                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
146     OUT_BCS_BATCH(batch,
147                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
148                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
149     OUT_BCS_BATCH(batch, 0);
150
151     ADVANCE_BCS_BATCH(batch);
152 }
153
154 static void
155 gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
156                                 struct intel_encoder_context *encoder_context)
157 {
158     struct intel_batchbuffer *batch = encoder_context->base.batch;
159     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
160     struct gen6_vme_context *vme_context = encoder_context->vme_context;
161
162     BEGIN_BCS_BATCH(batch, 26);
163
164     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
165         /* the DW1-3 is for the MFX indirect bistream offset */
166     OUT_BCS_BATCH(batch, 0);
167     OUT_BCS_BATCH(batch, 0);
168     OUT_BCS_BATCH(batch, 0);
169         /* the DW4-5 is the MFX upper bound */
170     OUT_BCS_BATCH(batch, 0);
171     OUT_BCS_BATCH(batch, 0);
172
173     /* the DW6-10 is for MFX Indirect MV Object Base Address */
174     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
175     OUT_BCS_BATCH(batch, 0);
176     OUT_BCS_BATCH(batch, 0);
177     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
178     OUT_BCS_BATCH(batch, 0);
179
180      /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
181     OUT_BCS_BATCH(batch, 0);
182     OUT_BCS_BATCH(batch, 0);
183     OUT_BCS_BATCH(batch, 0);
184     OUT_BCS_BATCH(batch, 0);
185     OUT_BCS_BATCH(batch, 0);
186
187      /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */    
188     OUT_BCS_BATCH(batch, 0);
189     OUT_BCS_BATCH(batch, 0);
190     OUT_BCS_BATCH(batch, 0);
191     OUT_BCS_BATCH(batch, 0);
192     OUT_BCS_BATCH(batch, 0);
193
194     /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/        
195     OUT_BCS_RELOC(batch,
196                   mfc_context->mfc_indirect_pak_bse_object.bo,
197                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
198                   0);
199     OUT_BCS_BATCH(batch, 0);
200     OUT_BCS_BATCH(batch, 0);
201         
202     OUT_BCS_RELOC(batch,
203                   mfc_context->mfc_indirect_pak_bse_object.bo,
204                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
205                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
206     OUT_BCS_BATCH(batch, 0);
207
208     ADVANCE_BCS_BATCH(batch);
209 }
210
211 static void
212 gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
213 {
214     struct intel_batchbuffer *batch = encoder_context->base.batch;
215     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
216     struct gen6_vme_context *vme_context = encoder_context->vme_context;
217     struct i965_driver_data *i965 = i965_driver_data(ctx);
218
219     if (IS_STEPPING_BPLUS(i965)) {
220         gen75_mfc_ind_obj_base_addr_state_bplus(ctx, encoder_context);
221         return;
222     }
223     BEGIN_BCS_BATCH(batch, 11);
224
225     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
226     OUT_BCS_BATCH(batch, 0);
227     OUT_BCS_BATCH(batch, 0);
228     /* MFX Indirect MV Object Base Address */
229     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
230     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
231     OUT_BCS_BATCH(batch, 0);
232     OUT_BCS_BATCH(batch, 0);
233     OUT_BCS_BATCH(batch, 0);
234     OUT_BCS_BATCH(batch, 0);
235     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
236     OUT_BCS_RELOC(batch,
237                   mfc_context->mfc_indirect_pak_bse_object.bo,
238                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
239                   0);
240     OUT_BCS_RELOC(batch,
241                   mfc_context->mfc_indirect_pak_bse_object.bo,
242                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
243                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
244
245     ADVANCE_BCS_BATCH(batch);
246 }
247
248 static void
249 gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,  
250                        struct intel_encoder_context *encoder_context)
251 {
252     struct intel_batchbuffer *batch = encoder_context->base.batch;
253     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
254     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
255
256     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
257     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
258
259     BEGIN_BCS_BATCH(batch, 16);
260
261     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
262     OUT_BCS_BATCH(batch,
263                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
264     OUT_BCS_BATCH(batch, 
265                   ((height_in_mbs - 1) << 16) | 
266                   ((width_in_mbs - 1) << 0));
267     OUT_BCS_BATCH(batch, 
268                   (0 << 24) |   /* Second Chroma QP Offset */
269                   (0 << 16) |   /* Chroma QP Offset */
270                   (0 << 14) |   /* Max-bit conformance Intra flag */
271                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
272                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
273                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
274                   (0 << 8)  |   /* FIXME: Image Structure */
275                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
276     OUT_BCS_BATCH(batch,
277                   (0 << 16) |   /* Mininum Frame size */
278                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
279                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
280                   (0 << 13) |   /* CABAC 0 word insertion test enable */
281                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
282                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
283                   (0 << 8)  |   /* FIXME: MbMvFormatFlag */
284                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
285                   (0 << 6)  |   /* Only valid for VLD decoding mode */
286                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
287                   (0 << 4)  |   /* Direct 8x8 inference flag */
288                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
289                   (1 << 2)  |   /* Frame MB only flag */
290                   (0 << 1)  |   /* MBAFF mode is in active */
291                   (0 << 0));    /* Field picture flag */
292     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
293     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
294                   (0xBB8 << 16) |       /* InterMbMaxSz */
295                   (0xEE8) );            /* IntraMbMaxSz */
296     OUT_BCS_BATCH(batch, 0);            /* Reserved */
297     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
298     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */        
299     OUT_BCS_BATCH(batch, 0x8C000000);
300     OUT_BCS_BATCH(batch, 0x00010000);
301     OUT_BCS_BATCH(batch, 0);
302     OUT_BCS_BATCH(batch, 0);
303     OUT_BCS_BATCH(batch, 0);
304     OUT_BCS_BATCH(batch, 0);
305
306     ADVANCE_BCS_BATCH(batch);
307 }
308
309 static void
310 gen75_mfc_qm_state(VADriverContextP ctx,
311                   int qm_type,
312                   unsigned int *qm,
313                   int qm_length,
314                   struct intel_encoder_context *encoder_context)
315 {
316     struct intel_batchbuffer *batch = encoder_context->base.batch;
317     unsigned int qm_buffer[16];
318
319     assert(qm_length <= 16);
320     assert(sizeof(*qm) == 4);
321     memcpy(qm_buffer, qm, qm_length * 4);
322
323     BEGIN_BCS_BATCH(batch, 18);
324     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
325     OUT_BCS_BATCH(batch, qm_type << 0);
326     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
327     ADVANCE_BCS_BATCH(batch);
328 }
329
330 static void
331 gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
332 {
333     unsigned int qm[16] = {
334         0x10101010, 0x10101010, 0x10101010, 0x10101010,
335         0x10101010, 0x10101010, 0x10101010, 0x10101010,
336         0x10101010, 0x10101010, 0x10101010, 0x10101010,
337         0x10101010, 0x10101010, 0x10101010, 0x10101010
338     };
339
340     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
341     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
342     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
343     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
344 }
345
346 static void
347 gen75_mfc_fqm_state(VADriverContextP ctx,
348                    int fqm_type,
349                    unsigned int *fqm,
350                    int fqm_length,
351                    struct intel_encoder_context *encoder_context)
352 {
353     struct intel_batchbuffer *batch = encoder_context->base.batch;
354     unsigned int fqm_buffer[32];
355
356     assert(fqm_length <= 32);
357     assert(sizeof(*fqm) == 4);
358     memcpy(fqm_buffer, fqm, fqm_length * 4);
359
360     BEGIN_BCS_BATCH(batch, 34);
361     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
362     OUT_BCS_BATCH(batch, fqm_type << 0);
363     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
364     ADVANCE_BCS_BATCH(batch);
365 }
366
367 static void
368 gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
369 {
370     unsigned int qm[32] = {
371         0x10001000, 0x10001000, 0x10001000, 0x10001000,
372         0x10001000, 0x10001000, 0x10001000, 0x10001000,
373         0x10001000, 0x10001000, 0x10001000, 0x10001000,
374         0x10001000, 0x10001000, 0x10001000, 0x10001000,
375         0x10001000, 0x10001000, 0x10001000, 0x10001000,
376         0x10001000, 0x10001000, 0x10001000, 0x10001000,
377         0x10001000, 0x10001000, 0x10001000, 0x10001000,
378         0x10001000, 0x10001000, 0x10001000, 0x10001000
379     };
380
381     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
382     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
383     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
384     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
385 }
386
387 static void
388 gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
389                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
390                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
391                            struct intel_batchbuffer *batch)
392 {
393     if (batch == NULL)
394         batch = encoder_context->base.batch;
395
396     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
397
398     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
399     OUT_BCS_BATCH(batch,
400                   (0 << 16) |   /* always start at offset 0 */
401                   (data_bits_in_last_dw << 8) |
402                   (skip_emul_byte_count << 4) |
403                   (!!emulation_flag << 3) |
404                   ((!!is_last_header) << 2) |
405                   ((!!is_end_of_slice) << 1) |
406                   (0 << 0));    /* FIXME: ??? */
407     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
408
409     ADVANCE_BCS_BATCH(batch);
410 }
411
412
413 static void gen75_mfc_init(VADriverContextP ctx,
414                         struct encode_state *encode_state,
415                         struct intel_encoder_context *encoder_context)
416 {
417     struct i965_driver_data *i965 = i965_driver_data(ctx);
418     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
419     dri_bo *bo;
420     int i;
421     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
422     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
423     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
424
425     /*Encode common setup for MFC*/
426     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
427     mfc_context->post_deblocking_output.bo = NULL;
428
429     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
430     mfc_context->pre_deblocking_output.bo = NULL;
431
432     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
433     mfc_context->uncompressed_picture_source.bo = NULL;
434
435     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
436     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
437
438     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
439         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
440         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
441         mfc_context->direct_mv_buffers[i].bo = NULL;
442     }
443
444     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
445         if (mfc_context->reference_surfaces[i].bo != NULL)
446             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
447         mfc_context->reference_surfaces[i].bo = NULL;  
448     }
449
450     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
451     bo = dri_bo_alloc(i965->intel.bufmgr,
452                       "Buffer",
453                       width_in_mbs * 64,
454                       64);
455     assert(bo);
456     mfc_context->intra_row_store_scratch_buffer.bo = bo;
457
458     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
459     bo = dri_bo_alloc(i965->intel.bufmgr,
460                       "Buffer",
461                       width_in_mbs * height_in_mbs * 16,
462                       64);
463     assert(bo);
464     mfc_context->macroblock_status_buffer.bo = bo;
465
466     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
467     bo = dri_bo_alloc(i965->intel.bufmgr,
468                       "Buffer",
469                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
470                       64);
471     assert(bo);
472     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
473
474     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
475     bo = dri_bo_alloc(i965->intel.bufmgr,
476                       "Buffer",
477                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
478                       0x1000);
479     assert(bo);
480     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
481
482     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
483     mfc_context->mfc_batchbuffer_surface.bo = NULL;
484
485     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
486     mfc_context->aux_batchbuffer_surface.bo = NULL;
487
488     if (mfc_context->aux_batchbuffer)
489         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
490
491     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0);
492     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
493     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
494     mfc_context->aux_batchbuffer_surface.pitch = 16;
495     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
496     mfc_context->aux_batchbuffer_surface.size_block = 16;
497
498     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
499 }
500
501 static void
502 gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
503                                 struct intel_encoder_context *encoder_context)
504 {
505     struct intel_batchbuffer *batch = encoder_context->base.batch;
506     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
507     int i;
508
509     BEGIN_BCS_BATCH(batch, 61);
510
511     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
512
513     /* the DW1-3 is for pre_deblocking */
514     if (mfc_context->pre_deblocking_output.bo)
515         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
516                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
517                       0);
518     else
519         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
520
521         OUT_BCS_BATCH(batch, 0);
522         OUT_BCS_BATCH(batch, 0);
523      /* the DW4-6 is for the post_deblocking */
524
525     if (mfc_context->post_deblocking_output.bo)
526         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
527                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
528                       0);                                                                                       /* post output addr  */ 
529     else
530         OUT_BCS_BATCH(batch, 0);
531         OUT_BCS_BATCH(batch, 0);
532         OUT_BCS_BATCH(batch, 0);
533
534      /* the DW7-9 is for the uncompressed_picture */
535     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
536                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
537                   0); /* uncompressed data */
538
539         OUT_BCS_BATCH(batch, 0);
540         OUT_BCS_BATCH(batch, 0);
541
542      /* the DW10-12 is for the mb status */
543     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
544                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
545                   0); /* StreamOut data*/
546         OUT_BCS_BATCH(batch, 0);
547         OUT_BCS_BATCH(batch, 0);
548
549      /* the DW13-15 is for the intra_row_store_scratch */
550     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
551                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
552                   0);   
553         OUT_BCS_BATCH(batch, 0);
554         OUT_BCS_BATCH(batch, 0);
555
556      /* the DW16-18 is for the deblocking filter */
557     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
558                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
559                   0);
560         OUT_BCS_BATCH(batch, 0);
561         OUT_BCS_BATCH(batch, 0);
562
563     /* the DW 19-50 is for Reference pictures*/
564     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
565         if ( mfc_context->reference_surfaces[i].bo != NULL) {
566             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
567                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
568                           0);                   
569         } else {
570             OUT_BCS_BATCH(batch, 0);
571         }
572         OUT_BCS_BATCH(batch, 0);
573     }
574         OUT_BCS_BATCH(batch, 0);
575
576         /* The DW 52-54 is for the MB status buffer */
577     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
578                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
579                   0);                                                                                   /* Macroblock status buffer*/
580         
581         OUT_BCS_BATCH(batch, 0);
582         OUT_BCS_BATCH(batch, 0);
583
584         /* the DW 55-57 is the ILDB buffer */
585         OUT_BCS_BATCH(batch, 0);
586         OUT_BCS_BATCH(batch, 0);
587         OUT_BCS_BATCH(batch, 0);
588
589         /* the DW 58-60 is the second ILDB buffer */
590         OUT_BCS_BATCH(batch, 0);
591         OUT_BCS_BATCH(batch, 0);
592         OUT_BCS_BATCH(batch, 0);
593     ADVANCE_BCS_BATCH(batch);
594 }
595
596 static void
597 gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
598 {
599     struct intel_batchbuffer *batch = encoder_context->base.batch;
600     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
601     struct i965_driver_data *i965 = i965_driver_data(ctx);
602     int i;
603
604     if (IS_STEPPING_BPLUS(i965)) {
605         gen75_mfc_pipe_buf_addr_state_bplus(ctx, encoder_context);
606         return;
607     }
608
609     BEGIN_BCS_BATCH(batch, 25);
610
611     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
612
613     if (mfc_context->pre_deblocking_output.bo)
614         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
615                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
616                       0);
617     else
618         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
619
620     if (mfc_context->post_deblocking_output.bo)
621         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
622                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
623                       0);                                                                                       /* post output addr  */ 
624     else
625         OUT_BCS_BATCH(batch, 0);
626
627     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
628                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
629                   0);                                                                                   /* uncompressed data */
630     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
631                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
632                   0);                                                                                   /* StreamOut data*/
633     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
634                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
635                   0);   
636     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
637                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
638                   0);
639     /* 7..22 Reference pictures*/
640     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
641         if ( mfc_context->reference_surfaces[i].bo != NULL) {
642             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
643                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
644                           0);                   
645         } else {
646             OUT_BCS_BATCH(batch, 0);
647         }
648     }
649     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
650                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
651                   0);                                                                                   /* Macroblock status buffer*/
652
653         OUT_BCS_BATCH(batch, 0);
654
655     ADVANCE_BCS_BATCH(batch);
656 }
657
658 static void
659 gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
660                                 struct intel_encoder_context *encoder_context)
661 {
662     struct intel_batchbuffer *batch = encoder_context->base.batch;
663     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
664
665     int i;
666
667     BEGIN_BCS_BATCH(batch, 71);
668
669     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
670
671     /* Reference frames and Current frames */
672     /* the DW1-32 is for the direct MV for reference */
673     for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
674         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
675             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
676                           I915_GEM_DOMAIN_INSTRUCTION, 0,
677                           0);
678             OUT_BCS_BATCH(batch, 0);
679         } else {
680             OUT_BCS_BATCH(batch, 0);
681             OUT_BCS_BATCH(batch, 0);
682         }
683     }
684         OUT_BCS_BATCH(batch, 0);
685
686         /* the DW34-36 is the MV for the current reference */
687         OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
688                           I915_GEM_DOMAIN_INSTRUCTION, 0,
689                           0);
690
691         OUT_BCS_BATCH(batch, 0);
692         OUT_BCS_BATCH(batch, 0);
693
694     /* POL list */
695     for(i = 0; i < 32; i++) {
696         OUT_BCS_BATCH(batch, i/2);
697     }
698     OUT_BCS_BATCH(batch, 0);
699     OUT_BCS_BATCH(batch, 0);
700
701     ADVANCE_BCS_BATCH(batch);
702 }
703
704 static void
705 gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
706 {
707     struct intel_batchbuffer *batch = encoder_context->base.batch;
708     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
709     struct i965_driver_data *i965 = i965_driver_data(ctx);
710     int i;
711
712     if (IS_STEPPING_BPLUS(i965)) {
713         gen75_mfc_avc_directmode_state_bplus(ctx, encoder_context);
714         return;
715     }
716
717     BEGIN_BCS_BATCH(batch, 69);
718
719     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
720
721     /* Reference frames and Current frames */
722     for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
723         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
724             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
725                           I915_GEM_DOMAIN_INSTRUCTION, 0,
726                           0);
727         } else {
728             OUT_BCS_BATCH(batch, 0);
729         }
730     }
731
732     /* POL list */
733     for(i = 0; i < 32; i++) {
734         OUT_BCS_BATCH(batch, i/2);
735     }
736     OUT_BCS_BATCH(batch, 0);
737     OUT_BCS_BATCH(batch, 0);
738
739     ADVANCE_BCS_BATCH(batch);
740 }
741
742 static void
743 gen75_mfc_avc_ref_idx_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
744 {
745     struct intel_batchbuffer *batch = encoder_context->base.batch;
746     int i;
747
748     BEGIN_BCS_BATCH(batch, 10);
749     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
750     OUT_BCS_BATCH(batch, 0);                  //Select L0
751     OUT_BCS_BATCH(batch, 0x80808020);         //Only 1 reference
752     for(i = 0; i < 7; i++) {
753         OUT_BCS_BATCH(batch, 0x80808080);
754     }   
755     ADVANCE_BCS_BATCH(batch);
756
757     BEGIN_BCS_BATCH(batch, 10);
758     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
759     OUT_BCS_BATCH(batch, 1);                  //Select L1
760     OUT_BCS_BATCH(batch, 0x80808022);         //Only 1 reference
761     for(i = 0; i < 7; i++) {
762         OUT_BCS_BATCH(batch, 0x80808080);
763     }   
764     ADVANCE_BCS_BATCH(batch);
765 }
766
767
768 static void
769 gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
770                                 struct intel_encoder_context *encoder_context)
771 {
772     struct intel_batchbuffer *batch = encoder_context->base.batch;
773     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
774
775     BEGIN_BCS_BATCH(batch, 10);
776
777     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
778     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
779                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
780                   0);
781     OUT_BCS_BATCH(batch, 0);
782     OUT_BCS_BATCH(batch, 0);
783         
784         /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
785     OUT_BCS_BATCH(batch, 0);
786     OUT_BCS_BATCH(batch, 0);
787     OUT_BCS_BATCH(batch, 0);
788
789         /* the DW7-9 is for Bitplane Read Buffer Base Address */
790     OUT_BCS_BATCH(batch, 0);
791     OUT_BCS_BATCH(batch, 0);
792     OUT_BCS_BATCH(batch, 0);
793
794     ADVANCE_BCS_BATCH(batch);
795 }
796
797 static void
798 gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
799 {
800     struct intel_batchbuffer *batch = encoder_context->base.batch;
801     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
802     struct i965_driver_data *i965 = i965_driver_data(ctx);
803
804     if (IS_STEPPING_BPLUS(i965)) {
805         gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, encoder_context);
806         return;
807     }
808
809     BEGIN_BCS_BATCH(batch, 4);
810
811     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
812     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
813                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
814                   0);
815     OUT_BCS_BATCH(batch, 0);
816     OUT_BCS_BATCH(batch, 0);
817
818     ADVANCE_BCS_BATCH(batch);
819 }
820
821
822 static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
823                                       struct encode_state *encode_state,
824                                       struct intel_encoder_context *encoder_context)
825 {
826     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
827
828     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
829     mfc_context->set_surface_state(ctx, encoder_context);
830     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
831     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
832     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
833     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
834     mfc_context->avc_qm_state(ctx, encoder_context);
835     mfc_context->avc_fqm_state(ctx, encoder_context);
836     gen75_mfc_avc_directmode_state(ctx, encoder_context); 
837     gen75_mfc_avc_ref_idx_state(ctx, encoder_context);
838 }
839
840
841 static VAStatus gen75_mfc_run(VADriverContextP ctx, 
842                              struct encode_state *encode_state,
843                              struct intel_encoder_context *encoder_context)
844 {
845     struct intel_batchbuffer *batch = encoder_context->base.batch;
846
847     intel_batchbuffer_flush(batch);             //run the pipeline
848
849     return VA_STATUS_SUCCESS;
850 }
851
852
853 static VAStatus
854 gen75_mfc_stop(VADriverContextP ctx, 
855               struct encode_state *encode_state,
856               struct intel_encoder_context *encoder_context,
857               int *encoded_bits_size)
858 {
859     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
860     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
861     VACodedBufferSegment *coded_buffer_segment;
862     
863     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
864     assert(vaStatus == VA_STATUS_SUCCESS);
865     *encoded_bits_size = coded_buffer_segment->size * 8;
866     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
867
868     return VA_STATUS_SUCCESS;
869 }
870
871
872 static void
873 gen75_mfc_avc_slice_state(VADriverContextP ctx,
874                          VAEncPictureParameterBufferH264 *pic_param,
875                          VAEncSliceParameterBufferH264 *slice_param,
876                          struct encode_state *encode_state,
877                          struct intel_encoder_context *encoder_context,
878                          int rate_control_enable,
879                          int qp,
880                          struct intel_batchbuffer *batch)
881 {
882     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
883     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
884     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
885     int beginmb = slice_param->macroblock_address;
886     int endmb = beginmb + slice_param->num_macroblocks;
887     int beginx = beginmb % width_in_mbs;
888     int beginy = beginmb / width_in_mbs;
889     int nextx =  endmb % width_in_mbs;
890     int nexty = endmb / width_in_mbs;
891     int slice_type = slice_param->slice_type;
892     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
893     int bit_rate_control_target, maxQpN, maxQpP;
894     unsigned char correct[6], grow, shrink;
895     int i;
896     int weighted_pred_idc = 0;
897     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
898     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
899
900     if (batch == NULL)
901         batch = encoder_context->base.batch;
902
903     bit_rate_control_target = slice_type;
904     if (slice_type == SLICE_TYPE_SP)
905         bit_rate_control_target = SLICE_TYPE_P;
906     else if (slice_type == SLICE_TYPE_SI)
907         bit_rate_control_target = SLICE_TYPE_I;
908
909     if (slice_type == SLICE_TYPE_P) {
910         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
911     } else if (slice_type == SLICE_TYPE_B) {
912         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
913
914         if (weighted_pred_idc == 2) {
915             /* 8.4.3 - Derivation process for prediction weights (8-279) */
916             luma_log2_weight_denom = 5;
917             chroma_log2_weight_denom = 5;
918         }
919     }
920
921     maxQpN = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpNegModifier;
922     maxQpP = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpPosModifier;
923
924     for (i = 0; i < 6; i++)
925         correct[i] = mfc_context->bit_rate_control_context[bit_rate_control_target].Correct[i];
926
927     grow = mfc_context->bit_rate_control_context[bit_rate_control_target].GrowInit + 
928         (mfc_context->bit_rate_control_context[bit_rate_control_target].GrowResistance << 4);
929     shrink = mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkInit + 
930         (mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkResistance << 4);
931
932     BEGIN_BCS_BATCH(batch, 11);;
933
934     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
935     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
936
937     if (slice_type == SLICE_TYPE_I) {
938         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
939     } else {
940         OUT_BCS_BATCH(batch,
941                       (1 << 16) |                       /*1 reference frame*/
942                       (chroma_log2_weight_denom << 8) |
943                       (luma_log2_weight_denom << 0));
944     }
945
946     OUT_BCS_BATCH(batch, 
947                   (weighted_pred_idc << 30) |
948                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
949                   (slice_param->disable_deblocking_filter_idc << 27) |
950                   (slice_param->cabac_init_idc << 24) |
951                   (qp<<16) |                    /*Slice Quantization Parameter*/
952                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
953                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
954     OUT_BCS_BATCH(batch,
955                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
956                   (beginx << 16) |
957                   slice_param->macroblock_address );
958     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
959     OUT_BCS_BATCH(batch, 
960                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
961                   (1 << 30) |           /*ResetRateControlCounter*/
962                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
963                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
964                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
965                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
966                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
967                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
968                   (last_slice << 19) |     /*IsLastSlice*/
969                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
970                   (1 << 17) |       /*HeaderPresentFlag*/       
971                   (1 << 16) |       /*SliceData PresentFlag*/
972                   (1 << 15) |       /*TailPresentFlag*/
973                   (1 << 13) |       /*RBSP NAL TYPE*/   
974                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
975     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
976     OUT_BCS_BATCH(batch,
977                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
978                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
979                   (shrink << 8)  |
980                   (grow << 0));   
981     OUT_BCS_BATCH(batch,
982                   (correct[5] << 20) |
983                   (correct[4] << 16) |
984                   (correct[3] << 12) |
985                   (correct[2] << 8) |
986                   (correct[1] << 4) |
987                   (correct[0] << 0));
988     OUT_BCS_BATCH(batch, 0);
989
990     ADVANCE_BCS_BATCH(batch);
991 }
992
993
994 #ifdef MFC_SOFTWARE_HASWELL
995
996 static int
997 gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
998                                 int qp,unsigned int *msg,
999                               struct intel_encoder_context *encoder_context,
1000                               unsigned char target_mb_size, unsigned char max_mb_size,
1001                               struct intel_batchbuffer *batch)
1002 {
1003     int len_in_dwords = 12;
1004     unsigned int intra_msg;
1005 #define         INTRA_MSG_FLAG          (1 << 13)
1006 #define         INTRA_MBTYPE_MASK       (0x1F0000)
1007     if (batch == NULL)
1008         batch = encoder_context->base.batch;
1009
1010     BEGIN_BCS_BATCH(batch, len_in_dwords);
1011
1012     intra_msg = msg[0] & 0xC0FF;
1013     intra_msg |= INTRA_MSG_FLAG;
1014     intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
1015     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1016     OUT_BCS_BATCH(batch, 0);
1017     OUT_BCS_BATCH(batch, 0);
1018     OUT_BCS_BATCH(batch, 
1019                   (0 << 24) |           /* PackedMvNum, Debug*/
1020                   (0 << 20) |           /* No motion vector */
1021                   (1 << 19) |           /* CbpDcY */
1022                   (1 << 18) |           /* CbpDcU */
1023                   (1 << 17) |           /* CbpDcV */
1024                   intra_msg);
1025
1026     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
1027     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
1028     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
1029
1030     /*Stuff for Intra MB*/
1031     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
1032     OUT_BCS_BATCH(batch, msg[2]);       
1033     OUT_BCS_BATCH(batch, msg[3]&0xFF);  
1034     
1035     /*MaxSizeInWord and TargetSzieInWord*/
1036     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1037                   (target_mb_size << 16) );
1038
1039     OUT_BCS_BATCH(batch, 0);
1040
1041     ADVANCE_BCS_BATCH(batch);
1042
1043     return len_in_dwords;
1044 }
1045
1046 static int
1047 gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1048                               unsigned int *msg, unsigned int offset,
1049                               struct intel_encoder_context *encoder_context,
1050                               unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1051                               struct intel_batchbuffer *batch)
1052 {
1053     int len_in_dwords = 12;
1054         unsigned int inter_msg = 0;
1055     if (batch == NULL)
1056         batch = encoder_context->base.batch;
1057     {
1058 #define MSG_MV_OFFSET   4
1059         unsigned int *mv_ptr;
1060         mv_ptr = msg + MSG_MV_OFFSET;
1061         /* MV of VME output is based on 16 sub-blocks. So it is necessary
1062          * to convert them to be compatible with the format of AVC_PAK
1063          * command.
1064          */
1065         if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
1066                 /* MV[0] and MV[2] are replicated */
1067                 mv_ptr[4] = mv_ptr[0];
1068                 mv_ptr[5] = mv_ptr[1];
1069                 mv_ptr[2] = mv_ptr[8];
1070                 mv_ptr[3] = mv_ptr[9];
1071                 mv_ptr[6] = mv_ptr[8]; 
1072                 mv_ptr[7] = mv_ptr[9]; 
1073         } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
1074                 /* MV[0] and MV[1] are replicated */
1075                 mv_ptr[2] = mv_ptr[0];  
1076                 mv_ptr[3] = mv_ptr[1];
1077                 mv_ptr[4] = mv_ptr[16]; 
1078                 mv_ptr[5] = mv_ptr[17]; 
1079                 mv_ptr[6] = mv_ptr[24];
1080                 mv_ptr[7] = mv_ptr[25];
1081         } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1082                         !(msg[1] & SUBMB_SHAPE_MASK)) {
1083                 /* Don't touch MV[0] or MV[1] */
1084                 mv_ptr[2] = mv_ptr[8];
1085                 mv_ptr[3] = mv_ptr[9];
1086                 mv_ptr[4] = mv_ptr[16];
1087                 mv_ptr[5] = mv_ptr[17];
1088                 mv_ptr[6] = mv_ptr[24];
1089                 mv_ptr[7] = mv_ptr[25];
1090         }
1091     }
1092
1093     BEGIN_BCS_BATCH(batch, len_in_dwords);
1094
1095     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1096
1097         inter_msg = 32;
1098         /* MV quantity */
1099         if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
1100                 if (msg[1] & SUBMB_SHAPE_MASK)
1101                         inter_msg = 128;
1102         }
1103     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
1104     OUT_BCS_BATCH(batch, offset);
1105         inter_msg = msg[0] & (0x1F00FFFF);
1106         inter_msg |= INTER_MV8;
1107         inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
1108         if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1109                         (msg[1] & SUBMB_SHAPE_MASK)) {
1110                 inter_msg |= INTER_MV32;
1111         }
1112
1113     OUT_BCS_BATCH(batch, inter_msg);
1114
1115     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
1116     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
1117 #if 0 
1118     if ( slice_type == SLICE_TYPE_B) {
1119         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
1120     } else {
1121         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
1122     }
1123 #else
1124     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
1125 #endif
1126
1127         inter_msg = msg[1] >> 8;
1128     /*Stuff for Inter MB*/
1129     OUT_BCS_BATCH(batch, inter_msg);        
1130     OUT_BCS_BATCH(batch, 0x0);    
1131     OUT_BCS_BATCH(batch, 0x0);        
1132
1133     /*MaxSizeInWord and TargetSzieInWord*/
1134     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1135                   (target_mb_size << 16) );
1136
1137     OUT_BCS_BATCH(batch, 0x0);    
1138
1139     ADVANCE_BCS_BATCH(batch);
1140
1141     return len_in_dwords;
1142 }
1143
1144 #define         INTRA_RDO_OFFSET        4
1145 #define         INTER_RDO_OFFSET        54
1146 #define         INTER_MSG_OFFSET        52
1147 #define         INTER_MV_OFFSET         224
1148 #define         RDO_MASK                0xFFFF
1149
1150 static void 
1151 gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1152                                        struct encode_state *encode_state,
1153                                        struct intel_encoder_context *encoder_context,
1154                                        int slice_index,
1155                                        struct intel_batchbuffer *slice_batch)
1156 {
1157     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1158     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1159     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1160     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1161     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1162     unsigned int *msg = NULL, offset = 0;
1163     unsigned char *msg_ptr = NULL;
1164     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
1165     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1166     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1167     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1168     int i,x,y;
1169     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1170     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1171     unsigned char *slice_header = NULL;
1172     int slice_header_length_in_bits = 0;
1173     unsigned int tail_data[] = { 0x0, 0x0 };
1174     int slice_type = pSliceParameter->slice_type;
1175
1176
1177     if (rate_control_mode == VA_RC_CBR) {
1178         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1179         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1180     }
1181
1182     /* only support for 8-bit pixel bit-depth */
1183     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1184     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1185     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1186     assert(qp >= 0 && qp < 52);
1187
1188     gen75_mfc_avc_slice_state(ctx, 
1189                              pPicParameter,
1190                              pSliceParameter,
1191                              encode_state, encoder_context,
1192                              (rate_control_mode == VA_RC_CBR), qp, slice_batch);
1193
1194     if ( slice_index == 0) 
1195         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1196
1197     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1198
1199     // slice hander
1200     mfc_context->insert_object(ctx, encoder_context,
1201                                (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1202                                5,  /* first 5 bytes are start code + nal unit type */
1203                                1, 0, 1, slice_batch);
1204
1205     dri_bo_map(vme_context->vme_output.bo , 1);
1206     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1207
1208     if (is_intra) {
1209         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1210     } else {
1211         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1212     }
1213    
1214     for (i = pSliceParameter->macroblock_address; 
1215          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1216         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1217         x = i % width_in_mbs;
1218         y = i / width_in_mbs;
1219         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1220
1221         if (is_intra) {
1222             assert(msg);
1223             gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1224         } else {
1225             int inter_rdo, intra_rdo;
1226             inter_rdo = msg[INTER_RDO_OFFSET] & RDO_MASK;
1227             intra_rdo = msg[INTRA_RDO_OFFSET] & RDO_MASK;
1228             offset = i * vme_context->vme_output.size_block + INTER_MV_OFFSET;
1229             if (intra_rdo < inter_rdo) { 
1230                 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1231             } else {
1232                 msg += INTER_MSG_OFFSET;
1233                 gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1234             }
1235         }
1236     }
1237    
1238     dri_bo_unmap(vme_context->vme_output.bo);
1239
1240     if ( last_slice ) {    
1241         mfc_context->insert_object(ctx, encoder_context,
1242                                    tail_data, 2, 8,
1243                                    2, 1, 1, 0, slice_batch);
1244     } else {
1245         mfc_context->insert_object(ctx, encoder_context,
1246                                    tail_data, 1, 8,
1247                                    1, 1, 1, 0, slice_batch);
1248     }
1249
1250     free(slice_header);
1251
1252 }
1253
1254 static dri_bo *
1255 gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1256                                   struct encode_state *encode_state,
1257                                   struct intel_encoder_context *encoder_context)
1258 {
1259     struct i965_driver_data *i965 = i965_driver_data(ctx);
1260     struct intel_batchbuffer *batch;
1261     dri_bo *batch_bo;
1262     int i;
1263     int buffer_size;
1264     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1265     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1266     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1267
1268     buffer_size = width_in_mbs * height_in_mbs * 64;
1269     batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
1270     batch_bo = batch->buffer;
1271     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1272         gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1273     }
1274
1275     intel_batchbuffer_align(batch, 8);
1276     
1277     BEGIN_BCS_BATCH(batch, 2);
1278     OUT_BCS_BATCH(batch, 0);
1279     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1280     ADVANCE_BCS_BATCH(batch);
1281
1282     dri_bo_reference(batch_bo);
1283     intel_batchbuffer_free(batch);
1284
1285     return batch_bo;
1286 }
1287
1288 #else
1289
1290 static void
1291 gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1292                                     struct encode_state *encode_state,
1293                                     struct intel_encoder_context *encoder_context)
1294
1295 {
1296     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1297     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1298
1299     assert(vme_context->vme_output.bo);
1300     mfc_context->buffer_suface_setup(ctx,
1301                                      &mfc_context->gpe_context,
1302                                      &vme_context->vme_output,
1303                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1304                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1305     assert(mfc_context->aux_batchbuffer_surface.bo);
1306     mfc_context->buffer_suface_setup(ctx,
1307                                      &mfc_context->gpe_context,
1308                                      &mfc_context->aux_batchbuffer_surface,
1309                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1310                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
1311 }
1312
1313 static void
1314 gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1315                                      struct encode_state *encode_state,
1316                                      struct intel_encoder_context *encoder_context)
1317
1318 {
1319     struct i965_driver_data *i965 = i965_driver_data(ctx);
1320     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1321     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1322     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1323     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1324     mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1325     mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1326     mfc_context->mfc_batchbuffer_surface.pitch = 16;
1327     mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, 
1328                                                            "MFC batchbuffer",
1329                                                            mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1330                                                            0x1000);
1331     mfc_context->buffer_suface_setup(ctx,
1332                                      &mfc_context->gpe_context,
1333                                      &mfc_context->mfc_batchbuffer_surface,
1334                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1335                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1336 }
1337
1338 static void
1339 gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
1340                                     struct encode_state *encode_state,
1341                                     struct intel_encoder_context *encoder_context)
1342 {
1343     gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1344     gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1345 }
1346
1347 static void
1348 gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
1349                                 struct encode_state *encode_state,
1350                                 struct intel_encoder_context *encoder_context)
1351 {
1352     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1353     struct gen6_interface_descriptor_data *desc;   
1354     int i;
1355     dri_bo *bo;
1356
1357     bo = mfc_context->gpe_context.idrt.bo;
1358     dri_bo_map(bo, 1);
1359     assert(bo->virtual);
1360     desc = bo->virtual;
1361
1362     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1363         struct i965_kernel *kernel;
1364
1365         kernel = &mfc_context->gpe_context.kernels[i];
1366         assert(sizeof(*desc) == 32);
1367
1368         /*Setup the descritor table*/
1369         memset(desc, 0, sizeof(*desc));
1370         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1371         desc->desc2.sampler_count = 0;
1372         desc->desc2.sampler_state_pointer = 0;
1373         desc->desc3.binding_table_entry_count = 2;
1374         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1375         desc->desc4.constant_urb_entry_read_offset = 0;
1376         desc->desc4.constant_urb_entry_read_length = 4;
1377                 
1378         /*kernel start*/
1379         dri_bo_emit_reloc(bo,   
1380                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1381                           0,
1382                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1383                           kernel->bo);
1384         desc++;
1385     }
1386
1387     dri_bo_unmap(bo);
1388 }
1389
1390 static void
1391 gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1392                                     struct encode_state *encode_state,
1393                                     struct intel_encoder_context *encoder_context)
1394 {
1395     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1396     
1397     (void)mfc_context;
1398 }
1399
1400 static void
1401 gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1402                                          int index,
1403                                          int head_offset,
1404                                          int batchbuffer_offset,
1405                                          int head_size,
1406                                          int tail_size,
1407                                          int number_mb_cmds,
1408                                          int first_object,
1409                                          int last_object,
1410                                          int last_slice,
1411                                          int mb_x,
1412                                          int mb_y,
1413                                          int width_in_mbs,
1414                                          int qp)
1415 {
1416     BEGIN_BATCH(batch, 12);
1417     
1418     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1419     OUT_BATCH(batch, index);
1420     OUT_BATCH(batch, 0);
1421     OUT_BATCH(batch, 0);
1422     OUT_BATCH(batch, 0);
1423     OUT_BATCH(batch, 0);
1424    
1425     /*inline data */
1426     OUT_BATCH(batch, head_offset);
1427     OUT_BATCH(batch, batchbuffer_offset);
1428     OUT_BATCH(batch, 
1429               head_size << 16 |
1430               tail_size);
1431     OUT_BATCH(batch,
1432               number_mb_cmds << 16 |
1433               first_object << 2 |
1434               last_object << 1 |
1435               last_slice);
1436     OUT_BATCH(batch,
1437               mb_y << 8 |
1438               mb_x);
1439     OUT_BATCH(batch,
1440               qp << 16 |
1441               width_in_mbs);
1442
1443     ADVANCE_BATCH(batch);
1444 }
1445
1446 static void
1447 gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1448                                        struct intel_encoder_context *encoder_context,
1449                                        VAEncSliceParameterBufferH264 *slice_param,
1450                                        int head_offset,
1451                                        unsigned short head_size,
1452                                        unsigned short tail_size,
1453                                        int batchbuffer_offset,
1454                                        int qp,
1455                                        int last_slice)
1456 {
1457     struct intel_batchbuffer *batch = encoder_context->base.batch;
1458     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1459     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1460     int total_mbs = slice_param->num_macroblocks;
1461     int number_mb_cmds = 128;
1462     int starting_mb = 0;
1463     int last_object = 0;
1464     int first_object = 1;
1465     int i;
1466     int mb_x, mb_y;
1467     int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1468
1469     for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1470         last_object = (total_mbs - starting_mb) == number_mb_cmds;
1471         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1472         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1473         assert(mb_x <= 255 && mb_y <= 255);
1474
1475         starting_mb += number_mb_cmds;
1476
1477         gen75_mfc_batchbuffer_emit_object_command(batch,
1478                                                  index,
1479                                                  head_offset,
1480                                                  batchbuffer_offset,
1481                                                  head_size,
1482                                                  tail_size,
1483                                                  number_mb_cmds,
1484                                                  first_object,
1485                                                  last_object,
1486                                                  last_slice,
1487                                                  mb_x,
1488                                                  mb_y,
1489                                                  width_in_mbs,
1490                                                  qp);
1491
1492         if (first_object) {
1493             head_offset += head_size;
1494             batchbuffer_offset += head_size;
1495         }
1496
1497         if (last_object) {
1498             head_offset += tail_size;
1499             batchbuffer_offset += tail_size;
1500         }
1501
1502         batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
1503
1504         first_object = 0;
1505     }
1506
1507     if (!last_object) {
1508         last_object = 1;
1509         number_mb_cmds = total_mbs % number_mb_cmds;
1510         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1511         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1512         assert(mb_x <= 255 && mb_y <= 255);
1513         starting_mb += number_mb_cmds;
1514
1515         gen75_mfc_batchbuffer_emit_object_command(batch,
1516                                                  index,
1517                                                  head_offset,
1518                                                  batchbuffer_offset,
1519                                                  head_size,
1520                                                  tail_size,
1521                                                  number_mb_cmds,
1522                                                  first_object,
1523                                                  last_object,
1524                                                  last_slice,
1525                                                  mb_x,
1526                                                  mb_y,
1527                                                  width_in_mbs,
1528                                                  qp);
1529     }
1530 }
1531                           
1532 /*
1533  * return size in Owords (16bytes)
1534  */         
1535 static int
1536 gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1537                                struct encode_state *encode_state,
1538                                struct intel_encoder_context *encoder_context,
1539                                int slice_index,
1540                                int batchbuffer_offset)
1541 {
1542     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1543     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1544     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1545     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1546     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1547     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1548     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1549     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1550     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1551     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1552     unsigned char *slice_header = NULL;
1553     int slice_header_length_in_bits = 0;
1554     unsigned int tail_data[] = { 0x0, 0x0 };
1555     long head_offset;
1556     int old_used = intel_batchbuffer_used_size(slice_batch), used;
1557     unsigned short head_size, tail_size;
1558     int slice_type = pSliceParameter->slice_type;
1559
1560     if (rate_control_mode == VA_RC_CBR) {
1561         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1562         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1563     }
1564
1565     /* only support for 8-bit pixel bit-depth */
1566     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1567     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1568     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1569     assert(qp >= 0 && qp < 52);
1570
1571     head_offset = old_used / 16;
1572     gen75_mfc_avc_slice_state(ctx,
1573                              pPicParameter,
1574                              pSliceParameter,
1575                              encode_state,
1576                              encoder_context,
1577                              (rate_control_mode == VA_RC_CBR),
1578                              qp,
1579                              slice_batch);
1580
1581     if (slice_index == 0)
1582         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1583
1584     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1585
1586     // slice hander
1587     mfc_context->insert_object(ctx,
1588                                encoder_context,
1589                                (unsigned int *)slice_header,
1590                                ALIGN(slice_header_length_in_bits, 32) >> 5,
1591                                slice_header_length_in_bits & 0x1f,
1592                                5,  /* first 5 bytes are start code + nal unit type */
1593                                1,
1594                                0,
1595                                1,
1596                                slice_batch);
1597     free(slice_header);
1598
1599     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1600     used = intel_batchbuffer_used_size(slice_batch);
1601     head_size = (used - old_used) / 16;
1602     old_used = used;
1603
1604     /* tail */
1605     if (last_slice) {    
1606         mfc_context->insert_object(ctx,
1607                                    encoder_context,
1608                                    tail_data,
1609                                    2,
1610                                    8,
1611                                    2,
1612                                    1,
1613                                    1,
1614                                    0,
1615                                    slice_batch);
1616     } else {
1617         mfc_context->insert_object(ctx,
1618                                    encoder_context,
1619                                    tail_data,
1620                                    1,
1621                                    8,
1622                                    1,
1623                                    1,
1624                                    1,
1625                                    0,
1626                                    slice_batch);
1627     }
1628
1629     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1630     used = intel_batchbuffer_used_size(slice_batch);
1631     tail_size = (used - old_used) / 16;
1632
1633    
1634     gen75_mfc_avc_batchbuffer_slice_command(ctx,
1635                                            encoder_context,
1636                                            pSliceParameter,
1637                                            head_offset,
1638                                            head_size,
1639                                            tail_size,
1640                                            batchbuffer_offset,
1641                                            qp,
1642                                            last_slice);
1643
1644     return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
1645 }
1646
1647 static void
1648 gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1649                                   struct encode_state *encode_state,
1650                                   struct intel_encoder_context *encoder_context)
1651 {
1652     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1653     struct intel_batchbuffer *batch = encoder_context->base.batch;
1654     int i, size, offset = 0;
1655     intel_batchbuffer_start_atomic(batch, 0x4000); 
1656     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1657
1658     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1659         size = gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1660         offset += size;
1661     }
1662
1663     intel_batchbuffer_end_atomic(batch);
1664     intel_batchbuffer_flush(batch);
1665 }
1666
1667 static void
1668 gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
1669                                struct encode_state *encode_state,
1670                                struct intel_encoder_context *encoder_context)
1671 {
1672     gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1673     gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1674     gen75_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1675     gen75_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1676 }
1677
1678 static dri_bo *
1679 gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1680                                   struct encode_state *encode_state,
1681                                   struct intel_encoder_context *encoder_context)
1682 {
1683     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1684
1685     gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1686     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1687
1688     return mfc_context->mfc_batchbuffer_surface.bo;
1689 }
1690
1691 #endif
1692
1693 static void
1694 gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
1695                                  struct encode_state *encode_state,
1696                                  struct intel_encoder_context *encoder_context)
1697 {
1698     struct intel_batchbuffer *batch = encoder_context->base.batch;
1699     dri_bo *slice_batch_bo;
1700
1701     if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1702         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1703         assert(0);
1704         return; 
1705     }
1706
1707 #ifdef MFC_SOFTWARE_HASWELL
1708     slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1709 #else
1710     slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1711 #endif
1712
1713     // begin programing
1714     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1715     intel_batchbuffer_emit_mi_flush(batch);
1716     
1717     // picture level programing
1718     gen75_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1719
1720     BEGIN_BCS_BATCH(batch, 2);
1721     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1722     OUT_BCS_RELOC(batch,
1723                   slice_batch_bo,
1724                   I915_GEM_DOMAIN_COMMAND, 0, 
1725                   0);
1726     ADVANCE_BCS_BATCH(batch);
1727
1728     // end programing
1729     intel_batchbuffer_end_atomic(batch);
1730
1731     dri_bo_unreference(slice_batch_bo);
1732 }
1733
1734
1735 static VAStatus
1736 gen75_mfc_avc_encode_picture(VADriverContextP ctx, 
1737                             struct encode_state *encode_state,
1738                             struct intel_encoder_context *encoder_context)
1739 {
1740     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1741     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1742     int current_frame_bits_size;
1743     int sts;
1744  
1745     for (;;) {
1746         gen75_mfc_init(ctx, encode_state, encoder_context);
1747         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1748         /*Programing bcs pipeline*/
1749         gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);  //filling the pipeline
1750         gen75_mfc_run(ctx, encode_state, encoder_context);
1751         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1752             gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1753             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1754             if (sts == BRC_NO_HRD_VIOLATION) {
1755                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1756                 break;
1757             }
1758             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1759                 if (!mfc_context->hrd.violation_noted) {
1760                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1761                     mfc_context->hrd.violation_noted = 1;
1762                 }
1763                 return VA_STATUS_SUCCESS;
1764             }
1765         } else {
1766             break;
1767         }
1768     }
1769
1770     return VA_STATUS_SUCCESS;
1771 }
1772
1773 /*
1774  * MPEG-2
1775  */
1776
/*
 * Picture-type codes written into MFX_MPEG2_PIC_STATE, indexed by the
 * picture_type field of the picture parameter buffer:
 * entry 0 = I, entry 1 = P, entry 2 = B.
 */
static const int va_to_gen75_mpeg2_picture_type[3] = { 1, 2, 3 };
1783
1784 static void
1785 gen75_mfc_mpeg2_pic_state(VADriverContextP ctx,
1786                           struct intel_encoder_context *encoder_context,
1787                           struct encode_state *encode_state)
1788 {
1789     struct intel_batchbuffer *batch = encoder_context->base.batch;
1790     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1791     VAEncPictureParameterBufferMPEG2 *pic_param;
1792     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1793     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1794
1795     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1796     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1797
1798     BEGIN_BCS_BATCH(batch, 13);
1799     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1800     OUT_BCS_BATCH(batch,
1801                   (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1802                   (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1803                   (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1804                   (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1805                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1806                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1807                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1808                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1809                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1810                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1811                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1812                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1813     OUT_BCS_BATCH(batch,
1814                   0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1815                   va_to_gen75_mpeg2_picture_type[pic_param->picture_type] << 9 |
1816                   0);
1817     OUT_BCS_BATCH(batch,
1818                   1 << 31 |     /* slice concealment */
1819                   (height_in_mbs - 1) << 16 |
1820                   (width_in_mbs - 1));
1821     OUT_BCS_BATCH(batch, 0);
1822     OUT_BCS_BATCH(batch, 0);
1823     OUT_BCS_BATCH(batch,
1824                   0xFFF << 16 | /* InterMBMaxSize */
1825                   0xFFF << 0 |  /* IntraMBMaxSize */
1826                   0);
1827     OUT_BCS_BATCH(batch, 0);
1828     OUT_BCS_BATCH(batch, 0);
1829     OUT_BCS_BATCH(batch, 0);
1830     OUT_BCS_BATCH(batch, 0);
1831     OUT_BCS_BATCH(batch, 0);
1832     OUT_BCS_BATCH(batch, 0);
1833     ADVANCE_BCS_BATCH(batch);
1834 }
1835
1836 static void
1837 gen75_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1838 {
1839     unsigned char intra_qm[64] = {
1840          8, 16, 19, 22, 26, 27, 29, 34,
1841         16, 16, 22, 24, 27, 29, 34, 37,
1842         19, 22, 26, 27, 29, 34, 34, 38,
1843         22, 22, 26, 27, 29, 34, 37, 40,
1844         22, 26, 27, 29, 32, 35, 40, 48,
1845         26, 27, 29, 32, 35, 40, 48, 58,
1846         26, 27, 29, 34, 38, 46, 56, 69,
1847         27, 29, 35, 38, 46, 56, 69, 83
1848     };
1849
1850     unsigned char non_intra_qm[64] = {
1851         16, 16, 16, 16, 16, 16, 16, 16,
1852         16, 16, 16, 16, 16, 16, 16, 16,
1853         16, 16, 16, 16, 16, 16, 16, 16,
1854         16, 16, 16, 16, 16, 16, 16, 16,
1855         16, 16, 16, 16, 16, 16, 16, 16,
1856         16, 16, 16, 16, 16, 16, 16, 16,
1857         16, 16, 16, 16, 16, 16, 16, 16,
1858         16, 16, 16, 16, 16, 16, 16, 16
1859     };
1860
1861     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1862     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
1863 }
1864
1865 static void
1866 gen75_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1867 {
1868     unsigned short intra_fqm[64] = {
1869          65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1870          65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1871          65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1872          65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1873          65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1874          65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1875          65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1876          65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1877     };
1878
1879     unsigned short non_intra_fqm[64] = {
1880         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1881         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1882         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1883         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1884         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1885         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1886         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1887         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1888     };
1889
1890     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1891     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
1892 }
1893
1894 static void
1895 gen75_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1896                                  struct intel_encoder_context *encoder_context,
1897                                  int x, int y,
1898                                  int next_x, int next_y,
1899                                  int is_fisrt_slice_group,
1900                                  int is_last_slice_group,
1901                                  int intra_slice,
1902                                  int qp,
1903                                  struct intel_batchbuffer *batch)
1904 {
1905     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1906
1907     if (batch == NULL)
1908         batch = encoder_context->base.batch;
1909
1910     BEGIN_BCS_BATCH(batch, 8);
1911
1912     OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1913     OUT_BCS_BATCH(batch,
1914                   0 << 31 |                             /* MbRateCtrlFlag */
1915                   !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
1916                   1 << 17 |                             /* Insert Header before the first slice group data */
1917                   1 << 16 |                             /* SliceData PresentFlag: always 1 */
1918                   1 << 15 |                             /* TailPresentFlag: always 1 */
1919                   0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
1920                   !!intra_slice << 13 |                 /* IntraSlice */
1921                   !!intra_slice << 12 |                 /* IntraSliceFlag */
1922                   0);
1923     OUT_BCS_BATCH(batch,
1924                   next_y << 24 |
1925                   next_x << 16 |
1926                   y << 8 |
1927                   x << 0 |
1928                   0);
1929     OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
1930     /* bitstream pointer is only loaded once for the first slice of a frame when 
1931      * LoadSlicePointerFlag is 0
1932      */
1933     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1934     OUT_BCS_BATCH(batch, 0);    /* FIXME: */
1935     OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
1936     OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
1937
1938     ADVANCE_BCS_BATCH(batch);
1939 }
1940
1941 static int
1942 gen75_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1943                                  struct intel_encoder_context *encoder_context,
1944                                  int x, int y,
1945                                  int first_mb_in_slice,
1946                                  int last_mb_in_slice,
1947                                  int first_mb_in_slice_group,
1948                                  int last_mb_in_slice_group,
1949                                  int mb_type,
1950                                  int qp_scale_code,
1951                                  int coded_block_pattern,
1952                                  unsigned char target_size_in_word,
1953                                  unsigned char max_size_in_word,
1954                                  struct intel_batchbuffer *batch)
1955 {
1956     int len_in_dwords = 9;
1957
1958     if (batch == NULL)
1959         batch = encoder_context->base.batch;
1960
1961     BEGIN_BCS_BATCH(batch, len_in_dwords);
1962
1963     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1964     OUT_BCS_BATCH(batch,
1965                   0 << 24 |     /* PackedMvNum */
1966                   0 << 20 |     /* MvFormat */
1967                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1968                   0 << 15 |     /* TransformFlag: frame DCT */
1969                   0 << 14 |     /* FieldMbFlag */
1970                   1 << 13 |     /* IntraMbFlag */
1971                   mb_type << 8 |   /* MbType: Intra */
1972                   0 << 2 |      /* SkipMbFlag */
1973                   0 << 0 |      /* InterMbMode */
1974                   0);
1975     OUT_BCS_BATCH(batch, y << 16 | x);
1976     OUT_BCS_BATCH(batch,
1977                   max_size_in_word << 24 |
1978                   target_size_in_word << 16 |
1979                   coded_block_pattern << 6 |      /* CBP */
1980                   0);
1981     OUT_BCS_BATCH(batch,
1982                   last_mb_in_slice << 31 |
1983                   first_mb_in_slice << 30 |
1984                   0 << 27 |     /* EnableCoeffClamp */
1985                   last_mb_in_slice_group << 26 |
1986                   0 << 25 |     /* MbSkipConvDisable */
1987                   first_mb_in_slice_group << 24 |
1988                   0 << 16 |     /* MvFieldSelect */
1989                   qp_scale_code << 0 |
1990                   0);
1991     OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
1992     OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
1993     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1994     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1995
1996     ADVANCE_BCS_BATCH(batch);
1997
1998     return len_in_dwords;
1999 }
2000
2001 #define INTRA_RDO_OFFSET        4
2002 #define INTER_RDO_OFFSET        54
2003 #define INTER_MSG_OFFSET        52
2004 #define INTER_MV_OFFSET         224
2005 #define RDO_MASK                0xFFFF
2006
2007 static void
2008 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
2009                                            struct encode_state *encode_state,
2010                                            struct intel_encoder_context *encoder_context,
2011                                            struct intel_batchbuffer *slice_batch)
2012 {
2013     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2014     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
2015
2016     if (encode_state->packed_header_data[idx]) {
2017         VAEncPackedHeaderParameterBuffer *param = NULL;
2018         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2019         unsigned int length_in_bits;
2020
2021         assert(encode_state->packed_header_param[idx]);
2022         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2023         length_in_bits = param->bit_length;
2024
2025         mfc_context->insert_object(ctx,
2026                                    encoder_context,
2027                                    header_data,
2028                                    ALIGN(length_in_bits, 32) >> 5,
2029                                    length_in_bits & 0x1f,
2030                                    5,   /* FIXME: check it */
2031                                    0,
2032                                    0,
2033                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2034                                    slice_batch);
2035     }
2036
2037     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2038
2039     if (encode_state->packed_header_data[idx]) {
2040         VAEncPackedHeaderParameterBuffer *param = NULL;
2041         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2042         unsigned int length_in_bits;
2043
2044         assert(encode_state->packed_header_param[idx]);
2045         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2046         length_in_bits = param->bit_length;
2047
2048         mfc_context->insert_object(ctx,
2049                                    encoder_context,
2050                                    header_data,
2051                                    ALIGN(length_in_bits, 32) >> 5,
2052                                    length_in_bits & 0x1f,
2053                                    5,   /* FIXME: check it */
2054                                    0,
2055                                    0,
2056                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2057                                    slice_batch);
2058     }
2059 }
2060
2061 static void 
2062 gen75_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2063                                      struct encode_state *encode_state,
2064                                      struct intel_encoder_context *encoder_context,
2065                                      int slice_index,
2066                                      VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2067                                      struct intel_batchbuffer *slice_batch)
2068 {
2069     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2070     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2071     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2072     unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2073     unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2074     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2075     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2076     int i, j;
2077     int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2078
2079     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
2080     h_start_pos = slice_param->macroblock_address % width_in_mbs;
2081     v_start_pos = slice_param->macroblock_address / width_in_mbs;
2082     assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
2083
2084     if (next_slice_group_param) {
2085         h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2086         v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
2087     } else {
2088         h_next_start_pos = 0;
2089         v_next_start_pos = height_in_mbs;
2090     }
2091
2092     gen75_mfc_mpeg2_slicegroup_state(ctx,
2093                                      encoder_context,
2094                                      h_start_pos,
2095                                      v_start_pos,
2096                                      h_next_start_pos,
2097                                      v_next_start_pos,
2098                                      slice_index == 0,
2099                                      next_slice_group_param == NULL,
2100                                      slice_param->is_intra_slice,
2101                                      slice_param->quantiser_scale_code,
2102                                      slice_batch);
2103
2104     if (slice_index == 0) 
2105         intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2106
2107     /* Insert '00' to make sure the header is valid */
2108     mfc_context->insert_object(ctx,
2109                                encoder_context,
2110                                (unsigned int*)section_delimiter,
2111                                1,
2112                                8,   /* 8bits in the last DWORD */
2113                                1,   /* 1 byte */
2114                                1,
2115                                0,
2116                                0,
2117                                slice_batch);
2118
2119     for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2120         /* PAK for each macroblocks */
2121         for (j = 0; j < slice_param->num_macroblocks; j++) {
2122             int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2123             int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2124             int first_mb_in_slice = (j == 0);
2125             int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2126             int first_mb_in_slice_group = (i == 0 && j == 0);
2127             int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2128                                           j == slice_param->num_macroblocks - 1);
2129
2130             if (slice_param->is_intra_slice) {
2131                 gen75_mfc_mpeg2_pak_object_intra(ctx,
2132                                                  encoder_context,
2133                                                  h_pos, v_pos,
2134                                                  first_mb_in_slice,
2135                                                  last_mb_in_slice,
2136                                                  first_mb_in_slice_group,
2137                                                  last_mb_in_slice_group,
2138                                                  0x1a,
2139                                                  slice_param->quantiser_scale_code,
2140                                                  0x3f,
2141                                                  0,
2142                                                  0xff,
2143                                                  slice_batch);
2144             } else {
2145                 assert(0);
2146             }
2147         }
2148
2149         slice_param++;
2150     }
2151
2152     /* tail data */
2153     if (next_slice_group_param == NULL) { /* end of a picture */
2154         mfc_context->insert_object(ctx,
2155                                    encoder_context,
2156                                    (unsigned int *)tail_delimiter,
2157                                    2,
2158                                    8,   /* 8bits in the last DWORD */
2159                                    5,   /* 5 bytes */
2160                                    1,
2161                                    1,
2162                                    0,
2163                                    slice_batch);
2164     } else {        /* end of a lsice group */
2165         mfc_context->insert_object(ctx,
2166                                    encoder_context,
2167                                    (unsigned int *)section_delimiter,
2168                                    1,
2169                                    8,   /* 8bits in the last DWORD */
2170                                    1,   /* 1 byte */
2171                                    1,
2172                                    1,
2173                                    0,
2174                                    slice_batch);
2175     }
2176 }
2177
2178 /* 
2179  * A batch buffer for all slices, including slice state, 
2180  * slice insert object and slice pak object commands
2181  *
2182  */
2183 static dri_bo *
2184 gen75_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2185                                            struct encode_state *encode_state,
2186                                            struct intel_encoder_context *encoder_context)
2187 {
2188     struct i965_driver_data *i965 = i965_driver_data(ctx);
2189     struct intel_batchbuffer *batch;
2190     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2191     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2192     dri_bo *batch_bo;
2193     int i;
2194     int buffer_size;
2195     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2196     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2197
2198     buffer_size = width_in_mbs * height_in_mbs * 64;
2199     batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
2200     batch_bo = batch->buffer;
2201
2202     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2203         if (i == encode_state->num_slice_params_ext - 1)
2204             next_slice_group_param = NULL;
2205         else
2206             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2207
2208         gen75_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2209     }
2210
2211     intel_batchbuffer_align(batch, 8);
2212     
2213     BEGIN_BCS_BATCH(batch, 2);
2214     OUT_BCS_BATCH(batch, 0);
2215     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2216     ADVANCE_BCS_BATCH(batch);
2217
2218     dri_bo_reference(batch_bo);
2219     intel_batchbuffer_free(batch);
2220
2221     return batch_bo;
2222 }
2223
2224 static void
2225 gen75_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2226                                             struct encode_state *encode_state,
2227                                             struct intel_encoder_context *encoder_context)
2228 {
2229     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2230
2231     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2232     mfc_context->set_surface_state(ctx, encoder_context);
2233     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2234     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
2235     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2236     gen75_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2237     gen75_mfc_mpeg2_qm_state(ctx, encoder_context);
2238     gen75_mfc_mpeg2_fqm_state(ctx, encoder_context);
2239 }
2240
2241 static void
2242 gen75_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2243                                     struct encode_state *encode_state,
2244                                     struct intel_encoder_context *encoder_context)
2245 {
2246     struct intel_batchbuffer *batch = encoder_context->base.batch;
2247     dri_bo *slice_batch_bo;
2248
2249     slice_batch_bo = gen75_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2250
2251     // begin programing
2252     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
2253     intel_batchbuffer_emit_mi_flush(batch);
2254     
2255     // picture level programing
2256     gen75_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
2257
2258     BEGIN_BCS_BATCH(batch, 2);
2259     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
2260     OUT_BCS_RELOC(batch,
2261                   slice_batch_bo,
2262                   I915_GEM_DOMAIN_COMMAND, 0, 
2263                   0);
2264     ADVANCE_BCS_BATCH(batch);
2265
2266     // end programing
2267     intel_batchbuffer_end_atomic(batch);
2268
2269     dri_bo_unreference(slice_batch_bo);
2270 }
2271
2272 static VAStatus
2273 intel_mfc_mpeg2_prepare(VADriverContextP ctx, 
2274                         struct encode_state *encode_state,
2275                         struct intel_encoder_context *encoder_context)
2276 {
2277     struct i965_driver_data *i965 = i965_driver_data(ctx);
2278     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2279     struct object_surface *obj_surface; 
2280     struct object_buffer *obj_buffer;
2281     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
2282     struct i965_coded_buffer_segment *coded_buffer_segment;
2283     VAStatus vaStatus = VA_STATUS_SUCCESS;
2284     dri_bo *bo;
2285     int i;
2286
2287     /* reconstructed surface */
2288     obj_surface = SURFACE(pic_param->reconstructed_picture);
2289     assert(obj_surface);
2290     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2291     mfc_context->post_deblocking_output.bo = obj_surface->bo;
2292     dri_bo_reference(mfc_context->post_deblocking_output.bo);
2293     mfc_context->surface_state.width = obj_surface->orig_width;
2294     mfc_context->surface_state.height = obj_surface->orig_height;
2295     mfc_context->surface_state.w_pitch = obj_surface->width;
2296     mfc_context->surface_state.h_pitch = obj_surface->height;
2297
2298     /* forward reference */
2299     obj_surface = SURFACE(pic_param->forward_reference_picture);
2300
2301     if (obj_surface && obj_surface->bo) {
2302         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2303         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2304     } else
2305         mfc_context->reference_surfaces[0].bo = NULL;
2306
2307     /* backward reference */
2308     obj_surface = SURFACE(pic_param->backward_reference_picture);
2309
2310     if (obj_surface && obj_surface->bo) {
2311         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2312         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2313     } else {
2314         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2315
2316         if (mfc_context->reference_surfaces[1].bo)
2317             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2318     }
2319
2320     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2321         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2322
2323         if (mfc_context->reference_surfaces[i].bo)
2324             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2325     }
2326     
2327     /* input YUV surface */
2328     obj_surface = SURFACE(encoder_context->input_yuv_surface);
2329     assert(obj_surface && obj_surface->bo);
2330     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2331     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2332
2333     /* coded buffer */
2334     obj_buffer = BUFFER(pic_param->coded_buf);
2335     bo = obj_buffer->buffer_store->bo;
2336     assert(bo);
2337     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2338     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2339     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2340     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2341
2342     /* set the internal flag to 0 to indicate the coded size is unknown */
2343     dri_bo_map(bo, 1);
2344     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2345     coded_buffer_segment->mapped = 0;
2346     coded_buffer_segment->codec = CODED_MPEG2;
2347     dri_bo_unmap(bo);
2348
2349     return vaStatus;
2350 }
2351
2352 static VAStatus
2353 gen75_mfc_mpeg2_encode_picture(VADriverContextP ctx, 
2354                                struct encode_state *encode_state,
2355                                struct intel_encoder_context *encoder_context)
2356 {
2357     gen75_mfc_init(ctx, encode_state, encoder_context);
2358     intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2359     /*Programing bcs pipeline*/
2360     gen75_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2361     gen75_mfc_run(ctx, encode_state, encoder_context);
2362
2363     return VA_STATUS_SUCCESS;
2364 }
2365
2366 static void
2367 gen75_mfc_context_destroy(void *context)
2368 {
2369     struct gen6_mfc_context *mfc_context = context;
2370     int i;
2371
2372     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2373     mfc_context->post_deblocking_output.bo = NULL;
2374
2375     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2376     mfc_context->pre_deblocking_output.bo = NULL;
2377
2378     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2379     mfc_context->uncompressed_picture_source.bo = NULL;
2380
2381     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
2382     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2383
2384     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2385         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2386         mfc_context->direct_mv_buffers[i].bo = NULL;
2387     }
2388
2389     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2390     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
2391
2392     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2393     mfc_context->macroblock_status_buffer.bo = NULL;
2394
2395     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2396     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2397
2398     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2399     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2400
2401
2402     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2403         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2404         mfc_context->reference_surfaces[i].bo = NULL;  
2405     }
2406
2407     i965_gpe_context_destroy(&mfc_context->gpe_context);
2408
2409     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2410     mfc_context->mfc_batchbuffer_surface.bo = NULL;
2411
2412     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2413     mfc_context->aux_batchbuffer_surface.bo = NULL;
2414
2415     if (mfc_context->aux_batchbuffer)
2416         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2417
2418     mfc_context->aux_batchbuffer = NULL;
2419
2420     free(mfc_context);
2421 }
2422
2423 static VAStatus gen75_mfc_pipeline(VADriverContextP ctx,
2424                   VAProfile profile,
2425                   struct encode_state *encode_state,
2426                   struct intel_encoder_context *encoder_context)
2427 {
2428     VAStatus vaStatus;
2429
2430     switch (profile) {
2431     case VAProfileH264Baseline:
2432     case VAProfileH264Main:
2433     case VAProfileH264High:
2434         vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
2435         break;
2436
2437         /* FIXME: add for other profile */
2438     case VAProfileMPEG2Simple:
2439     case VAProfileMPEG2Main:
2440         vaStatus = gen75_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
2441         break;
2442
2443     default:
2444         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2445         break;
2446     }
2447
2448     return vaStatus;
2449 }
2450
2451 Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2452 {
2453     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
2454
2455     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2456
2457     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
2458     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
2459
2460     mfc_context->gpe_context.curbe.length = 32 * 4;
2461
2462     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2463     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
2464     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
2465     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
2466     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
2467
2468     i965_gpe_load_kernels(ctx,
2469                           &mfc_context->gpe_context,
2470                           gen75_mfc_kernels,
2471                           NUM_MFC_KERNEL);
2472
2473     mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
2474     mfc_context->set_surface_state = gen75_mfc_surface_state;
2475     mfc_context->ind_obj_base_addr_state = gen75_mfc_ind_obj_base_addr_state;
2476     mfc_context->avc_img_state = gen75_mfc_avc_img_state;
2477     mfc_context->avc_qm_state = gen75_mfc_avc_qm_state;
2478     mfc_context->avc_fqm_state = gen75_mfc_avc_fqm_state;
2479     mfc_context->insert_object = gen75_mfc_avc_insert_object;
2480     mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;
2481
2482     encoder_context->mfc_context = mfc_context;
2483     encoder_context->mfc_context_destroy = gen75_mfc_context_destroy;
2484     encoder_context->mfc_pipeline = gen75_mfc_pipeline;
2485     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
2486
2487     return True;
2488 }