H264_encoding: Don't update the slice qp for CBR mode when finding packed slice_heade...
[platform/upstream/libva-intel-driver.git] / src / gen75_mfc.c
1 /*
2  * Copyright © 2010-2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45
/*
 * AVC_* constants: not referenced in the visible part of this file.
 * NOTE(review): presumably offsets/mask for fields of the VME output
 * record -- confirm against their users further down the file.
 */
#define AVC_INTRA_RDO_OFFSET    4
#define AVC_INTER_RDO_OFFSET    10
#define AVC_INTER_MSG_OFFSET    8
#define AVC_INTER_MV_OFFSET     48
#define AVC_RDO_MASK            0xFFFF

#define MFC_SOFTWARE_HASWELL    0

/*
 * Surface-state / binding-table layout helpers.  The macro arguments are
 * parenthesized so that an expression argument such as "base + 1" is
 * scaled as a whole instead of being split by operator precedence.
 */
#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

/* Hardware revisions >= B0_STEP_REV use the longer "bplus" command layouts below. */
#define B0_STEP_REV             2
#define IS_STEPPING_BPLUS(i965) (((i965)->intel.revision) >= B0_STEP_REV)
60
/*
 * Kernel binary, stored as rows of four 32-bit words.  The contents are
 * pulled in verbatim from the included .g75b file.
 */
static const uint32_t gen75_mfc_batchbuffer_avc[][4] = {
#include "shaders/utils/mfc_batchbuffer_hsw.g75b"
};

/*
 * Kernel table for this file; it currently has a single entry that wraps
 * the binary above together with its size in bytes.
 * NOTE(review): the initializer is positional -- field meaning follows the
 * declaration of struct i965_kernel, which is not visible here.
 */
static struct i965_kernel gen75_mfc_kernels[] = {
    {
        "MFC AVC INTRA BATCHBUFFER ",
        MFC_BATCHBUFFER_AVC_INTRA,
        gen75_mfc_batchbuffer_avc,
        sizeof(gen75_mfc_batchbuffer_avc),
        NULL
    },
};
74
/*
 * Inter-prediction mode constants.  They are not used in the visible part
 * of this file.
 * NOTE(review): presumably masks/values for decoding the inter MB mode and
 * sub-MB shape from a VME message dword -- confirm against their users.
 */
#define         INTER_MODE_MASK         0x03
#define         INTER_8X8               0x03
#define         INTER_16X8              0x01
#define         INTER_8X16              0x02
#define         SUBMB_SHAPE_MASK        0x00FF00

/* Values placed at bit 20 and up; see users for the exact field they fill. */
#define         INTER_MV8               (4 << 20)
#define         INTER_MV32              (6 << 20)
83
84
85 static void
86 gen75_mfc_pipe_mode_select(VADriverContextP ctx,
87                            int standard_select,
88                            struct intel_encoder_context *encoder_context)
89 {
90     struct intel_batchbuffer *batch = encoder_context->base.batch;
91     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
92     assert(standard_select == MFX_FORMAT_MPEG2 ||
93            standard_select == MFX_FORMAT_AVC);
94
95     BEGIN_BCS_BATCH(batch, 5);
96
97     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
98     OUT_BCS_BATCH(batch,
99                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
100                   (MFD_MODE_VLD << 15) | /* VLD mode */
101                   (0 << 10) | /* Stream-Out Enable */
102                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
103                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
104                   (0 << 5)  | /* not in stitch mode */
105                   (1 << 4)  | /* encoding mode */
106                   (standard_select << 0));  /* standard select: avc or mpeg2 */
107     OUT_BCS_BATCH(batch,
108                   (0 << 7)  | /* expand NOA bus flag */
109                   (0 << 6)  | /* disable slice-level clock gating */
110                   (0 << 5)  | /* disable clock gating for NOA */
111                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
112                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
113                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
114                   (0 << 1)  |
115                   (0 << 0));
116     OUT_BCS_BATCH(batch, 0);
117     OUT_BCS_BATCH(batch, 0);
118
119     ADVANCE_BCS_BATCH(batch);
120 }
121
122 static void
123 gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
124 {
125     struct intel_batchbuffer *batch = encoder_context->base.batch;
126     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
127
128     BEGIN_BCS_BATCH(batch, 6);
129
130     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
131     OUT_BCS_BATCH(batch, 0);
132     OUT_BCS_BATCH(batch,
133                   ((mfc_context->surface_state.height - 1) << 18) |
134                   ((mfc_context->surface_state.width - 1) << 4));
135     OUT_BCS_BATCH(batch,
136                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
137                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
138                   (0 << 22) | /* surface object control state, FIXME??? */
139                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
140                   (0 << 2)  | /* must be 0 for interleave U/V */
141                   (1 << 1)  | /* must be tiled */
142                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
143     OUT_BCS_BATCH(batch,
144                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
145                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
146     OUT_BCS_BATCH(batch, 0);
147
148     ADVANCE_BCS_BATCH(batch);
149 }
150
151 static void
152 gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
153                                         struct intel_encoder_context *encoder_context)
154 {
155     struct intel_batchbuffer *batch = encoder_context->base.batch;
156     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
157     struct gen6_vme_context *vme_context = encoder_context->vme_context;
158
159     BEGIN_BCS_BATCH(batch, 26);
160
161     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
162     /* the DW1-3 is for the MFX indirect bistream offset */
163     OUT_BCS_BATCH(batch, 0);
164     OUT_BCS_BATCH(batch, 0);
165     OUT_BCS_BATCH(batch, 0);
166     /* the DW4-5 is the MFX upper bound */
167     OUT_BCS_BATCH(batch, 0);
168     OUT_BCS_BATCH(batch, 0);
169
170     /* the DW6-10 is for MFX Indirect MV Object Base Address */
171     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
172     OUT_BCS_BATCH(batch, 0);
173     OUT_BCS_BATCH(batch, 0);
174     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
175     OUT_BCS_BATCH(batch, 0);
176
177     /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
178     OUT_BCS_BATCH(batch, 0);
179     OUT_BCS_BATCH(batch, 0);
180     OUT_BCS_BATCH(batch, 0);
181     OUT_BCS_BATCH(batch, 0);
182     OUT_BCS_BATCH(batch, 0);
183
184     /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
185     OUT_BCS_BATCH(batch, 0);
186     OUT_BCS_BATCH(batch, 0);
187     OUT_BCS_BATCH(batch, 0);
188     OUT_BCS_BATCH(batch, 0);
189     OUT_BCS_BATCH(batch, 0);
190
191     /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/        
192     OUT_BCS_RELOC(batch,
193                   mfc_context->mfc_indirect_pak_bse_object.bo,
194                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
195                   0);
196     OUT_BCS_BATCH(batch, 0);
197     OUT_BCS_BATCH(batch, 0);
198         
199     OUT_BCS_RELOC(batch,
200                   mfc_context->mfc_indirect_pak_bse_object.bo,
201                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
202                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
203     OUT_BCS_BATCH(batch, 0);
204
205     ADVANCE_BCS_BATCH(batch);
206 }
207
208 static void
209 gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
210 {
211     struct intel_batchbuffer *batch = encoder_context->base.batch;
212     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
213     struct gen6_vme_context *vme_context = encoder_context->vme_context;
214     struct i965_driver_data *i965 = i965_driver_data(ctx);
215
216     if (IS_STEPPING_BPLUS(i965)) {
217         gen75_mfc_ind_obj_base_addr_state_bplus(ctx, encoder_context);
218         return;
219     }
220
221     BEGIN_BCS_BATCH(batch, 11);
222
223     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
224     OUT_BCS_BATCH(batch, 0);
225     OUT_BCS_BATCH(batch, 0);
226     /* MFX Indirect MV Object Base Address */
227     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
228     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
229     OUT_BCS_BATCH(batch, 0);
230     OUT_BCS_BATCH(batch, 0);
231     OUT_BCS_BATCH(batch, 0);
232     OUT_BCS_BATCH(batch, 0);
233     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
234     OUT_BCS_RELOC(batch,
235                   mfc_context->mfc_indirect_pak_bse_object.bo,
236                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
237                   0);
238     OUT_BCS_RELOC(batch,
239                   mfc_context->mfc_indirect_pak_bse_object.bo,
240                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
241                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
242
243     ADVANCE_BCS_BATCH(batch);
244 }
245
246 static void
247 gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,  
248                         struct intel_encoder_context *encoder_context)
249 {
250     struct intel_batchbuffer *batch = encoder_context->base.batch;
251     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
252     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
253
254     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
255     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
256
257     BEGIN_BCS_BATCH(batch, 16);
258
259     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
260     /*DW1. MB setting of frame */
261     OUT_BCS_BATCH(batch,
262                   ((width_in_mbs * height_in_mbs - 1) & 0xFFFF));
263     OUT_BCS_BATCH(batch, 
264                   ((height_in_mbs - 1) << 16) | 
265                   ((width_in_mbs - 1) << 0));
266     /* DW3 QP setting */
267     OUT_BCS_BATCH(batch, 
268                   (0 << 24) |   /* Second Chroma QP Offset */
269                   (0 << 16) |   /* Chroma QP Offset */
270                   (0 << 14) |   /* Max-bit conformance Intra flag */
271                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
272                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
273                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
274                   (0 << 8)  |   /* FIXME: Image Structure */
275                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
276     OUT_BCS_BATCH(batch,
277                   (0 << 16) |   /* Mininum Frame size */
278                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
279                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
280                   (0 << 13) |   /* CABAC 0 word insertion test enable */
281                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
282                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
283                   (0 << 8)  |   /* FIXME: MbMvFormatFlag */
284                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
285                   (0 << 6)  |   /* Only valid for VLD decoding mode */
286                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
287                   (0 << 4)  |   /* Direct 8x8 inference flag */
288                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
289                   (1 << 2)  |   /* Frame MB only flag */
290                   (0 << 1)  |   /* MBAFF mode is in active */
291                   (0 << 0));    /* Field picture flag */
292     /* DW5 Trellis quantization */
293     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
294     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
295                   (0xBB8 << 16) |       /* InterMbMaxSz */
296                   (0xEE8) );            /* IntraMbMaxSz */
297     OUT_BCS_BATCH(batch, 0);            /* Reserved */
298     /* DW8. QP delta */
299     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
300     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
301     /* DW10. Bit setting for MB */
302     OUT_BCS_BATCH(batch, 0x8C000000);
303     OUT_BCS_BATCH(batch, 0x00010000);
304     /* DW12. */
305     OUT_BCS_BATCH(batch, 0);
306     OUT_BCS_BATCH(batch, 0x02010100);
307     /* DW14. For short format */
308     OUT_BCS_BATCH(batch, 0);
309     OUT_BCS_BATCH(batch, 0);
310
311     ADVANCE_BCS_BATCH(batch);
312 }
313
314 static void
315 gen75_mfc_qm_state(VADriverContextP ctx,
316                    int qm_type,
317                    unsigned int *qm,
318                    int qm_length,
319                    struct intel_encoder_context *encoder_context)
320 {
321     struct intel_batchbuffer *batch = encoder_context->base.batch;
322     unsigned int qm_buffer[16];
323
324     assert(qm_length <= 16);
325     assert(sizeof(*qm) == 4);
326     memcpy(qm_buffer, qm, qm_length * 4);
327
328     BEGIN_BCS_BATCH(batch, 18);
329     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
330     OUT_BCS_BATCH(batch, qm_type << 0);
331     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
332     ADVANCE_BCS_BATCH(batch);
333 }
334
335 static void
336 gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
337 {
338     unsigned int qm[16] = {
339         0x10101010, 0x10101010, 0x10101010, 0x10101010,
340         0x10101010, 0x10101010, 0x10101010, 0x10101010,
341         0x10101010, 0x10101010, 0x10101010, 0x10101010,
342         0x10101010, 0x10101010, 0x10101010, 0x10101010
343     };
344
345     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
346     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
347     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
348     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
349 }
350
351 static void
352 gen75_mfc_fqm_state(VADriverContextP ctx,
353                     int fqm_type,
354                     unsigned int *fqm,
355                     int fqm_length,
356                     struct intel_encoder_context *encoder_context)
357 {
358     struct intel_batchbuffer *batch = encoder_context->base.batch;
359     unsigned int fqm_buffer[32];
360
361     assert(fqm_length <= 32);
362     assert(sizeof(*fqm) == 4);
363     memcpy(fqm_buffer, fqm, fqm_length * 4);
364
365     BEGIN_BCS_BATCH(batch, 34);
366     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
367     OUT_BCS_BATCH(batch, fqm_type << 0);
368     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
369     ADVANCE_BCS_BATCH(batch);
370 }
371
372 static void
373 gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
374 {
375     unsigned int qm[32] = {
376         0x10001000, 0x10001000, 0x10001000, 0x10001000,
377         0x10001000, 0x10001000, 0x10001000, 0x10001000,
378         0x10001000, 0x10001000, 0x10001000, 0x10001000,
379         0x10001000, 0x10001000, 0x10001000, 0x10001000,
380         0x10001000, 0x10001000, 0x10001000, 0x10001000,
381         0x10001000, 0x10001000, 0x10001000, 0x10001000,
382         0x10001000, 0x10001000, 0x10001000, 0x10001000,
383         0x10001000, 0x10001000, 0x10001000, 0x10001000
384     };
385
386     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
387     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
388     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
389     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
390 }
391
392 static void
393 gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
394                             unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
395                             int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
396                             struct intel_batchbuffer *batch)
397 {
398     if (batch == NULL)
399         batch = encoder_context->base.batch;
400
401     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
402
403     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
404     OUT_BCS_BATCH(batch,
405                   (0 << 16) |   /* always start at offset 0 */
406                   (data_bits_in_last_dw << 8) |
407                   (skip_emul_byte_count << 4) |
408                   (!!emulation_flag << 3) |
409                   ((!!is_last_header) << 2) |
410                   ((!!is_end_of_slice) << 1) |
411                   (0 << 0));    /* FIXME: ??? */
412     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
413
414     ADVANCE_BCS_BATCH(batch);
415 }
416
417
418 static void gen75_mfc_init(VADriverContextP ctx,
419                            struct encode_state *encode_state,
420                            struct intel_encoder_context *encoder_context)
421 {
422     struct i965_driver_data *i965 = i965_driver_data(ctx);
423     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
424     dri_bo *bo;
425     int i;
426     int width_in_mbs = 0;
427     int height_in_mbs = 0;
428     int slice_batchbuffer_size;
429
430     if (encoder_context->codec == CODEC_H264 ||
431         encoder_context->codec == CODEC_H264_MVC) {
432         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
433         width_in_mbs = pSequenceParameter->picture_width_in_mbs;
434         height_in_mbs = pSequenceParameter->picture_height_in_mbs;
435     } else {
436         VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
437
438         assert(encoder_context->codec == CODEC_MPEG2);
439
440         width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
441         height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
442     }
443
444     slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
445                 (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
446
447     /*Encode common setup for MFC*/
448     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
449     mfc_context->post_deblocking_output.bo = NULL;
450
451     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
452     mfc_context->pre_deblocking_output.bo = NULL;
453
454     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
455     mfc_context->uncompressed_picture_source.bo = NULL;
456
457     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
458     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
459
460     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
461         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
462         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
463         mfc_context->direct_mv_buffers[i].bo = NULL;
464     }
465
466     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
467         if (mfc_context->reference_surfaces[i].bo != NULL)
468             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
469         mfc_context->reference_surfaces[i].bo = NULL;  
470     }
471
472     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
473     bo = dri_bo_alloc(i965->intel.bufmgr,
474                       "Buffer",
475                       width_in_mbs * 64,
476                       64);
477     assert(bo);
478     mfc_context->intra_row_store_scratch_buffer.bo = bo;
479
480     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
481     bo = dri_bo_alloc(i965->intel.bufmgr,
482                       "Buffer",
483                       width_in_mbs * height_in_mbs * 16,
484                       64);
485     assert(bo);
486     mfc_context->macroblock_status_buffer.bo = bo;
487
488     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
489     bo = dri_bo_alloc(i965->intel.bufmgr,
490                       "Buffer",
491                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
492                       64);
493     assert(bo);
494     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
495
496     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
497     bo = dri_bo_alloc(i965->intel.bufmgr,
498                       "Buffer",
499                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
500                       0x1000);
501     assert(bo);
502     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
503
504     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
505     mfc_context->mfc_batchbuffer_surface.bo = NULL;
506
507     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
508     mfc_context->aux_batchbuffer_surface.bo = NULL;
509
510     if (mfc_context->aux_batchbuffer)
511         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
512
513     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD,
514                                                         slice_batchbuffer_size);
515     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
516     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
517     mfc_context->aux_batchbuffer_surface.pitch = 16;
518     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
519     mfc_context->aux_batchbuffer_surface.size_block = 16;
520
521     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
522 }
523
524 static void
525 gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
526                                     struct intel_encoder_context *encoder_context)
527 {
528     struct intel_batchbuffer *batch = encoder_context->base.batch;
529     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
530     int i;
531
532     BEGIN_BCS_BATCH(batch, 61);
533
534     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
535
536     /* the DW1-3 is for pre_deblocking */
537     if (mfc_context->pre_deblocking_output.bo)
538         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
539                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
540                       0);
541     else
542         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
543
544     OUT_BCS_BATCH(batch, 0);
545     OUT_BCS_BATCH(batch, 0);
546     /* the DW4-6 is for the post_deblocking */
547
548     if (mfc_context->post_deblocking_output.bo)
549         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
550                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
551                       0);                                                                                       /* post output addr  */ 
552     else
553         OUT_BCS_BATCH(batch, 0);
554     OUT_BCS_BATCH(batch, 0);
555     OUT_BCS_BATCH(batch, 0);
556
557     /* the DW7-9 is for the uncompressed_picture */
558     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
559                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
560                   0); /* uncompressed data */
561
562     OUT_BCS_BATCH(batch, 0);
563     OUT_BCS_BATCH(batch, 0);
564
565     /* the DW10-12 is for the mb status */
566     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
567                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
568                   0); /* StreamOut data*/
569     OUT_BCS_BATCH(batch, 0);
570     OUT_BCS_BATCH(batch, 0);
571
572     /* the DW13-15 is for the intra_row_store_scratch */
573     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
574                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
575                   0);   
576     OUT_BCS_BATCH(batch, 0);
577     OUT_BCS_BATCH(batch, 0);
578
579     /* the DW16-18 is for the deblocking filter */
580     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
581                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
582                   0);
583     OUT_BCS_BATCH(batch, 0);
584     OUT_BCS_BATCH(batch, 0);
585
586     /* the DW 19-50 is for Reference pictures*/
587     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
588         if ( mfc_context->reference_surfaces[i].bo != NULL) {
589             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
590                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
591                           0);                   
592         } else {
593             OUT_BCS_BATCH(batch, 0);
594         }
595         OUT_BCS_BATCH(batch, 0);
596     }
597     OUT_BCS_BATCH(batch, 0);
598
599     /* The DW 52-54 is for the MB status buffer */
600     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
601                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
602                   0);                                                                                   /* Macroblock status buffer*/
603         
604     OUT_BCS_BATCH(batch, 0);
605     OUT_BCS_BATCH(batch, 0);
606
607     /* the DW 55-57 is the ILDB buffer */
608     OUT_BCS_BATCH(batch, 0);
609     OUT_BCS_BATCH(batch, 0);
610     OUT_BCS_BATCH(batch, 0);
611
612     /* the DW 58-60 is the second ILDB buffer */
613     OUT_BCS_BATCH(batch, 0);
614     OUT_BCS_BATCH(batch, 0);
615     OUT_BCS_BATCH(batch, 0);
616     ADVANCE_BCS_BATCH(batch);
617 }
618
619 static void
620 gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
621 {
622     struct intel_batchbuffer *batch = encoder_context->base.batch;
623     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
624     struct i965_driver_data *i965 = i965_driver_data(ctx);
625     int i;
626
627     if (IS_STEPPING_BPLUS(i965)) {
628         gen75_mfc_pipe_buf_addr_state_bplus(ctx, encoder_context);
629         return;
630     }
631
632     BEGIN_BCS_BATCH(batch, 25);
633
634     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
635
636     if (mfc_context->pre_deblocking_output.bo)
637         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
638                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
639                       0);
640     else
641         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
642
643     if (mfc_context->post_deblocking_output.bo)
644         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
645                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
646                       0);                                                                                       /* post output addr  */ 
647     else
648         OUT_BCS_BATCH(batch, 0);
649
650     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
651                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
652                   0);                                                                                   /* uncompressed data */
653     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
654                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
655                   0);                                                                                   /* StreamOut data*/
656     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
657                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
658                   0);   
659     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
660                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
661                   0);
662     /* 7..22 Reference pictures*/
663     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
664         if ( mfc_context->reference_surfaces[i].bo != NULL) {
665             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
666                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
667                           0);                   
668         } else {
669             OUT_BCS_BATCH(batch, 0);
670         }
671     }
672     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
673                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
674                   0);                                                                                   /* Macroblock status buffer*/
675
676     OUT_BCS_BATCH(batch, 0);
677
678     ADVANCE_BCS_BATCH(batch);
679 }
680
681 static void
682 gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
683                                      struct intel_encoder_context *encoder_context)
684 {
685     struct intel_batchbuffer *batch = encoder_context->base.batch;
686     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
687
688     int i;
689
690     BEGIN_BCS_BATCH(batch, 71);
691
692     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
693
694     /* Reference frames and Current frames */
695     /* the DW1-32 is for the direct MV for reference */
696     for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
697         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
698             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
699                           I915_GEM_DOMAIN_INSTRUCTION, 0,
700                           0);
701             OUT_BCS_BATCH(batch, 0);
702         } else {
703             OUT_BCS_BATCH(batch, 0);
704             OUT_BCS_BATCH(batch, 0);
705         }
706     }
707     OUT_BCS_BATCH(batch, 0);
708
709     /* the DW34-36 is the MV for the current reference */
710     OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
711                   I915_GEM_DOMAIN_INSTRUCTION, 0,
712                   0);
713
714     OUT_BCS_BATCH(batch, 0);
715     OUT_BCS_BATCH(batch, 0);
716
717     /* POL list */
718     for(i = 0; i < 32; i++) {
719         OUT_BCS_BATCH(batch, i/2);
720     }
721     OUT_BCS_BATCH(batch, 0);
722     OUT_BCS_BATCH(batch, 0);
723
724     ADVANCE_BCS_BATCH(batch);
725 }
726
727 static void
728 gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
729 {
730     struct intel_batchbuffer *batch = encoder_context->base.batch;
731     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
732     struct i965_driver_data *i965 = i965_driver_data(ctx);
733     int i;
734
735     if (IS_STEPPING_BPLUS(i965)) {
736         gen75_mfc_avc_directmode_state_bplus(ctx, encoder_context);
737         return;
738     }
739
740     BEGIN_BCS_BATCH(batch, 69);
741
742     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
743
744     /* Reference frames and Current frames */
745     for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
746         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
747             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
748                           I915_GEM_DOMAIN_INSTRUCTION, 0,
749                           0);
750         } else {
751             OUT_BCS_BATCH(batch, 0);
752         }
753     }
754
755     /* POL list */
756     for(i = 0; i < 32; i++) {
757         OUT_BCS_BATCH(batch, i/2);
758     }
759     OUT_BCS_BATCH(batch, 0);
760     OUT_BCS_BATCH(batch, 0);
761
762     ADVANCE_BCS_BATCH(batch);
763 }
764
765
766 static void
767 gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
768                                         struct intel_encoder_context *encoder_context)
769 {
770     struct intel_batchbuffer *batch = encoder_context->base.batch;
771     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
772
773     BEGIN_BCS_BATCH(batch, 10);
774
775     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
776     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
777                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
778                   0);
779     OUT_BCS_BATCH(batch, 0);
780     OUT_BCS_BATCH(batch, 0);
781         
782     /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
783     OUT_BCS_BATCH(batch, 0);
784     OUT_BCS_BATCH(batch, 0);
785     OUT_BCS_BATCH(batch, 0);
786
787     /* the DW7-9 is for Bitplane Read Buffer Base Address */
788     OUT_BCS_BATCH(batch, 0);
789     OUT_BCS_BATCH(batch, 0);
790     OUT_BCS_BATCH(batch, 0);
791
792     ADVANCE_BCS_BATCH(batch);
793 }
794
795 static void
796 gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
797 {
798     struct intel_batchbuffer *batch = encoder_context->base.batch;
799     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
800     struct i965_driver_data *i965 = i965_driver_data(ctx);
801
802     if (IS_STEPPING_BPLUS(i965)) {
803         gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, encoder_context);
804         return;
805     }
806
807     BEGIN_BCS_BATCH(batch, 4);
808
809     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
810     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
811                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
812                   0);
813     OUT_BCS_BATCH(batch, 0);
814     OUT_BCS_BATCH(batch, 0);
815
816     ADVANCE_BCS_BATCH(batch);
817 }
818
819
820 static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
821                                                        struct encode_state *encode_state,
822                                                        struct intel_encoder_context *encoder_context)
823 {
824     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
825
826     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
827     mfc_context->set_surface_state(ctx, encoder_context);
828     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
829     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
830     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
831     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
832     mfc_context->avc_qm_state(ctx, encoder_context);
833     mfc_context->avc_fqm_state(ctx, encoder_context);
834     gen75_mfc_avc_directmode_state(ctx, encoder_context); 
835     intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
836 }
837
838
839 static VAStatus gen75_mfc_run(VADriverContextP ctx, 
840                               struct encode_state *encode_state,
841                               struct intel_encoder_context *encoder_context)
842 {
843     struct intel_batchbuffer *batch = encoder_context->base.batch;
844
845     intel_batchbuffer_flush(batch);             //run the pipeline
846
847     return VA_STATUS_SUCCESS;
848 }
849
850
851 static VAStatus
852 gen75_mfc_stop(VADriverContextP ctx, 
853                struct encode_state *encode_state,
854                struct intel_encoder_context *encoder_context,
855                int *encoded_bits_size)
856 {
857     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
858     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
859     VACodedBufferSegment *coded_buffer_segment;
860     
861     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
862     assert(vaStatus == VA_STATUS_SUCCESS);
863     *encoded_bits_size = coded_buffer_segment->size * 8;
864     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
865
866     return VA_STATUS_SUCCESS;
867 }
868
869
870 static void
871 gen75_mfc_avc_slice_state(VADriverContextP ctx,
872                           VAEncPictureParameterBufferH264 *pic_param,
873                           VAEncSliceParameterBufferH264 *slice_param,
874                           struct encode_state *encode_state,
875                           struct intel_encoder_context *encoder_context,
876                           int rate_control_enable,
877                           int qp,
878                           struct intel_batchbuffer *batch)
879 {
880     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
881     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
882     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
883     int beginmb = slice_param->macroblock_address;
884     int endmb = beginmb + slice_param->num_macroblocks;
885     int beginx = beginmb % width_in_mbs;
886     int beginy = beginmb / width_in_mbs;
887     int nextx =  endmb % width_in_mbs;
888     int nexty = endmb / width_in_mbs;
889     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
890     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
891     int maxQpN, maxQpP;
892     unsigned char correct[6], grow, shrink;
893     int i;
894     int weighted_pred_idc = 0;
895     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
896     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
897     int num_ref_l0 = 0, num_ref_l1 = 0;
898
899     if (batch == NULL)
900         batch = encoder_context->base.batch;
901
902     if (slice_type == SLICE_TYPE_I) {
903         luma_log2_weight_denom = 0;
904         chroma_log2_weight_denom = 0;
905     } else if (slice_type == SLICE_TYPE_P) {
906         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
907         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
908
909         if (slice_param->num_ref_idx_active_override_flag)
910             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
911     } else if (slice_type == SLICE_TYPE_B) {
912         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
913         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
914         num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
915
916         if (slice_param->num_ref_idx_active_override_flag) {
917             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
918             num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
919         }
920
921         if (weighted_pred_idc == 2) {
922             /* 8.4.3 - Derivation process for prediction weights (8-279) */
923             luma_log2_weight_denom = 5;
924             chroma_log2_weight_denom = 5;
925         }
926     }
927
928     maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
929     maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
930
931     for (i = 0; i < 6; i++)
932         correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
933
934     grow = mfc_context->bit_rate_control_context[slice_type].GrowInit + 
935         (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
936     shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit + 
937         (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
938
939     BEGIN_BCS_BATCH(batch, 11);;
940
941     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
942     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
943
944     OUT_BCS_BATCH(batch,
945                   (num_ref_l0 << 16) |
946                   (num_ref_l1 << 24) |
947                   (chroma_log2_weight_denom << 8) |
948                   (luma_log2_weight_denom << 0));
949
950     OUT_BCS_BATCH(batch, 
951                   (weighted_pred_idc << 30) |
952                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
953                   (slice_param->disable_deblocking_filter_idc << 27) |
954                   (slice_param->cabac_init_idc << 24) |
955                   (qp<<16) |                    /*Slice Quantization Parameter*/
956                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
957                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
958     OUT_BCS_BATCH(batch,
959                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
960                   (beginx << 16) |
961                   slice_param->macroblock_address );
962     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
963     OUT_BCS_BATCH(batch, 
964                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
965                   (1 << 30) |           /*ResetRateControlCounter*/
966                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
967                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
968                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
969                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
970                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
971                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
972                   (last_slice << 19) |     /*IsLastSlice*/
973                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
974                   (1 << 17) |       /*HeaderPresentFlag*/       
975                   (1 << 16) |       /*SliceData PresentFlag*/
976                   (1 << 15) |       /*TailPresentFlag*/
977                   (1 << 13) |       /*RBSP NAL TYPE*/   
978                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
979     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
980     OUT_BCS_BATCH(batch,
981                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
982                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
983                   (shrink << 8)  |
984                   (grow << 0));   
985     OUT_BCS_BATCH(batch,
986                   (correct[5] << 20) |
987                   (correct[4] << 16) |
988                   (correct[3] << 12) |
989                   (correct[2] << 8) |
990                   (correct[1] << 4) |
991                   (correct[0] << 0));
992     OUT_BCS_BATCH(batch, 0);
993
994     ADVANCE_BCS_BATCH(batch);
995 }
996
997
998 #if MFC_SOFTWARE_HASWELL
999
1000 static int
1001 gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
1002                                int qp,unsigned int *msg,
1003                                struct intel_encoder_context *encoder_context,
1004                                unsigned char target_mb_size, unsigned char max_mb_size,
1005                                struct intel_batchbuffer *batch)
1006 {
1007     int len_in_dwords = 12;
1008     unsigned int intra_msg;
1009 #define         INTRA_MSG_FLAG          (1 << 13)
1010 #define         INTRA_MBTYPE_MASK       (0x1F0000)
1011     if (batch == NULL)
1012         batch = encoder_context->base.batch;
1013
1014     BEGIN_BCS_BATCH(batch, len_in_dwords);
1015
1016     intra_msg = msg[0] & 0xC0FF;
1017     intra_msg |= INTRA_MSG_FLAG;
1018     intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
1019     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1020     OUT_BCS_BATCH(batch, 0);
1021     OUT_BCS_BATCH(batch, 0);
1022     OUT_BCS_BATCH(batch, 
1023                   (0 << 24) |           /* PackedMvNum, Debug*/
1024                   (0 << 20) |           /* No motion vector */
1025                   (1 << 19) |           /* CbpDcY */
1026                   (1 << 18) |           /* CbpDcU */
1027                   (1 << 17) |           /* CbpDcV */
1028                   intra_msg);
1029
1030     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
1031     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
1032     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
1033
1034     /*Stuff for Intra MB*/
1035     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
1036     OUT_BCS_BATCH(batch, msg[2]);       
1037     OUT_BCS_BATCH(batch, msg[3]&0xFF);  
1038     
1039     /*MaxSizeInWord and TargetSzieInWord*/
1040     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1041                   (target_mb_size << 16) );
1042
1043     OUT_BCS_BATCH(batch, 0);
1044
1045     ADVANCE_BCS_BATCH(batch);
1046
1047     return len_in_dwords;
1048 }
1049
1050 static int
1051 gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1052                                unsigned int *msg, unsigned int offset,
1053                                struct intel_encoder_context *encoder_context,
1054                                unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1055                                struct intel_batchbuffer *batch)
1056 {
1057     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1058     int len_in_dwords = 12;
1059     unsigned int inter_msg = 0;
1060     if (batch == NULL)
1061         batch = encoder_context->base.batch;
1062     {
1063 #define MSG_MV_OFFSET   4
1064         unsigned int *mv_ptr;
1065         mv_ptr = msg + MSG_MV_OFFSET;
1066         /* MV of VME output is based on 16 sub-blocks. So it is necessary
1067          * to convert them to be compatible with the format of AVC_PAK
1068          * command.
1069          */
1070         if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
1071             /* MV[0] and MV[2] are replicated */
1072             mv_ptr[4] = mv_ptr[0];
1073             mv_ptr[5] = mv_ptr[1];
1074             mv_ptr[2] = mv_ptr[8];
1075             mv_ptr[3] = mv_ptr[9];
1076             mv_ptr[6] = mv_ptr[8];
1077             mv_ptr[7] = mv_ptr[9];
1078         } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
1079             /* MV[0] and MV[1] are replicated */
1080             mv_ptr[2] = mv_ptr[0];
1081             mv_ptr[3] = mv_ptr[1];
1082             mv_ptr[4] = mv_ptr[16];
1083             mv_ptr[5] = mv_ptr[17];
1084             mv_ptr[6] = mv_ptr[24];
1085             mv_ptr[7] = mv_ptr[25];
1086         } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1087                    !(msg[1] & SUBMB_SHAPE_MASK)) {
1088             /* Don't touch MV[0] or MV[1] */
1089             mv_ptr[2] = mv_ptr[8];
1090             mv_ptr[3] = mv_ptr[9];
1091             mv_ptr[4] = mv_ptr[16];
1092             mv_ptr[5] = mv_ptr[17];
1093             mv_ptr[6] = mv_ptr[24];
1094             mv_ptr[7] = mv_ptr[25];
1095         }
1096     }
1097
1098     BEGIN_BCS_BATCH(batch, len_in_dwords);
1099
1100     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1101
1102     inter_msg = 32;
1103     /* MV quantity */
1104     if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
1105         if (msg[1] & SUBMB_SHAPE_MASK)
1106             inter_msg = 128;
1107     }
1108     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
1109     OUT_BCS_BATCH(batch, offset);
1110     inter_msg = msg[0] & (0x1F00FFFF);
1111     inter_msg |= INTER_MV8;
1112     inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
1113     if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1114         (msg[1] & SUBMB_SHAPE_MASK)) {
1115         inter_msg |= INTER_MV32;
1116     }
1117
1118     OUT_BCS_BATCH(batch, inter_msg);
1119
1120     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
1121     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
1122 #if 0 
1123     if ( slice_type == SLICE_TYPE_B) {
1124         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
1125     } else {
1126         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
1127     }
1128 #else
1129     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
1130 #endif
1131
1132     inter_msg = msg[1] >> 8;
1133     /*Stuff for Inter MB*/
1134     OUT_BCS_BATCH(batch, inter_msg);        
1135     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
1136     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);
1137
1138     /*MaxSizeInWord and TargetSzieInWord*/
1139     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1140                   (target_mb_size << 16) );
1141
1142     OUT_BCS_BATCH(batch, 0x0);    
1143
1144     ADVANCE_BCS_BATCH(batch);
1145
1146     return len_in_dwords;
1147 }
1148
1149 static void 
1150 gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1151                                         struct encode_state *encode_state,
1152                                         struct intel_encoder_context *encoder_context,
1153                                         int slice_index,
1154                                         struct intel_batchbuffer *slice_batch)
1155 {
1156     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1157     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1158     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1159     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1160     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1161     unsigned int *msg = NULL, offset = 0;
1162     unsigned char *msg_ptr = NULL;
1163     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1164     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1165     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1166     int i,x,y;
1167     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1168     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1169     unsigned int tail_data[] = { 0x0, 0x0 };
1170     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1171     int is_intra = slice_type == SLICE_TYPE_I;
1172
1173     if (rate_control_mode == VA_RC_CBR) {
1174         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1175         if (encode_state->slice_header_index[slice_index] == 0)
1176             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1177     }
1178
1179     /* only support for 8-bit pixel bit-depth */
1180     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1181     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1182     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1183     assert(qp >= 0 && qp < 52);
1184
1185     gen75_mfc_avc_slice_state(ctx,
1186                               pPicParameter,
1187                               pSliceParameter,
1188                               encode_state, encoder_context,
1189                               (rate_control_mode == VA_RC_CBR), qp, slice_batch);
1190
1191     if ( slice_index == 0)
1192         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1193
1194     intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
1195
1196     dri_bo_map(vme_context->vme_output.bo , 1);
1197     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1198
1199     if (is_intra) {
1200         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1201     } else {
1202         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1203     }
1204    
1205     for (i = pSliceParameter->macroblock_address; 
1206          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1207         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1208         x = i % width_in_mbs;
1209         y = i / width_in_mbs;
1210         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1211
1212         if (is_intra) {
1213             assert(msg);
1214             gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1215         } else {
1216             int inter_rdo, intra_rdo;
1217             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1218             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1219             offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1220             if (intra_rdo < inter_rdo) { 
1221                 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1222             } else {
1223                 msg += AVC_INTER_MSG_OFFSET;
1224                 gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, slice_type, slice_batch);
1225             }
1226         }
1227     }
1228    
1229     dri_bo_unmap(vme_context->vme_output.bo);
1230
1231     if ( last_slice ) {    
1232         mfc_context->insert_object(ctx, encoder_context,
1233                                    tail_data, 2, 8,
1234                                    2, 1, 1, 0, slice_batch);
1235     } else {
1236         mfc_context->insert_object(ctx, encoder_context,
1237                                    tail_data, 1, 8,
1238                                    1, 1, 1, 0, slice_batch);
1239     }
1240 }
1241
1242 static dri_bo *
1243 gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1244                                    struct encode_state *encode_state,
1245                                    struct intel_encoder_context *encoder_context)
1246 {
1247     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1248     struct i965_driver_data *i965 = i965_driver_data(ctx);
1249     struct intel_batchbuffer *batch;
1250     dri_bo *batch_bo;
1251     int i;
1252     int buffer_size;
1253
1254     batch = mfc_context->aux_batchbuffer;
1255     batch_bo = batch->buffer;
1256     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1257         gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1258     }
1259
1260     intel_batchbuffer_align(batch, 8);
1261     
1262     BEGIN_BCS_BATCH(batch, 2);
1263     OUT_BCS_BATCH(batch, 0);
1264     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1265     ADVANCE_BCS_BATCH(batch);
1266
1267     dri_bo_reference(batch_bo);
1268
1269     intel_batchbuffer_free(batch);
1270     mfc_context->aux_batchbuffer = NULL;
1271
1272     return batch_bo;
1273 }
1274
1275 #else
1276
1277 static void
1278 gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1279                                      struct encode_state *encode_state,
1280                                      struct intel_encoder_context *encoder_context)
1281
1282 {
1283     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1284     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1285
1286     assert(vme_context->vme_output.bo);
1287     mfc_context->buffer_suface_setup(ctx,
1288                                      &mfc_context->gpe_context,
1289                                      &vme_context->vme_output,
1290                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1291                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1292 }
1293
1294 static void
1295 gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1296                                       struct encode_state *encode_state,
1297                                       struct intel_encoder_context *encoder_context)
1298
1299 {
1300     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1301     assert(mfc_context->aux_batchbuffer_surface.bo);
1302     mfc_context->buffer_suface_setup(ctx,
1303                                      &mfc_context->gpe_context,
1304                                      &mfc_context->aux_batchbuffer_surface,
1305                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1306                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1307 }
1308
1309 static void
1310 gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
1311                                      struct encode_state *encode_state,
1312                                      struct intel_encoder_context *encoder_context)
1313 {
1314     gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1315     gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1316 }
1317
1318 static void
1319 gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
1320                                  struct encode_state *encode_state,
1321                                  struct intel_encoder_context *encoder_context)
1322 {
1323     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1324     struct gen6_interface_descriptor_data *desc;   
1325     int i;
1326     dri_bo *bo;
1327
1328     bo = mfc_context->gpe_context.idrt.bo;
1329     dri_bo_map(bo, 1);
1330     assert(bo->virtual);
1331     desc = bo->virtual;
1332
1333     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1334         struct i965_kernel *kernel;
1335
1336         kernel = &mfc_context->gpe_context.kernels[i];
1337         assert(sizeof(*desc) == 32);
1338
1339         /*Setup the descritor table*/
1340         memset(desc, 0, sizeof(*desc));
1341         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1342         desc->desc2.sampler_count = 0;
1343         desc->desc2.sampler_state_pointer = 0;
1344         desc->desc3.binding_table_entry_count = 2;
1345         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1346         desc->desc4.constant_urb_entry_read_offset = 0;
1347         desc->desc4.constant_urb_entry_read_length = 4;
1348                 
1349         /*kernel start*/
1350         dri_bo_emit_reloc(bo,   
1351                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1352                           0,
1353                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1354                           kernel->bo);
1355         desc++;
1356     }
1357
1358     dri_bo_unmap(bo);
1359 }
1360
1361 static void
1362 gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1363                                      struct encode_state *encode_state,
1364                                      struct intel_encoder_context *encoder_context)
1365 {
1366     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1367     
1368     (void)mfc_context;
1369 }
1370
1371 #define AVC_PAK_LEN_IN_BYTE     48
1372 #define AVC_PAK_LEN_IN_OWORD    3
1373
1374 static void
1375 gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1376                                           uint32_t intra_flag,
1377                                           int head_offset,
1378                                           int number_mb_cmds,
1379                                           int slice_end_x,
1380                                           int slice_end_y,
1381                                           int mb_x,
1382                                           int mb_y,
1383                                           int width_in_mbs,
1384                                           int qp,
1385                                           uint32_t fwd_ref,
1386                                           uint32_t bwd_ref)
1387 {
1388     uint32_t temp_value;
1389     BEGIN_BATCH(batch, 14);
1390     
1391     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (14 - 2));
1392     OUT_BATCH(batch, 0);
1393     OUT_BATCH(batch, 0);
1394     OUT_BATCH(batch, 0);
1395     OUT_BATCH(batch, 0);
1396     OUT_BATCH(batch, 0);
1397    
1398     /*inline data */
1399     OUT_BATCH(batch, head_offset / 16);
1400     OUT_BATCH(batch, (intra_flag) | (qp << 16));
1401     temp_value = (mb_x | (mb_y << 8) | (width_in_mbs << 16));
1402     OUT_BATCH(batch, temp_value);
1403
1404     OUT_BATCH(batch, number_mb_cmds);
1405
1406     OUT_BATCH(batch,
1407               ((slice_end_y << 8) | (slice_end_x)));
1408     OUT_BATCH(batch, fwd_ref);
1409     OUT_BATCH(batch, bwd_ref);
1410
1411     OUT_BATCH(batch, MI_NOOP);
1412
1413     ADVANCE_BATCH(batch);
1414 }
1415
1416 static void
1417 gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1418                                         struct intel_encoder_context *encoder_context,
1419                                         VAEncSliceParameterBufferH264 *slice_param,
1420                                         int head_offset,
1421                                         int qp,
1422                                         int last_slice)
1423 {
1424     struct intel_batchbuffer *batch = encoder_context->base.batch;
1425     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1426     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1427     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1428     int total_mbs = slice_param->num_macroblocks;
1429     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1430     int number_mb_cmds = 128;
1431     int starting_offset = 0;
1432     int mb_x, mb_y;
1433     int last_mb, slice_end_x, slice_end_y;
1434     int remaining_mb = total_mbs;
1435     uint32_t fwd_ref , bwd_ref, mb_flag;
1436
1437     last_mb = slice_param->macroblock_address + total_mbs - 1;
1438     slice_end_x = last_mb % width_in_mbs;
1439     slice_end_y = last_mb / width_in_mbs;
1440
1441     if (slice_type == SLICE_TYPE_I) {
1442         fwd_ref = 0;
1443         bwd_ref = 0;
1444         mb_flag = 1;
1445     } else {
1446         fwd_ref = vme_context->ref_index_in_mb[0];
1447         bwd_ref = vme_context->ref_index_in_mb[1];
1448         mb_flag = 0;
1449     }
1450
1451     if (width_in_mbs >= 100) {
1452         number_mb_cmds = width_in_mbs / 5;
1453     } else if (width_in_mbs >= 80) {
1454         number_mb_cmds = width_in_mbs / 4;
1455     } else if (width_in_mbs >= 60) {
1456         number_mb_cmds = width_in_mbs / 3;
1457     } else if (width_in_mbs >= 40) {
1458         number_mb_cmds = width_in_mbs / 2;
1459     } else {
1460         number_mb_cmds = width_in_mbs;
1461     }
1462
1463     do {
1464         if (number_mb_cmds >= remaining_mb) {
1465                 number_mb_cmds = remaining_mb;
1466         }
1467         mb_x = (slice_param->macroblock_address + starting_offset) % width_in_mbs;
1468         mb_y = (slice_param->macroblock_address + starting_offset) / width_in_mbs;
1469
1470         gen75_mfc_batchbuffer_emit_object_command(batch,
1471                                                   mb_flag,
1472                                                   head_offset,
1473                                                   number_mb_cmds,
1474                                                   slice_end_x,
1475                                                   slice_end_y,
1476                                                   mb_x,
1477                                                   mb_y,
1478                                                   width_in_mbs,
1479                                                   qp,
1480                                                   fwd_ref,
1481                                                   bwd_ref);
1482
1483         head_offset += (number_mb_cmds * AVC_PAK_LEN_IN_BYTE);
1484         remaining_mb -= number_mb_cmds;
1485         starting_offset += number_mb_cmds;
1486     } while (remaining_mb > 0);
1487 }
1488                           
1489 /*
1490  * return size in Owords (16bytes)
1491  */         
1492 static void
1493 gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1494                                 struct encode_state *encode_state,
1495                                 struct intel_encoder_context *encoder_context,
1496                                 int slice_index)
1497 {
1498     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1499     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1500     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1501     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1502     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1503     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1504     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1505     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1506     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1507     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1508     unsigned int tail_data[] = { 0x0, 0x0 };
1509     long head_offset;
1510     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1511
1512     if (rate_control_mode == VA_RC_CBR) {
1513         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1514         if (encode_state->slice_header_index[slice_index] == 0)
1515             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1516     }
1517
1518     /* only support for 8-bit pixel bit-depth */
1519     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1520     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1521     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1522     assert(qp >= 0 && qp < 52);
1523
1524     gen75_mfc_avc_slice_state(ctx,
1525                               pPicParameter,
1526                               pSliceParameter,
1527                               encode_state,
1528                               encoder_context,
1529                               (rate_control_mode == VA_RC_CBR),
1530                               qp,
1531                               slice_batch);
1532
1533     if (slice_index == 0)
1534         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1535
1536     intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
1537
1538     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1539     head_offset = intel_batchbuffer_used_size(slice_batch);
1540
1541     slice_batch->ptr += pSliceParameter->num_macroblocks * AVC_PAK_LEN_IN_BYTE;
1542
1543     gen75_mfc_avc_batchbuffer_slice_command(ctx,
1544                                             encoder_context,
1545                                             pSliceParameter,
1546                                             head_offset,
1547                                             qp,
1548                                             last_slice);
1549
1550
1551     /* Aligned for tail */
1552     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1553     if (last_slice) {    
1554         mfc_context->insert_object(ctx,
1555                                    encoder_context,
1556                                    tail_data,
1557                                    2,
1558                                    8,
1559                                    2,
1560                                    1,
1561                                    1,
1562                                    0,
1563                                    slice_batch);
1564     } else {
1565         mfc_context->insert_object(ctx,
1566                                    encoder_context,
1567                                    tail_data,
1568                                    1,
1569                                    8,
1570                                    1,
1571                                    1,
1572                                    1,
1573                                    0,
1574                                    slice_batch);
1575     }
1576
1577     return;
1578 }
1579
1580 static void
1581 gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1582                                    struct encode_state *encode_state,
1583                                    struct intel_encoder_context *encoder_context)
1584 {
1585     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1586     struct intel_batchbuffer *batch = encoder_context->base.batch;
1587     int i;
1588     intel_batchbuffer_start_atomic(batch, 0x4000); 
1589     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1590
1591     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1592         gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i);
1593     }
1594     {
1595         struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1596         intel_batchbuffer_align(slice_batch, 8);
1597         BEGIN_BCS_BATCH(slice_batch, 2);
1598         OUT_BCS_BATCH(slice_batch, 0);
1599         OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
1600         ADVANCE_BCS_BATCH(slice_batch);
1601         mfc_context->aux_batchbuffer = NULL;
1602         intel_batchbuffer_free(slice_batch);
1603     }
1604     intel_batchbuffer_end_atomic(batch);
1605     intel_batchbuffer_flush(batch);
1606 }
1607
1608 static void
1609 gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
1610                                 struct encode_state *encode_state,
1611                                 struct intel_encoder_context *encoder_context)
1612 {
1613     gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1614     gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1615     gen75_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1616     gen75_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1617 }
1618
1619 static dri_bo *
1620 gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1621                                    struct encode_state *encode_state,
1622                                    struct intel_encoder_context *encoder_context)
1623 {
1624     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1625
1626     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
1627     gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1628
1629     return mfc_context->aux_batchbuffer_surface.bo;
1630 }
1631
1632 #endif
1633
1634 static void
1635 gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
1636                                   struct encode_state *encode_state,
1637                                   struct intel_encoder_context *encoder_context)
1638 {
1639     struct intel_batchbuffer *batch = encoder_context->base.batch;
1640     dri_bo *slice_batch_bo;
1641
1642     if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1643         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1644         assert(0);
1645         return; 
1646     }
1647
1648 #if MFC_SOFTWARE_HASWELL
1649     slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1650 #else
1651     slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1652 #endif
1653
1654     // begin programing
1655     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1656     intel_batchbuffer_emit_mi_flush(batch);
1657     
1658     // picture level programing
1659     gen75_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1660
1661     BEGIN_BCS_BATCH(batch, 2);
1662     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1663     OUT_BCS_RELOC(batch,
1664                   slice_batch_bo,
1665                   I915_GEM_DOMAIN_COMMAND, 0, 
1666                   0);
1667     ADVANCE_BCS_BATCH(batch);
1668
1669     // end programing
1670     intel_batchbuffer_end_atomic(batch);
1671
1672     dri_bo_unreference(slice_batch_bo);
1673 }
1674
1675
1676 static VAStatus
1677 gen75_mfc_avc_encode_picture(VADriverContextP ctx, 
1678                              struct encode_state *encode_state,
1679                              struct intel_encoder_context *encoder_context)
1680 {
1681     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1682     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1683     int current_frame_bits_size;
1684     int sts;
1685  
1686     for (;;) {
1687         gen75_mfc_init(ctx, encode_state, encoder_context);
1688         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1689         /*Programing bcs pipeline*/
1690         gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);  //filling the pipeline
1691         gen75_mfc_run(ctx, encode_state, encoder_context);
1692         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1693             gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1694             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1695             if (sts == BRC_NO_HRD_VIOLATION) {
1696                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1697                 break;
1698             }
1699             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1700                 if (!mfc_context->hrd.violation_noted) {
1701                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1702                     mfc_context->hrd.violation_noted = 1;
1703                 }
1704                 return VA_STATUS_SUCCESS;
1705             }
1706         } else {
1707             break;
1708         }
1709     }
1710
1711     return VA_STATUS_SUCCESS;
1712 }
1713
1714 /*
1715  * MPEG-2
1716  */
1717
/*
 * Picture-type code written into MFX_MPEG2_PIC_STATE, indexed by the
 * picture_type field of the VA picture parameter buffer.
 */
static const int
va_to_gen75_mpeg2_picture_type[3] = {
    [0] = 1,    /* I */
    [1] = 2,    /* P */
    [2] = 3,    /* B */
};
1724
1725 static void
1726 gen75_mfc_mpeg2_pic_state(VADriverContextP ctx,
1727                           struct intel_encoder_context *encoder_context,
1728                           struct encode_state *encode_state)
1729 {
1730     struct intel_batchbuffer *batch = encoder_context->base.batch;
1731     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1732     VAEncPictureParameterBufferMPEG2 *pic_param;
1733     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1734     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1735     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1736
1737     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1738     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1739     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1740
1741     BEGIN_BCS_BATCH(batch, 13);
1742     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1743     OUT_BCS_BATCH(batch,
1744                   (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1745                   (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1746                   (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1747                   (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1748                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1749                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1750                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1751                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1752                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1753                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1754                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1755                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1756     OUT_BCS_BATCH(batch,
1757                   0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1758                   va_to_gen75_mpeg2_picture_type[pic_param->picture_type] << 9 |
1759                   0);
1760     OUT_BCS_BATCH(batch,
1761                   1 << 31 |     /* slice concealment */
1762                   (height_in_mbs - 1) << 16 |
1763                   (width_in_mbs - 1));
1764     if (slice_param && slice_param->quantiser_scale_code >= 14) 
1765         OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
1766     else
1767         OUT_BCS_BATCH(batch, 0);
1768
1769     OUT_BCS_BATCH(batch, 0);
1770     OUT_BCS_BATCH(batch,
1771                   0xFFF << 16 | /* InterMBMaxSize */
1772                   0xFFF << 0 |  /* IntraMBMaxSize */
1773                   0);
1774     OUT_BCS_BATCH(batch, 0);
1775     OUT_BCS_BATCH(batch, 0);
1776     OUT_BCS_BATCH(batch, 0);
1777     OUT_BCS_BATCH(batch, 0);
1778     OUT_BCS_BATCH(batch, 0);
1779     OUT_BCS_BATCH(batch, 0);
1780     ADVANCE_BCS_BATCH(batch);
1781 }
1782
1783 static void
1784 gen75_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1785 {
1786     unsigned char intra_qm[64] = {
1787         8, 16, 19, 22, 26, 27, 29, 34,
1788         16, 16, 22, 24, 27, 29, 34, 37,
1789         19, 22, 26, 27, 29, 34, 34, 38,
1790         22, 22, 26, 27, 29, 34, 37, 40,
1791         22, 26, 27, 29, 32, 35, 40, 48,
1792         26, 27, 29, 32, 35, 40, 48, 58,
1793         26, 27, 29, 34, 38, 46, 56, 69,
1794         27, 29, 35, 38, 46, 56, 69, 83
1795     };
1796
1797     unsigned char non_intra_qm[64] = {
1798         16, 16, 16, 16, 16, 16, 16, 16,
1799         16, 16, 16, 16, 16, 16, 16, 16,
1800         16, 16, 16, 16, 16, 16, 16, 16,
1801         16, 16, 16, 16, 16, 16, 16, 16,
1802         16, 16, 16, 16, 16, 16, 16, 16,
1803         16, 16, 16, 16, 16, 16, 16, 16,
1804         16, 16, 16, 16, 16, 16, 16, 16,
1805         16, 16, 16, 16, 16, 16, 16, 16
1806     };
1807
1808     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1809     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
1810 }
1811
1812 static void
1813 gen75_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1814 {
1815     unsigned short intra_fqm[64] = {
1816         65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1817         65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1818         65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1819         65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1820         65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1821         65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1822         65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1823         65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1824     };
1825
1826     unsigned short non_intra_fqm[64] = {
1827         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1828         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1829         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1830         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1831         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1832         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1833         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1834         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1835     };
1836
1837     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1838     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
1839 }
1840
1841 static void
1842 gen75_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1843                                  struct intel_encoder_context *encoder_context,
1844                                  int x, int y,
1845                                  int next_x, int next_y,
1846                                  int is_fisrt_slice_group,
1847                                  int is_last_slice_group,
1848                                  int intra_slice,
1849                                  int qp,
1850                                  struct intel_batchbuffer *batch)
1851 {
1852     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1853
1854     if (batch == NULL)
1855         batch = encoder_context->base.batch;
1856
1857     BEGIN_BCS_BATCH(batch, 8);
1858
1859     OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1860     OUT_BCS_BATCH(batch,
1861                   0 << 31 |                             /* MbRateCtrlFlag */
1862                   !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
1863                   1 << 17 |                             /* Insert Header before the first slice group data */
1864                   1 << 16 |                             /* SliceData PresentFlag: always 1 */
1865                   1 << 15 |                             /* TailPresentFlag: always 1 */
1866                   0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
1867                   !!intra_slice << 13 |                 /* IntraSlice */
1868                   !!intra_slice << 12 |                 /* IntraSliceFlag */
1869                   0);
1870     OUT_BCS_BATCH(batch,
1871                   next_y << 24 |
1872                   next_x << 16 |
1873                   y << 8 |
1874                   x << 0 |
1875                   0);
1876     OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
1877     /* bitstream pointer is only loaded once for the first slice of a frame when 
1878      * LoadSlicePointerFlag is 0
1879      */
1880     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1881     OUT_BCS_BATCH(batch, 0);    /* FIXME: */
1882     OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
1883     OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
1884
1885     ADVANCE_BCS_BATCH(batch);
1886 }
1887
1888 static int
1889 gen75_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1890                                  struct intel_encoder_context *encoder_context,
1891                                  int x, int y,
1892                                  int first_mb_in_slice,
1893                                  int last_mb_in_slice,
1894                                  int first_mb_in_slice_group,
1895                                  int last_mb_in_slice_group,
1896                                  int mb_type,
1897                                  int qp_scale_code,
1898                                  int coded_block_pattern,
1899                                  unsigned char target_size_in_word,
1900                                  unsigned char max_size_in_word,
1901                                  struct intel_batchbuffer *batch)
1902 {
1903     int len_in_dwords = 9;
1904
1905     if (batch == NULL)
1906         batch = encoder_context->base.batch;
1907
1908     BEGIN_BCS_BATCH(batch, len_in_dwords);
1909
1910     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1911     OUT_BCS_BATCH(batch,
1912                   0 << 24 |     /* PackedMvNum */
1913                   0 << 20 |     /* MvFormat */
1914                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1915                   0 << 15 |     /* TransformFlag: frame DCT */
1916                   0 << 14 |     /* FieldMbFlag */
1917                   1 << 13 |     /* IntraMbFlag */
1918                   mb_type << 8 |   /* MbType: Intra */
1919                   0 << 2 |      /* SkipMbFlag */
1920                   0 << 0 |      /* InterMbMode */
1921                   0);
1922     OUT_BCS_BATCH(batch, y << 16 | x);
1923     OUT_BCS_BATCH(batch,
1924                   max_size_in_word << 24 |
1925                   target_size_in_word << 16 |
1926                   coded_block_pattern << 6 |      /* CBP */
1927                   0);
1928     OUT_BCS_BATCH(batch,
1929                   last_mb_in_slice << 31 |
1930                   first_mb_in_slice << 30 |
1931                   0 << 27 |     /* EnableCoeffClamp */
1932                   last_mb_in_slice_group << 26 |
1933                   0 << 25 |     /* MbSkipConvDisable */
1934                   first_mb_in_slice_group << 24 |
1935                   0 << 16 |     /* MvFieldSelect */
1936                   qp_scale_code << 0 |
1937                   0);
1938     OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
1939     OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
1940     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1941     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1942
1943     ADVANCE_BCS_BATCH(batch);
1944
1945     return len_in_dwords;
1946 }
1947
1948 #define MPEG2_INTER_MV_OFFSET   12 
1949
/*
 * Motion vector limits, indexed by f_code.  Index 0 is a placeholder;
 * mpeg2_motion_vector() only consults indices 1 and above.
 */
static struct _mv_ranges
{
    int low;    /* in the unit of 1/2 pixel */
    int high;   /* in the unit of 1/2 pixel */
} mv_ranges[] = {
    {     0,    0 },
    {   -16,   15 },
    {   -32,   31 },
    {   -64,   63 },
    {  -128,  127 },
    {  -256,  255 },
    {  -512,  511 },
    { -1024, 1023 },
    { -2048, 2047 },
    { -4096, 4095 }
};

/*
 * Sanitize one motion vector component.
 *
 * mv is in half-pixel units, pos is the macroblock coordinate along the same
 * axis and display_max the picture size in pixels along that axis.  If the
 * displaced 16-pixel block would start before 0 or end beyond display_max,
 * the component is reset to 0.  Afterwards, if f_code selects an entry of
 * mv_ranges other than the first, the value is clamped to that entry's
 * [low, high] interval; any other f_code leaves it unclamped.
 *
 * Returns the sanitized component.
 */
static int
mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
{
    const int range_count = (int)(sizeof(mv_ranges) / sizeof(mv_ranges[0]));
    int result = mv;

    if (mv + pos * 16 * 2 < 0 ||
        mv + (pos + 1) * 16 * 2 > display_max * 2)
        result = 0;

    if (f_code <= 0 || f_code >= range_count)
        return result;

    if (result < mv_ranges[f_code].low)
        return mv_ranges[f_code].low;

    if (result > mv_ranges[f_code].high)
        return mv_ranges[f_code].high;

    return result;
}
1984
1985 static int
1986 gen75_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
1987                                  struct encode_state *encode_state,
1988                                  struct intel_encoder_context *encoder_context,
1989                                  unsigned int *msg,
1990                                  int width_in_mbs, int height_in_mbs,
1991                                  int x, int y,
1992                                  int first_mb_in_slice,
1993                                  int last_mb_in_slice,
1994                                  int first_mb_in_slice_group,
1995                                  int last_mb_in_slice_group,
1996                                  int qp_scale_code,
1997                                  unsigned char target_size_in_word,
1998                                  unsigned char max_size_in_word,
1999                                  struct intel_batchbuffer *batch)
2000 {
2001     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
2002     int len_in_dwords = 9;
2003     short *mvptr, mvx0, mvy0, mvx1, mvy1;
2004     
2005     if (batch == NULL)
2006         batch = encoder_context->base.batch;
2007
2008     mvptr = (short *)(msg + MPEG2_INTER_MV_OFFSET);
2009     mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
2010     mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
2011     mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
2012     mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
2013
2014     BEGIN_BCS_BATCH(batch, len_in_dwords);
2015
2016     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
2017     OUT_BCS_BATCH(batch,
2018                   2 << 24 |     /* PackedMvNum */
2019                   7 << 20 |     /* MvFormat */
2020                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
2021                   0 << 15 |     /* TransformFlag: frame DCT */
2022                   0 << 14 |     /* FieldMbFlag */
2023                   0 << 13 |     /* IntraMbFlag */
2024                   1 << 8 |      /* MbType: Frame-based */
2025                   0 << 2 |      /* SkipMbFlag */
2026                   0 << 0 |      /* InterMbMode */
2027                   0);
2028     OUT_BCS_BATCH(batch, y << 16 | x);
2029     OUT_BCS_BATCH(batch,
2030                   max_size_in_word << 24 |
2031                   target_size_in_word << 16 |
2032                   0x3f << 6 |   /* CBP */
2033                   0);
2034     OUT_BCS_BATCH(batch,
2035                   last_mb_in_slice << 31 |
2036                   first_mb_in_slice << 30 |
2037                   0 << 27 |     /* EnableCoeffClamp */
2038                   last_mb_in_slice_group << 26 |
2039                   0 << 25 |     /* MbSkipConvDisable */
2040                   first_mb_in_slice_group << 24 |
2041                   0 << 16 |     /* MvFieldSelect */
2042                   qp_scale_code << 0 |
2043                   0);
2044
2045     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
2046     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
2047     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
2048     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
2049
2050     ADVANCE_BCS_BATCH(batch);
2051
2052     return len_in_dwords;
2053 }
2054
2055 static void
2056 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
2057                                            struct encode_state *encode_state,
2058                                            struct intel_encoder_context *encoder_context,
2059                                            struct intel_batchbuffer *slice_batch)
2060 {
2061     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2062     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
2063
2064     if (encode_state->packed_header_data[idx]) {
2065         VAEncPackedHeaderParameterBuffer *param = NULL;
2066         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2067         unsigned int length_in_bits;
2068
2069         assert(encode_state->packed_header_param[idx]);
2070         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2071         length_in_bits = param->bit_length;
2072
2073         mfc_context->insert_object(ctx,
2074                                    encoder_context,
2075                                    header_data,
2076                                    ALIGN(length_in_bits, 32) >> 5,
2077                                    length_in_bits & 0x1f,
2078                                    5,   /* FIXME: check it */
2079                                    0,
2080                                    0,
2081                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2082                                    slice_batch);
2083     }
2084
2085     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2086
2087     if (encode_state->packed_header_data[idx]) {
2088         VAEncPackedHeaderParameterBuffer *param = NULL;
2089         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2090         unsigned int length_in_bits;
2091
2092         assert(encode_state->packed_header_param[idx]);
2093         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2094         length_in_bits = param->bit_length;
2095
2096         mfc_context->insert_object(ctx,
2097                                    encoder_context,
2098                                    header_data,
2099                                    ALIGN(length_in_bits, 32) >> 5,
2100                                    length_in_bits & 0x1f,
2101                                    5,   /* FIXME: check it */
2102                                    0,
2103                                    0,
2104                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2105                                    slice_batch);
2106     }
2107 }
2108
2109 static void 
2110 gen75_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2111                                      struct encode_state *encode_state,
2112                                      struct intel_encoder_context *encoder_context,
2113                                      int slice_index,
2114                                      VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2115                                      struct intel_batchbuffer *slice_batch)
2116 {
2117     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2118     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2119     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2120     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2121     unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2122     unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2123     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2124     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2125     int i, j;
2126     int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2127     unsigned int *msg = NULL;
2128     unsigned char *msg_ptr = NULL;
2129
2130     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
2131     h_start_pos = slice_param->macroblock_address % width_in_mbs;
2132     v_start_pos = slice_param->macroblock_address / width_in_mbs;
2133     assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
2134
2135     dri_bo_map(vme_context->vme_output.bo , 0);
2136     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
2137
2138     if (next_slice_group_param) {
2139         h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2140         v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
2141     } else {
2142         h_next_start_pos = 0;
2143         v_next_start_pos = height_in_mbs;
2144     }
2145
2146     gen75_mfc_mpeg2_slicegroup_state(ctx,
2147                                      encoder_context,
2148                                      h_start_pos,
2149                                      v_start_pos,
2150                                      h_next_start_pos,
2151                                      v_next_start_pos,
2152                                      slice_index == 0,
2153                                      next_slice_group_param == NULL,
2154                                      slice_param->is_intra_slice,
2155                                      slice_param->quantiser_scale_code,
2156                                      slice_batch);
2157
2158     if (slice_index == 0) 
2159         intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2160
2161     /* Insert '00' to make sure the header is valid */
2162     mfc_context->insert_object(ctx,
2163                                encoder_context,
2164                                (unsigned int*)section_delimiter,
2165                                1,
2166                                8,   /* 8bits in the last DWORD */
2167                                1,   /* 1 byte */
2168                                1,
2169                                0,
2170                                0,
2171                                slice_batch);
2172
2173     for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2174         /* PAK for each macroblocks */
2175         for (j = 0; j < slice_param->num_macroblocks; j++) {
2176             int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2177             int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2178             int first_mb_in_slice = (j == 0);
2179             int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2180             int first_mb_in_slice_group = (i == 0 && j == 0);
2181             int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2182                                           j == slice_param->num_macroblocks - 1);
2183
2184             msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
2185
2186             if (slice_param->is_intra_slice) {
2187                 gen75_mfc_mpeg2_pak_object_intra(ctx,
2188                                                  encoder_context,
2189                                                  h_pos, v_pos,
2190                                                  first_mb_in_slice,
2191                                                  last_mb_in_slice,
2192                                                  first_mb_in_slice_group,
2193                                                  last_mb_in_slice_group,
2194                                                  0x1a,
2195                                                  slice_param->quantiser_scale_code,
2196                                                  0x3f,
2197                                                  0,
2198                                                  0xff,
2199                                                  slice_batch);
2200             } else {
2201                 int inter_rdo, intra_rdo;
2202                 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
2203                 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
2204
2205                 if (intra_rdo < inter_rdo) 
2206                     gen75_mfc_mpeg2_pak_object_intra(ctx,
2207                                                      encoder_context,
2208                                                      h_pos, v_pos,
2209                                                      first_mb_in_slice,
2210                                                      last_mb_in_slice,
2211                                                      first_mb_in_slice_group,
2212                                                      last_mb_in_slice_group,
2213                                                      0x1a,
2214                                                      slice_param->quantiser_scale_code,
2215                                                      0x3f,
2216                                                      0,
2217                                                      0xff,
2218                                                      slice_batch);
2219                 else
2220                     gen75_mfc_mpeg2_pak_object_inter(ctx,
2221                                                      encode_state,
2222                                                      encoder_context,
2223                                                      msg,
2224                                                      width_in_mbs, height_in_mbs,
2225                                                      h_pos, v_pos,
2226                                                      first_mb_in_slice,
2227                                                      last_mb_in_slice,
2228                                                      first_mb_in_slice_group,
2229                                                      last_mb_in_slice_group,
2230                                                      slice_param->quantiser_scale_code,
2231                                                      0,
2232                                                      0xff,
2233                                                      slice_batch);
2234             }
2235         }
2236
2237         slice_param++;
2238     }
2239
2240     dri_bo_unmap(vme_context->vme_output.bo);
2241
2242     /* tail data */
2243     if (next_slice_group_param == NULL) { /* end of a picture */
2244         mfc_context->insert_object(ctx,
2245                                    encoder_context,
2246                                    (unsigned int *)tail_delimiter,
2247                                    2,
2248                                    8,   /* 8bits in the last DWORD */
2249                                    5,   /* 5 bytes */
2250                                    1,
2251                                    1,
2252                                    0,
2253                                    slice_batch);
2254     } else {        /* end of a lsice group */
2255         mfc_context->insert_object(ctx,
2256                                    encoder_context,
2257                                    (unsigned int *)section_delimiter,
2258                                    1,
2259                                    8,   /* 8bits in the last DWORD */
2260                                    1,   /* 1 byte */
2261                                    1,
2262                                    1,
2263                                    0,
2264                                    slice_batch);
2265     }
2266 }
2267
2268 /* 
2269  * A batch buffer for all slices, including slice state, 
2270  * slice insert object and slice pak object commands
2271  *
2272  */
2273 static dri_bo *
2274 gen75_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2275                                            struct encode_state *encode_state,
2276                                            struct intel_encoder_context *encoder_context)
2277 {
2278     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2279     struct intel_batchbuffer *batch;
2280     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2281     dri_bo *batch_bo;
2282     int i;
2283
2284     batch = mfc_context->aux_batchbuffer;
2285     batch_bo = batch->buffer;
2286
2287     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2288         if (i == encode_state->num_slice_params_ext - 1)
2289             next_slice_group_param = NULL;
2290         else
2291             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2292
2293         gen75_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2294     }
2295
2296     intel_batchbuffer_align(batch, 8);
2297     
2298     BEGIN_BCS_BATCH(batch, 2);
2299     OUT_BCS_BATCH(batch, 0);
2300     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2301     ADVANCE_BCS_BATCH(batch);
2302
2303     dri_bo_reference(batch_bo);
2304     intel_batchbuffer_free(batch);
2305     mfc_context->aux_batchbuffer = NULL;
2306
2307     return batch_bo;
2308 }
2309
2310 static void
2311 gen75_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2312                                             struct encode_state *encode_state,
2313                                             struct intel_encoder_context *encoder_context)
2314 {
2315     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2316
2317     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2318     mfc_context->set_surface_state(ctx, encoder_context);
2319     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2320     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
2321     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2322     gen75_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2323     gen75_mfc_mpeg2_qm_state(ctx, encoder_context);
2324     gen75_mfc_mpeg2_fqm_state(ctx, encoder_context);
2325 }
2326
2327 static void
2328 gen75_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2329                                     struct encode_state *encode_state,
2330                                     struct intel_encoder_context *encoder_context)
2331 {
2332     struct intel_batchbuffer *batch = encoder_context->base.batch;
2333     dri_bo *slice_batch_bo;
2334
2335     slice_batch_bo = gen75_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2336
2337     // begin programing
2338     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
2339     intel_batchbuffer_emit_mi_flush(batch);
2340     
2341     // picture level programing
2342     gen75_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
2343
2344     BEGIN_BCS_BATCH(batch, 2);
2345     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
2346     OUT_BCS_RELOC(batch,
2347                   slice_batch_bo,
2348                   I915_GEM_DOMAIN_COMMAND, 0, 
2349                   0);
2350     ADVANCE_BCS_BATCH(batch);
2351
2352     // end programing
2353     intel_batchbuffer_end_atomic(batch);
2354
2355     dri_bo_unreference(slice_batch_bo);
2356 }
2357
2358 static VAStatus
2359 intel_mfc_mpeg2_prepare(VADriverContextP ctx, 
2360                         struct encode_state *encode_state,
2361                         struct intel_encoder_context *encoder_context)
2362 {
2363     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2364     struct object_surface *obj_surface; 
2365     struct object_buffer *obj_buffer;
2366     struct i965_coded_buffer_segment *coded_buffer_segment;
2367     VAStatus vaStatus = VA_STATUS_SUCCESS;
2368     dri_bo *bo;
2369     int i;
2370
2371     /* reconstructed surface */
2372     obj_surface = encode_state->reconstructed_object;
2373     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2374     mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2375     dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2376     mfc_context->surface_state.width = obj_surface->orig_width;
2377     mfc_context->surface_state.height = obj_surface->orig_height;
2378     mfc_context->surface_state.w_pitch = obj_surface->width;
2379     mfc_context->surface_state.h_pitch = obj_surface->height;
2380
2381     /* forward reference */
2382     obj_surface = encode_state->reference_objects[0];
2383
2384     if (obj_surface && obj_surface->bo) {
2385         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2386         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2387     } else
2388         mfc_context->reference_surfaces[0].bo = NULL;
2389
2390     /* backward reference */
2391     obj_surface = encode_state->reference_objects[1];
2392
2393     if (obj_surface && obj_surface->bo) {
2394         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2395         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2396     } else {
2397         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2398
2399         if (mfc_context->reference_surfaces[1].bo)
2400             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2401     }
2402
2403     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2404         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2405
2406         if (mfc_context->reference_surfaces[i].bo)
2407             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2408     }
2409     
2410     /* input YUV surface */
2411     obj_surface = encode_state->input_yuv_object;
2412     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2413     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2414
2415     /* coded buffer */
2416     obj_buffer = encode_state->coded_buf_object;
2417     bo = obj_buffer->buffer_store->bo;
2418     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2419     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2420     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2421     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2422
2423     /* set the internal flag to 0 to indicate the coded size is unknown */
2424     dri_bo_map(bo, 1);
2425     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2426     coded_buffer_segment->mapped = 0;
2427     coded_buffer_segment->codec = encoder_context->codec;
2428     dri_bo_unmap(bo);
2429
2430     return vaStatus;
2431 }
2432
2433 static VAStatus
2434 gen75_mfc_mpeg2_encode_picture(VADriverContextP ctx, 
2435                                struct encode_state *encode_state,
2436                                struct intel_encoder_context *encoder_context)
2437 {
2438     gen75_mfc_init(ctx, encode_state, encoder_context);
2439     intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2440     /*Programing bcs pipeline*/
2441     gen75_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2442     gen75_mfc_run(ctx, encode_state, encoder_context);
2443
2444     return VA_STATUS_SUCCESS;
2445 }
2446
2447 static void
2448 gen75_mfc_context_destroy(void *context)
2449 {
2450     struct gen6_mfc_context *mfc_context = context;
2451     int i;
2452
2453     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2454     mfc_context->post_deblocking_output.bo = NULL;
2455
2456     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2457     mfc_context->pre_deblocking_output.bo = NULL;
2458
2459     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2460     mfc_context->uncompressed_picture_source.bo = NULL;
2461
2462     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
2463     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2464
2465     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2466         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2467         mfc_context->direct_mv_buffers[i].bo = NULL;
2468     }
2469
2470     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2471     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
2472
2473     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2474     mfc_context->macroblock_status_buffer.bo = NULL;
2475
2476     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2477     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2478
2479     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2480     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2481
2482     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2483         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2484         mfc_context->reference_surfaces[i].bo = NULL;  
2485     }
2486
2487     i965_gpe_context_destroy(&mfc_context->gpe_context);
2488
2489     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2490     mfc_context->mfc_batchbuffer_surface.bo = NULL;
2491
2492     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2493     mfc_context->aux_batchbuffer_surface.bo = NULL;
2494
2495     if (mfc_context->aux_batchbuffer)
2496         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2497
2498     mfc_context->aux_batchbuffer = NULL;
2499
2500     free(mfc_context);
2501 }
2502
2503 static VAStatus gen75_mfc_pipeline(VADriverContextP ctx,
2504                                    VAProfile profile,
2505                                    struct encode_state *encode_state,
2506                                    struct intel_encoder_context *encoder_context)
2507 {
2508     VAStatus vaStatus;
2509
2510     switch (profile) {
2511     case VAProfileH264ConstrainedBaseline:
2512     case VAProfileH264Main:
2513     case VAProfileH264High:
2514     case VAProfileH264MultiviewHigh:
2515     case VAProfileH264StereoHigh:
2516         vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
2517         break;
2518
2519         /* FIXME: add for other profile */
2520     case VAProfileMPEG2Simple:
2521     case VAProfileMPEG2Main:
2522         vaStatus = gen75_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
2523         break;
2524
2525     default:
2526         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2527         break;
2528     }
2529
2530     return vaStatus;
2531 }
2532
2533 Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2534 {
2535     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
2536
2537     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2538
2539     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
2540     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
2541
2542     mfc_context->gpe_context.curbe.length = 32 * 4;
2543
2544     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2545     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
2546     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
2547     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
2548     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
2549
2550     i965_gpe_load_kernels(ctx,
2551                           &mfc_context->gpe_context,
2552                           gen75_mfc_kernels,
2553                           1);
2554
2555     mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
2556     mfc_context->set_surface_state = gen75_mfc_surface_state;
2557     mfc_context->ind_obj_base_addr_state = gen75_mfc_ind_obj_base_addr_state;
2558     mfc_context->avc_img_state = gen75_mfc_avc_img_state;
2559     mfc_context->avc_qm_state = gen75_mfc_avc_qm_state;
2560     mfc_context->avc_fqm_state = gen75_mfc_avc_fqm_state;
2561     mfc_context->insert_object = gen75_mfc_avc_insert_object;
2562     mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;
2563
2564     encoder_context->mfc_context = mfc_context;
2565     encoder_context->mfc_context_destroy = gen75_mfc_context_destroy;
2566     encoder_context->mfc_pipeline = gen75_mfc_pipeline;
2567     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
2568
2569     return True;
2570 }