VEBOX/bdw: set downsample method
[platform/upstream/libva-intel-driver.git] / src / gen75_mfc.c
1 /*
2  * Copyright © 2010-2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45
/*
 * Offsets and mask applied to the VME output records.
 * NOTE(review): units (bytes vs. dwords) are not visible from this file
 * section - confirm against the code that consumes them.
 */
#define AVC_INTRA_RDO_OFFSET    4
#define AVC_INTER_RDO_OFFSET    10
#define AVC_INTER_MSG_OFFSET    8
#define AVC_INTER_MV_OFFSET     48
#define AVC_RDO_MASK            0xFFFF

#define MFC_SOFTWARE_HASWELL    0

/*
 * Surface state / binding table layout helpers.  The binding table starts
 * right after MAX_MEDIA_SURFACES_GEN6 padded surface states.  Macro
 * arguments are parenthesized so that compound expressions such as
 * SURFACE_STATE_OFFSET(i + 1) expand correctly.
 */
#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

/* Revisions at or above B0_STEP_REV use the longer "bplus" command layouts. */
#define B0_STEP_REV             2
#define IS_STEPPING_BPLUS(i965) (((i965)->intel.revision) >= B0_STEP_REV)
60
61 static const uint32_t gen75_mfc_batchbuffer_avc[][4] = {
62 #include "shaders/utils/mfc_batchbuffer_hsw.g75b"
63 };
64
65 static struct i965_kernel gen75_mfc_kernels[] = {
66     {
67         "MFC AVC INTRA BATCHBUFFER ",
68         MFC_BATCHBUFFER_AVC_INTRA,
69         gen75_mfc_batchbuffer_avc,
70         sizeof(gen75_mfc_batchbuffer_avc),
71         NULL
72     },
73 };
74
/* Inter partition codes; all three fit in the two bits of INTER_MODE_MASK. */
#define INTER_MODE_MASK     0x03
#define INTER_16X8          0x01
#define INTER_8X16          0x02
#define INTER_8X8           0x03

/* Bits 8..15 (name suggests the sub-macroblock shape field - confirm). */
#define SUBMB_SHAPE_MASK    (0xFF << 8)

/* Values placed at bit 20 (names suggest MV-count selectors - confirm). */
#define INTER_MV8           (4 << 20)
#define INTER_MV32          (6 << 20)
83
84
85 static void
86 gen75_mfc_pipe_mode_select(VADriverContextP ctx,
87                            int standard_select,
88                            struct intel_encoder_context *encoder_context)
89 {
90     struct intel_batchbuffer *batch = encoder_context->base.batch;
91     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
92     assert(standard_select == MFX_FORMAT_MPEG2 ||
93            standard_select == MFX_FORMAT_AVC);
94
95     BEGIN_BCS_BATCH(batch, 5);
96
97     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
98     OUT_BCS_BATCH(batch,
99                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
100                   (MFD_MODE_VLD << 15) | /* VLD mode */
101                   (0 << 10) | /* Stream-Out Enable */
102                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
103                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
104                   (0 << 5)  | /* not in stitch mode */
105                   (1 << 4)  | /* encoding mode */
106                   (standard_select << 0));  /* standard select: avc or mpeg2 */
107     OUT_BCS_BATCH(batch,
108                   (0 << 7)  | /* expand NOA bus flag */
109                   (0 << 6)  | /* disable slice-level clock gating */
110                   (0 << 5)  | /* disable clock gating for NOA */
111                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
112                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
113                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
114                   (0 << 1)  |
115                   (0 << 0));
116     OUT_BCS_BATCH(batch, 0);
117     OUT_BCS_BATCH(batch, 0);
118
119     ADVANCE_BCS_BATCH(batch);
120 }
121
122 static void
123 gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
124 {
125     struct intel_batchbuffer *batch = encoder_context->base.batch;
126     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
127
128     BEGIN_BCS_BATCH(batch, 6);
129
130     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
131     OUT_BCS_BATCH(batch, 0);
132     OUT_BCS_BATCH(batch,
133                   ((mfc_context->surface_state.height - 1) << 18) |
134                   ((mfc_context->surface_state.width - 1) << 4));
135     OUT_BCS_BATCH(batch,
136                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
137                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
138                   (0 << 22) | /* surface object control state, FIXME??? */
139                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
140                   (0 << 2)  | /* must be 0 for interleave U/V */
141                   (1 << 1)  | /* must be tiled */
142                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
143     OUT_BCS_BATCH(batch,
144                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
145                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
146     OUT_BCS_BATCH(batch, 0);
147
148     ADVANCE_BCS_BATCH(batch);
149 }
150
151 static void
152 gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
153                                         struct intel_encoder_context *encoder_context)
154 {
155     struct intel_batchbuffer *batch = encoder_context->base.batch;
156     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
157     struct gen6_vme_context *vme_context = encoder_context->vme_context;
158
159     BEGIN_BCS_BATCH(batch, 26);
160
161     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
162     /* the DW1-3 is for the MFX indirect bistream offset */
163     OUT_BCS_BATCH(batch, 0);
164     OUT_BCS_BATCH(batch, 0);
165     OUT_BCS_BATCH(batch, 0);
166     /* the DW4-5 is the MFX upper bound */
167     OUT_BCS_BATCH(batch, 0);
168     OUT_BCS_BATCH(batch, 0);
169
170     /* the DW6-10 is for MFX Indirect MV Object Base Address */
171     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
172     OUT_BCS_BATCH(batch, 0);
173     OUT_BCS_BATCH(batch, 0);
174     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
175     OUT_BCS_BATCH(batch, 0);
176
177     /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
178     OUT_BCS_BATCH(batch, 0);
179     OUT_BCS_BATCH(batch, 0);
180     OUT_BCS_BATCH(batch, 0);
181     OUT_BCS_BATCH(batch, 0);
182     OUT_BCS_BATCH(batch, 0);
183
184     /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
185     OUT_BCS_BATCH(batch, 0);
186     OUT_BCS_BATCH(batch, 0);
187     OUT_BCS_BATCH(batch, 0);
188     OUT_BCS_BATCH(batch, 0);
189     OUT_BCS_BATCH(batch, 0);
190
191     /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/        
192     OUT_BCS_RELOC(batch,
193                   mfc_context->mfc_indirect_pak_bse_object.bo,
194                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
195                   0);
196     OUT_BCS_BATCH(batch, 0);
197     OUT_BCS_BATCH(batch, 0);
198         
199     OUT_BCS_RELOC(batch,
200                   mfc_context->mfc_indirect_pak_bse_object.bo,
201                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
202                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
203     OUT_BCS_BATCH(batch, 0);
204
205     ADVANCE_BCS_BATCH(batch);
206 }
207
208 static void
209 gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
210 {
211     struct intel_batchbuffer *batch = encoder_context->base.batch;
212     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
213     struct gen6_vme_context *vme_context = encoder_context->vme_context;
214     struct i965_driver_data *i965 = i965_driver_data(ctx);
215
216     if (IS_STEPPING_BPLUS(i965)) {
217         gen75_mfc_ind_obj_base_addr_state_bplus(ctx, encoder_context);
218         return;
219     }
220
221     BEGIN_BCS_BATCH(batch, 11);
222
223     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
224     OUT_BCS_BATCH(batch, 0);
225     OUT_BCS_BATCH(batch, 0);
226     /* MFX Indirect MV Object Base Address */
227     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
228     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
229     OUT_BCS_BATCH(batch, 0);
230     OUT_BCS_BATCH(batch, 0);
231     OUT_BCS_BATCH(batch, 0);
232     OUT_BCS_BATCH(batch, 0);
233     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
234     OUT_BCS_RELOC(batch,
235                   mfc_context->mfc_indirect_pak_bse_object.bo,
236                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
237                   0);
238     OUT_BCS_RELOC(batch,
239                   mfc_context->mfc_indirect_pak_bse_object.bo,
240                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
241                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
242
243     ADVANCE_BCS_BATCH(batch);
244 }
245
246 static void
247 gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,  
248                         struct intel_encoder_context *encoder_context)
249 {
250     struct intel_batchbuffer *batch = encoder_context->base.batch;
251     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
252     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
253
254     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
255     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
256
257     BEGIN_BCS_BATCH(batch, 16);
258
259     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
260     /*DW1. MB setting of frame */
261     OUT_BCS_BATCH(batch,
262                   ((width_in_mbs * height_in_mbs - 1) & 0xFFFF));
263     OUT_BCS_BATCH(batch, 
264                   ((height_in_mbs - 1) << 16) | 
265                   ((width_in_mbs - 1) << 0));
266     /* DW3 QP setting */
267     OUT_BCS_BATCH(batch, 
268                   (0 << 24) |   /* Second Chroma QP Offset */
269                   (0 << 16) |   /* Chroma QP Offset */
270                   (0 << 14) |   /* Max-bit conformance Intra flag */
271                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
272                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
273                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
274                   (0 << 8)  |   /* FIXME: Image Structure */
275                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
276     OUT_BCS_BATCH(batch,
277                   (0 << 16) |   /* Mininum Frame size */
278                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
279                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
280                   (0 << 13) |   /* CABAC 0 word insertion test enable */
281                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
282                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
283                   (0 << 8)  |   /* FIXME: MbMvFormatFlag */
284                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
285                   (0 << 6)  |   /* Only valid for VLD decoding mode */
286                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
287                   (0 << 4)  |   /* Direct 8x8 inference flag */
288                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
289                   (1 << 2)  |   /* Frame MB only flag */
290                   (0 << 1)  |   /* MBAFF mode is in active */
291                   (0 << 0));    /* Field picture flag */
292     /* DW5 Trellis quantization */
293     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
294     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
295                   (0xBB8 << 16) |       /* InterMbMaxSz */
296                   (0xEE8) );            /* IntraMbMaxSz */
297     OUT_BCS_BATCH(batch, 0);            /* Reserved */
298     /* DW8. QP delta */
299     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
300     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
301     /* DW10. Bit setting for MB */
302     OUT_BCS_BATCH(batch, 0x8C000000);
303     OUT_BCS_BATCH(batch, 0x00010000);
304     /* DW12. */
305     OUT_BCS_BATCH(batch, 0);
306     OUT_BCS_BATCH(batch, 0x02010100);
307     /* DW14. For short format */
308     OUT_BCS_BATCH(batch, 0);
309     OUT_BCS_BATCH(batch, 0);
310
311     ADVANCE_BCS_BATCH(batch);
312 }
313
314 static void
315 gen75_mfc_qm_state(VADriverContextP ctx,
316                    int qm_type,
317                    unsigned int *qm,
318                    int qm_length,
319                    struct intel_encoder_context *encoder_context)
320 {
321     struct intel_batchbuffer *batch = encoder_context->base.batch;
322     unsigned int qm_buffer[16];
323
324     assert(qm_length <= 16);
325     assert(sizeof(*qm) == 4);
326     memcpy(qm_buffer, qm, qm_length * 4);
327
328     BEGIN_BCS_BATCH(batch, 18);
329     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
330     OUT_BCS_BATCH(batch, qm_type << 0);
331     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
332     ADVANCE_BCS_BATCH(batch);
333 }
334
335 static void
336 gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
337 {
338     unsigned int qm[16] = {
339         0x10101010, 0x10101010, 0x10101010, 0x10101010,
340         0x10101010, 0x10101010, 0x10101010, 0x10101010,
341         0x10101010, 0x10101010, 0x10101010, 0x10101010,
342         0x10101010, 0x10101010, 0x10101010, 0x10101010
343     };
344
345     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
346     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
347     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
348     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
349 }
350
351 static void
352 gen75_mfc_fqm_state(VADriverContextP ctx,
353                     int fqm_type,
354                     unsigned int *fqm,
355                     int fqm_length,
356                     struct intel_encoder_context *encoder_context)
357 {
358     struct intel_batchbuffer *batch = encoder_context->base.batch;
359     unsigned int fqm_buffer[32];
360
361     assert(fqm_length <= 32);
362     assert(sizeof(*fqm) == 4);
363     memcpy(fqm_buffer, fqm, fqm_length * 4);
364
365     BEGIN_BCS_BATCH(batch, 34);
366     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
367     OUT_BCS_BATCH(batch, fqm_type << 0);
368     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
369     ADVANCE_BCS_BATCH(batch);
370 }
371
372 static void
373 gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
374 {
375     unsigned int qm[32] = {
376         0x10001000, 0x10001000, 0x10001000, 0x10001000,
377         0x10001000, 0x10001000, 0x10001000, 0x10001000,
378         0x10001000, 0x10001000, 0x10001000, 0x10001000,
379         0x10001000, 0x10001000, 0x10001000, 0x10001000,
380         0x10001000, 0x10001000, 0x10001000, 0x10001000,
381         0x10001000, 0x10001000, 0x10001000, 0x10001000,
382         0x10001000, 0x10001000, 0x10001000, 0x10001000,
383         0x10001000, 0x10001000, 0x10001000, 0x10001000
384     };
385
386     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
387     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
388     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
389     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
390 }
391
392 static void
393 gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
394                             unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
395                             int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
396                             struct intel_batchbuffer *batch)
397 {
398     if (batch == NULL)
399         batch = encoder_context->base.batch;
400
401     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
402
403     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
404     OUT_BCS_BATCH(batch,
405                   (0 << 16) |   /* always start at offset 0 */
406                   (data_bits_in_last_dw << 8) |
407                   (skip_emul_byte_count << 4) |
408                   (!!emulation_flag << 3) |
409                   ((!!is_last_header) << 2) |
410                   ((!!is_end_of_slice) << 1) |
411                   (0 << 0));    /* FIXME: ??? */
412     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
413
414     ADVANCE_BCS_BATCH(batch);
415 }
416
417
418 static void gen75_mfc_init(VADriverContextP ctx,
419                            struct encode_state *encode_state,
420                            struct intel_encoder_context *encoder_context)
421 {
422     struct i965_driver_data *i965 = i965_driver_data(ctx);
423     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
424     dri_bo *bo;
425     int i;
426     int width_in_mbs = 0;
427     int height_in_mbs = 0;
428     int slice_batchbuffer_size;
429
430     if (encoder_context->codec == CODEC_H264) {
431         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
432         width_in_mbs = pSequenceParameter->picture_width_in_mbs;
433         height_in_mbs = pSequenceParameter->picture_height_in_mbs;
434     } else {
435         VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
436
437         assert(encoder_context->codec == CODEC_MPEG2);
438
439         width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
440         height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
441     }
442
443     slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
444                 (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
445
446     /*Encode common setup for MFC*/
447     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
448     mfc_context->post_deblocking_output.bo = NULL;
449
450     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
451     mfc_context->pre_deblocking_output.bo = NULL;
452
453     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
454     mfc_context->uncompressed_picture_source.bo = NULL;
455
456     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
457     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
458
459     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
460         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
461         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
462         mfc_context->direct_mv_buffers[i].bo = NULL;
463     }
464
465     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
466         if (mfc_context->reference_surfaces[i].bo != NULL)
467             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
468         mfc_context->reference_surfaces[i].bo = NULL;  
469     }
470
471     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
472     bo = dri_bo_alloc(i965->intel.bufmgr,
473                       "Buffer",
474                       width_in_mbs * 64,
475                       64);
476     assert(bo);
477     mfc_context->intra_row_store_scratch_buffer.bo = bo;
478
479     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
480     bo = dri_bo_alloc(i965->intel.bufmgr,
481                       "Buffer",
482                       width_in_mbs * height_in_mbs * 16,
483                       64);
484     assert(bo);
485     mfc_context->macroblock_status_buffer.bo = bo;
486
487     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
488     bo = dri_bo_alloc(i965->intel.bufmgr,
489                       "Buffer",
490                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
491                       64);
492     assert(bo);
493     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
494
495     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
496     bo = dri_bo_alloc(i965->intel.bufmgr,
497                       "Buffer",
498                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
499                       0x1000);
500     assert(bo);
501     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
502
503     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
504     mfc_context->mfc_batchbuffer_surface.bo = NULL;
505
506     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
507     mfc_context->aux_batchbuffer_surface.bo = NULL;
508
509     if (mfc_context->aux_batchbuffer)
510         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
511
512     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD,
513                                                         slice_batchbuffer_size);
514     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
515     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
516     mfc_context->aux_batchbuffer_surface.pitch = 16;
517     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
518     mfc_context->aux_batchbuffer_surface.size_block = 16;
519
520     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
521 }
522
523 static void
524 gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
525                                     struct intel_encoder_context *encoder_context)
526 {
527     struct intel_batchbuffer *batch = encoder_context->base.batch;
528     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
529     int i;
530
531     BEGIN_BCS_BATCH(batch, 61);
532
533     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
534
535     /* the DW1-3 is for pre_deblocking */
536     if (mfc_context->pre_deblocking_output.bo)
537         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
538                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
539                       0);
540     else
541         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
542
543     OUT_BCS_BATCH(batch, 0);
544     OUT_BCS_BATCH(batch, 0);
545     /* the DW4-6 is for the post_deblocking */
546
547     if (mfc_context->post_deblocking_output.bo)
548         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
549                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
550                       0);                                                                                       /* post output addr  */ 
551     else
552         OUT_BCS_BATCH(batch, 0);
553     OUT_BCS_BATCH(batch, 0);
554     OUT_BCS_BATCH(batch, 0);
555
556     /* the DW7-9 is for the uncompressed_picture */
557     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
558                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
559                   0); /* uncompressed data */
560
561     OUT_BCS_BATCH(batch, 0);
562     OUT_BCS_BATCH(batch, 0);
563
564     /* the DW10-12 is for the mb status */
565     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
566                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
567                   0); /* StreamOut data*/
568     OUT_BCS_BATCH(batch, 0);
569     OUT_BCS_BATCH(batch, 0);
570
571     /* the DW13-15 is for the intra_row_store_scratch */
572     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
573                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
574                   0);   
575     OUT_BCS_BATCH(batch, 0);
576     OUT_BCS_BATCH(batch, 0);
577
578     /* the DW16-18 is for the deblocking filter */
579     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
580                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
581                   0);
582     OUT_BCS_BATCH(batch, 0);
583     OUT_BCS_BATCH(batch, 0);
584
585     /* the DW 19-50 is for Reference pictures*/
586     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
587         if ( mfc_context->reference_surfaces[i].bo != NULL) {
588             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
589                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
590                           0);                   
591         } else {
592             OUT_BCS_BATCH(batch, 0);
593         }
594         OUT_BCS_BATCH(batch, 0);
595     }
596     OUT_BCS_BATCH(batch, 0);
597
598     /* The DW 52-54 is for the MB status buffer */
599     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
600                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
601                   0);                                                                                   /* Macroblock status buffer*/
602         
603     OUT_BCS_BATCH(batch, 0);
604     OUT_BCS_BATCH(batch, 0);
605
606     /* the DW 55-57 is the ILDB buffer */
607     OUT_BCS_BATCH(batch, 0);
608     OUT_BCS_BATCH(batch, 0);
609     OUT_BCS_BATCH(batch, 0);
610
611     /* the DW 58-60 is the second ILDB buffer */
612     OUT_BCS_BATCH(batch, 0);
613     OUT_BCS_BATCH(batch, 0);
614     OUT_BCS_BATCH(batch, 0);
615     ADVANCE_BCS_BATCH(batch);
616 }
617
618 static void
619 gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
620 {
621     struct intel_batchbuffer *batch = encoder_context->base.batch;
622     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
623     struct i965_driver_data *i965 = i965_driver_data(ctx);
624     int i;
625
626     if (IS_STEPPING_BPLUS(i965)) {
627         gen75_mfc_pipe_buf_addr_state_bplus(ctx, encoder_context);
628         return;
629     }
630
631     BEGIN_BCS_BATCH(batch, 25);
632
633     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
634
635     if (mfc_context->pre_deblocking_output.bo)
636         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
637                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
638                       0);
639     else
640         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
641
642     if (mfc_context->post_deblocking_output.bo)
643         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
644                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
645                       0);                                                                                       /* post output addr  */ 
646     else
647         OUT_BCS_BATCH(batch, 0);
648
649     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
650                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
651                   0);                                                                                   /* uncompressed data */
652     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
653                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
654                   0);                                                                                   /* StreamOut data*/
655     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
656                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
657                   0);   
658     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
659                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
660                   0);
661     /* 7..22 Reference pictures*/
662     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
663         if ( mfc_context->reference_surfaces[i].bo != NULL) {
664             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
665                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
666                           0);                   
667         } else {
668             OUT_BCS_BATCH(batch, 0);
669         }
670     }
671     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
672                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
673                   0);                                                                                   /* Macroblock status buffer*/
674
675     OUT_BCS_BATCH(batch, 0);
676
677     ADVANCE_BCS_BATCH(batch);
678 }
679
680 static void
681 gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
682                                      struct intel_encoder_context *encoder_context)
683 {
684     struct intel_batchbuffer *batch = encoder_context->base.batch;
685     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
686
687     int i;
688
689     BEGIN_BCS_BATCH(batch, 71);
690
691     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
692
693     /* Reference frames and Current frames */
694     /* the DW1-32 is for the direct MV for reference */
695     for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
696         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
697             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
698                           I915_GEM_DOMAIN_INSTRUCTION, 0,
699                           0);
700             OUT_BCS_BATCH(batch, 0);
701         } else {
702             OUT_BCS_BATCH(batch, 0);
703             OUT_BCS_BATCH(batch, 0);
704         }
705     }
706     OUT_BCS_BATCH(batch, 0);
707
708     /* the DW34-36 is the MV for the current reference */
709     OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
710                   I915_GEM_DOMAIN_INSTRUCTION, 0,
711                   0);
712
713     OUT_BCS_BATCH(batch, 0);
714     OUT_BCS_BATCH(batch, 0);
715
716     /* POL list */
717     for(i = 0; i < 32; i++) {
718         OUT_BCS_BATCH(batch, i/2);
719     }
720     OUT_BCS_BATCH(batch, 0);
721     OUT_BCS_BATCH(batch, 0);
722
723     ADVANCE_BCS_BATCH(batch);
724 }
725
726 static void
727 gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
728 {
729     struct intel_batchbuffer *batch = encoder_context->base.batch;
730     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
731     struct i965_driver_data *i965 = i965_driver_data(ctx);
732     int i;
733
734     if (IS_STEPPING_BPLUS(i965)) {
735         gen75_mfc_avc_directmode_state_bplus(ctx, encoder_context);
736         return;
737     }
738
739     BEGIN_BCS_BATCH(batch, 69);
740
741     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
742
743     /* Reference frames and Current frames */
744     for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
745         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
746             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
747                           I915_GEM_DOMAIN_INSTRUCTION, 0,
748                           0);
749         } else {
750             OUT_BCS_BATCH(batch, 0);
751         }
752     }
753
754     /* POL list */
755     for(i = 0; i < 32; i++) {
756         OUT_BCS_BATCH(batch, i/2);
757     }
758     OUT_BCS_BATCH(batch, 0);
759     OUT_BCS_BATCH(batch, 0);
760
761     ADVANCE_BCS_BATCH(batch);
762 }
763
764
765 static void
766 gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
767                                         struct intel_encoder_context *encoder_context)
768 {
769     struct intel_batchbuffer *batch = encoder_context->base.batch;
770     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
771
772     BEGIN_BCS_BATCH(batch, 10);
773
774     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
775     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
776                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
777                   0);
778     OUT_BCS_BATCH(batch, 0);
779     OUT_BCS_BATCH(batch, 0);
780         
781     /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
782     OUT_BCS_BATCH(batch, 0);
783     OUT_BCS_BATCH(batch, 0);
784     OUT_BCS_BATCH(batch, 0);
785
786     /* the DW7-9 is for Bitplane Read Buffer Base Address */
787     OUT_BCS_BATCH(batch, 0);
788     OUT_BCS_BATCH(batch, 0);
789     OUT_BCS_BATCH(batch, 0);
790
791     ADVANCE_BCS_BATCH(batch);
792 }
793
794 static void
795 gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
796 {
797     struct intel_batchbuffer *batch = encoder_context->base.batch;
798     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
799     struct i965_driver_data *i965 = i965_driver_data(ctx);
800
801     if (IS_STEPPING_BPLUS(i965)) {
802         gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, encoder_context);
803         return;
804     }
805
806     BEGIN_BCS_BATCH(batch, 4);
807
808     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
809     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
810                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
811                   0);
812     OUT_BCS_BATCH(batch, 0);
813     OUT_BCS_BATCH(batch, 0);
814
815     ADVANCE_BCS_BATCH(batch);
816 }
817
818
819 static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
820                                                        struct encode_state *encode_state,
821                                                        struct intel_encoder_context *encoder_context)
822 {
823     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
824
825     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
826     mfc_context->set_surface_state(ctx, encoder_context);
827     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
828     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
829     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
830     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
831     mfc_context->avc_qm_state(ctx, encoder_context);
832     mfc_context->avc_fqm_state(ctx, encoder_context);
833     gen75_mfc_avc_directmode_state(ctx, encoder_context); 
834     intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
835 }
836
837
838 static VAStatus gen75_mfc_run(VADriverContextP ctx, 
839                               struct encode_state *encode_state,
840                               struct intel_encoder_context *encoder_context)
841 {
842     struct intel_batchbuffer *batch = encoder_context->base.batch;
843
844     intel_batchbuffer_flush(batch);             //run the pipeline
845
846     return VA_STATUS_SUCCESS;
847 }
848
849
850 static VAStatus
851 gen75_mfc_stop(VADriverContextP ctx, 
852                struct encode_state *encode_state,
853                struct intel_encoder_context *encoder_context,
854                int *encoded_bits_size)
855 {
856     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
857     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
858     VACodedBufferSegment *coded_buffer_segment;
859     
860     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
861     assert(vaStatus == VA_STATUS_SUCCESS);
862     *encoded_bits_size = coded_buffer_segment->size * 8;
863     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
864
865     return VA_STATUS_SUCCESS;
866 }
867
868
869 static void
870 gen75_mfc_avc_slice_state(VADriverContextP ctx,
871                           VAEncPictureParameterBufferH264 *pic_param,
872                           VAEncSliceParameterBufferH264 *slice_param,
873                           struct encode_state *encode_state,
874                           struct intel_encoder_context *encoder_context,
875                           int rate_control_enable,
876                           int qp,
877                           struct intel_batchbuffer *batch)
878 {
879     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
880     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
881     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
882     int beginmb = slice_param->macroblock_address;
883     int endmb = beginmb + slice_param->num_macroblocks;
884     int beginx = beginmb % width_in_mbs;
885     int beginy = beginmb / width_in_mbs;
886     int nextx =  endmb % width_in_mbs;
887     int nexty = endmb / width_in_mbs;
888     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
889     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
890     int maxQpN, maxQpP;
891     unsigned char correct[6], grow, shrink;
892     int i;
893     int weighted_pred_idc = 0;
894     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
895     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
896     int num_ref_l0 = 0, num_ref_l1 = 0;
897
898     if (batch == NULL)
899         batch = encoder_context->base.batch;
900
901     if (slice_type == SLICE_TYPE_I) {
902         luma_log2_weight_denom = 0;
903         chroma_log2_weight_denom = 0;
904     } else if (slice_type == SLICE_TYPE_P) {
905         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
906         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
907
908         if (slice_param->num_ref_idx_active_override_flag)
909             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
910     } else if (slice_type == SLICE_TYPE_B) {
911         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
912         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
913         num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
914
915         if (slice_param->num_ref_idx_active_override_flag) {
916             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
917             num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
918         }
919
920         if (weighted_pred_idc == 2) {
921             /* 8.4.3 - Derivation process for prediction weights (8-279) */
922             luma_log2_weight_denom = 5;
923             chroma_log2_weight_denom = 5;
924         }
925     }
926
927     maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
928     maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
929
930     for (i = 0; i < 6; i++)
931         correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
932
933     grow = mfc_context->bit_rate_control_context[slice_type].GrowInit + 
934         (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
935     shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit + 
936         (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
937
938     BEGIN_BCS_BATCH(batch, 11);;
939
940     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
941     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
942
943     OUT_BCS_BATCH(batch,
944                   (num_ref_l0 << 16) |
945                   (num_ref_l1 << 24) |
946                   (chroma_log2_weight_denom << 8) |
947                   (luma_log2_weight_denom << 0));
948
949     OUT_BCS_BATCH(batch, 
950                   (weighted_pred_idc << 30) |
951                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
952                   (slice_param->disable_deblocking_filter_idc << 27) |
953                   (slice_param->cabac_init_idc << 24) |
954                   (qp<<16) |                    /*Slice Quantization Parameter*/
955                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
956                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
957     OUT_BCS_BATCH(batch,
958                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
959                   (beginx << 16) |
960                   slice_param->macroblock_address );
961     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
962     OUT_BCS_BATCH(batch, 
963                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
964                   (1 << 30) |           /*ResetRateControlCounter*/
965                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
966                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
967                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
968                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
969                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
970                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
971                   (last_slice << 19) |     /*IsLastSlice*/
972                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
973                   (1 << 17) |       /*HeaderPresentFlag*/       
974                   (1 << 16) |       /*SliceData PresentFlag*/
975                   (1 << 15) |       /*TailPresentFlag*/
976                   (1 << 13) |       /*RBSP NAL TYPE*/   
977                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
978     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
979     OUT_BCS_BATCH(batch,
980                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
981                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
982                   (shrink << 8)  |
983                   (grow << 0));   
984     OUT_BCS_BATCH(batch,
985                   (correct[5] << 20) |
986                   (correct[4] << 16) |
987                   (correct[3] << 12) |
988                   (correct[2] << 8) |
989                   (correct[1] << 4) |
990                   (correct[0] << 0));
991     OUT_BCS_BATCH(batch, 0);
992
993     ADVANCE_BCS_BATCH(batch);
994 }
995
996
997 #if MFC_SOFTWARE_HASWELL
998
999 static int
1000 gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
1001                                int qp,unsigned int *msg,
1002                                struct intel_encoder_context *encoder_context,
1003                                unsigned char target_mb_size, unsigned char max_mb_size,
1004                                struct intel_batchbuffer *batch)
1005 {
1006     int len_in_dwords = 12;
1007     unsigned int intra_msg;
1008 #define         INTRA_MSG_FLAG          (1 << 13)
1009 #define         INTRA_MBTYPE_MASK       (0x1F0000)
1010     if (batch == NULL)
1011         batch = encoder_context->base.batch;
1012
1013     BEGIN_BCS_BATCH(batch, len_in_dwords);
1014
1015     intra_msg = msg[0] & 0xC0FF;
1016     intra_msg |= INTRA_MSG_FLAG;
1017     intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
1018     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1019     OUT_BCS_BATCH(batch, 0);
1020     OUT_BCS_BATCH(batch, 0);
1021     OUT_BCS_BATCH(batch, 
1022                   (0 << 24) |           /* PackedMvNum, Debug*/
1023                   (0 << 20) |           /* No motion vector */
1024                   (1 << 19) |           /* CbpDcY */
1025                   (1 << 18) |           /* CbpDcU */
1026                   (1 << 17) |           /* CbpDcV */
1027                   intra_msg);
1028
1029     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
1030     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
1031     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
1032
1033     /*Stuff for Intra MB*/
1034     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
1035     OUT_BCS_BATCH(batch, msg[2]);       
1036     OUT_BCS_BATCH(batch, msg[3]&0xFF);  
1037     
1038     /*MaxSizeInWord and TargetSzieInWord*/
1039     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1040                   (target_mb_size << 16) );
1041
1042     OUT_BCS_BATCH(batch, 0);
1043
1044     ADVANCE_BCS_BATCH(batch);
1045
1046     return len_in_dwords;
1047 }
1048
1049 static int
1050 gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1051                                unsigned int *msg, unsigned int offset,
1052                                struct intel_encoder_context *encoder_context,
1053                                unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1054                                struct intel_batchbuffer *batch)
1055 {
1056     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1057     int len_in_dwords = 12;
1058     unsigned int inter_msg = 0;
1059     if (batch == NULL)
1060         batch = encoder_context->base.batch;
1061     {
1062 #define MSG_MV_OFFSET   4
1063         unsigned int *mv_ptr;
1064         mv_ptr = msg + MSG_MV_OFFSET;
1065         /* MV of VME output is based on 16 sub-blocks. So it is necessary
1066          * to convert them to be compatible with the format of AVC_PAK
1067          * command.
1068          */
1069         if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
1070             /* MV[0] and MV[2] are replicated */
1071             mv_ptr[4] = mv_ptr[0];
1072             mv_ptr[5] = mv_ptr[1];
1073             mv_ptr[2] = mv_ptr[8];
1074             mv_ptr[3] = mv_ptr[9];
1075             mv_ptr[6] = mv_ptr[8];
1076             mv_ptr[7] = mv_ptr[9];
1077         } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
1078             /* MV[0] and MV[1] are replicated */
1079             mv_ptr[2] = mv_ptr[0];
1080             mv_ptr[3] = mv_ptr[1];
1081             mv_ptr[4] = mv_ptr[16];
1082             mv_ptr[5] = mv_ptr[17];
1083             mv_ptr[6] = mv_ptr[24];
1084             mv_ptr[7] = mv_ptr[25];
1085         } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1086                    !(msg[1] & SUBMB_SHAPE_MASK)) {
1087             /* Don't touch MV[0] or MV[1] */
1088             mv_ptr[2] = mv_ptr[8];
1089             mv_ptr[3] = mv_ptr[9];
1090             mv_ptr[4] = mv_ptr[16];
1091             mv_ptr[5] = mv_ptr[17];
1092             mv_ptr[6] = mv_ptr[24];
1093             mv_ptr[7] = mv_ptr[25];
1094         }
1095     }
1096
1097     BEGIN_BCS_BATCH(batch, len_in_dwords);
1098
1099     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1100
1101     inter_msg = 32;
1102     /* MV quantity */
1103     if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
1104         if (msg[1] & SUBMB_SHAPE_MASK)
1105             inter_msg = 128;
1106     }
1107     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
1108     OUT_BCS_BATCH(batch, offset);
1109     inter_msg = msg[0] & (0x1F00FFFF);
1110     inter_msg |= INTER_MV8;
1111     inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
1112     if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1113         (msg[1] & SUBMB_SHAPE_MASK)) {
1114         inter_msg |= INTER_MV32;
1115     }
1116
1117     OUT_BCS_BATCH(batch, inter_msg);
1118
1119     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
1120     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
1121 #if 0 
1122     if ( slice_type == SLICE_TYPE_B) {
1123         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
1124     } else {
1125         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
1126     }
1127 #else
1128     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
1129 #endif
1130
1131     inter_msg = msg[1] >> 8;
1132     /*Stuff for Inter MB*/
1133     OUT_BCS_BATCH(batch, inter_msg);        
1134     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
1135     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);
1136
1137     /*MaxSizeInWord and TargetSzieInWord*/
1138     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1139                   (target_mb_size << 16) );
1140
1141     OUT_BCS_BATCH(batch, 0x0);    
1142
1143     ADVANCE_BCS_BATCH(batch);
1144
1145     return len_in_dwords;
1146 }
1147
1148 static void 
1149 gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1150                                         struct encode_state *encode_state,
1151                                         struct intel_encoder_context *encoder_context,
1152                                         int slice_index,
1153                                         struct intel_batchbuffer *slice_batch)
1154 {
1155     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1156     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1157     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1158     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1159     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1160     unsigned int *msg = NULL, offset = 0;
1161     unsigned char *msg_ptr = NULL;
1162     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1163     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1164     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1165     int i,x,y;
1166     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1167     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1168     unsigned char *slice_header = NULL;
1169     int slice_header_length_in_bits = 0;
1170     unsigned int tail_data[] = { 0x0, 0x0 };
1171     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1172     int is_intra = slice_type == SLICE_TYPE_I;
1173
1174     if (rate_control_mode == VA_RC_CBR) {
1175         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1176         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1177     }
1178
1179     /* only support for 8-bit pixel bit-depth */
1180     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1181     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1182     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1183     assert(qp >= 0 && qp < 52);
1184
1185     gen75_mfc_avc_slice_state(ctx, 
1186                               pPicParameter,
1187                               pSliceParameter,
1188                               encode_state, encoder_context,
1189                               (rate_control_mode == VA_RC_CBR), qp, slice_batch);
1190
1191     if ( slice_index == 0) 
1192         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1193
1194     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1195
1196     // slice hander
1197     mfc_context->insert_object(ctx, encoder_context,
1198                                (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1199                                5,  /* first 5 bytes are start code + nal unit type */
1200                                1, 0, 1, slice_batch);
1201
1202     dri_bo_map(vme_context->vme_output.bo , 1);
1203     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1204
1205     if (is_intra) {
1206         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1207     } else {
1208         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1209     }
1210    
1211     for (i = pSliceParameter->macroblock_address; 
1212          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1213         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1214         x = i % width_in_mbs;
1215         y = i / width_in_mbs;
1216         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1217
1218         if (is_intra) {
1219             assert(msg);
1220             gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1221         } else {
1222             int inter_rdo, intra_rdo;
1223             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1224             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1225             offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1226             if (intra_rdo < inter_rdo) { 
1227                 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1228             } else {
1229                 msg += AVC_INTER_MSG_OFFSET;
1230                 gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, slice_type, slice_batch);
1231             }
1232         }
1233     }
1234    
1235     dri_bo_unmap(vme_context->vme_output.bo);
1236
1237     if ( last_slice ) {    
1238         mfc_context->insert_object(ctx, encoder_context,
1239                                    tail_data, 2, 8,
1240                                    2, 1, 1, 0, slice_batch);
1241     } else {
1242         mfc_context->insert_object(ctx, encoder_context,
1243                                    tail_data, 1, 8,
1244                                    1, 1, 1, 0, slice_batch);
1245     }
1246
1247     free(slice_header);
1248
1249 }
1250
1251 static dri_bo *
1252 gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1253                                    struct encode_state *encode_state,
1254                                    struct intel_encoder_context *encoder_context)
1255 {
1256     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1257     struct i965_driver_data *i965 = i965_driver_data(ctx);
1258     struct intel_batchbuffer *batch;
1259     dri_bo *batch_bo;
1260     int i;
1261     int buffer_size;
1262
1263     batch = mfc_context->aux_batchbuffer;
1264     batch_bo = batch->buffer;
1265     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1266         gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1267     }
1268
1269     intel_batchbuffer_align(batch, 8);
1270     
1271     BEGIN_BCS_BATCH(batch, 2);
1272     OUT_BCS_BATCH(batch, 0);
1273     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1274     ADVANCE_BCS_BATCH(batch);
1275
1276     dri_bo_reference(batch_bo);
1277
1278     intel_batchbuffer_free(batch);
1279     mfc_context->aux_batchbuffer = NULL;
1280
1281     return batch_bo;
1282 }
1283
1284 #else
1285
1286 static void
1287 gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1288                                      struct encode_state *encode_state,
1289                                      struct intel_encoder_context *encoder_context)
1290
1291 {
1292     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1293     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1294
1295     assert(vme_context->vme_output.bo);
1296     mfc_context->buffer_suface_setup(ctx,
1297                                      &mfc_context->gpe_context,
1298                                      &vme_context->vme_output,
1299                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1300                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1301 }
1302
1303 static void
1304 gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1305                                       struct encode_state *encode_state,
1306                                       struct intel_encoder_context *encoder_context)
1307
1308 {
1309     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1310     assert(mfc_context->aux_batchbuffer_surface.bo);
1311     mfc_context->buffer_suface_setup(ctx,
1312                                      &mfc_context->gpe_context,
1313                                      &mfc_context->aux_batchbuffer_surface,
1314                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1315                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1316 }
1317
1318 static void
1319 gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
1320                                      struct encode_state *encode_state,
1321                                      struct intel_encoder_context *encoder_context)
1322 {
1323     gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1324     gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1325 }
1326
1327 static void
1328 gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
1329                                  struct encode_state *encode_state,
1330                                  struct intel_encoder_context *encoder_context)
1331 {
1332     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1333     struct gen6_interface_descriptor_data *desc;   
1334     int i;
1335     dri_bo *bo;
1336
1337     bo = mfc_context->gpe_context.idrt.bo;
1338     dri_bo_map(bo, 1);
1339     assert(bo->virtual);
1340     desc = bo->virtual;
1341
1342     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1343         struct i965_kernel *kernel;
1344
1345         kernel = &mfc_context->gpe_context.kernels[i];
1346         assert(sizeof(*desc) == 32);
1347
1348         /*Setup the descritor table*/
1349         memset(desc, 0, sizeof(*desc));
1350         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1351         desc->desc2.sampler_count = 0;
1352         desc->desc2.sampler_state_pointer = 0;
1353         desc->desc3.binding_table_entry_count = 2;
1354         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1355         desc->desc4.constant_urb_entry_read_offset = 0;
1356         desc->desc4.constant_urb_entry_read_length = 4;
1357                 
1358         /*kernel start*/
1359         dri_bo_emit_reloc(bo,   
1360                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1361                           0,
1362                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1363                           kernel->bo);
1364         desc++;
1365     }
1366
1367     dri_bo_unmap(bo);
1368 }
1369
1370 static void
1371 gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1372                                      struct encode_state *encode_state,
1373                                      struct intel_encoder_context *encoder_context)
1374 {
1375     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1376     
1377     (void)mfc_context;
1378 }
1379
/* AVC PAK command length: 48 bytes, i.e. 3 owords of 16 bytes each. */
#define AVC_PAK_LEN_IN_BYTE     48
#define AVC_PAK_LEN_IN_OWORD    (AVC_PAK_LEN_IN_BYTE / 16)
1382
1383 static void
1384 gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1385                                           uint32_t intra_flag,
1386                                           int head_offset,
1387                                           int number_mb_cmds,
1388                                           int slice_end_x,
1389                                           int slice_end_y,
1390                                           int mb_x,
1391                                           int mb_y,
1392                                           int width_in_mbs,
1393                                           int qp,
1394                                           uint32_t fwd_ref,
1395                                           uint32_t bwd_ref)
1396 {
1397     uint32_t temp_value;
1398     BEGIN_BATCH(batch, 14);
1399     
1400     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (14 - 2));
1401     OUT_BATCH(batch, 0);
1402     OUT_BATCH(batch, 0);
1403     OUT_BATCH(batch, 0);
1404     OUT_BATCH(batch, 0);
1405     OUT_BATCH(batch, 0);
1406    
1407     /*inline data */
1408     OUT_BATCH(batch, head_offset / 16);
1409     OUT_BATCH(batch, (intra_flag) | (qp << 16));
1410     temp_value = (mb_x | (mb_y << 8) | (width_in_mbs << 16));
1411     OUT_BATCH(batch, temp_value);
1412
1413     OUT_BATCH(batch, number_mb_cmds);
1414
1415     OUT_BATCH(batch,
1416               ((slice_end_y << 8) | (slice_end_x)));
1417     OUT_BATCH(batch, fwd_ref);
1418     OUT_BATCH(batch, bwd_ref);
1419
1420     OUT_BATCH(batch, MI_NOOP);
1421
1422     ADVANCE_BATCH(batch);
1423 }
1424
1425 static void
1426 gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1427                                         struct intel_encoder_context *encoder_context,
1428                                         VAEncSliceParameterBufferH264 *slice_param,
1429                                         int head_offset,
1430                                         int qp,
1431                                         int last_slice)
1432 {
1433     struct intel_batchbuffer *batch = encoder_context->base.batch;
1434     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1435     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1436     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1437     int total_mbs = slice_param->num_macroblocks;
1438     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1439     int number_mb_cmds = 128;
1440     int starting_offset = 0;
1441     int mb_x, mb_y;
1442     int last_mb, slice_end_x, slice_end_y;
1443     int remaining_mb = total_mbs;
1444     uint32_t fwd_ref , bwd_ref, mb_flag;
1445
1446     last_mb = slice_param->macroblock_address + total_mbs - 1;
1447     slice_end_x = last_mb % width_in_mbs;
1448     slice_end_y = last_mb / width_in_mbs;
1449
1450     if (slice_type == SLICE_TYPE_I) {
1451         fwd_ref = 0;
1452         bwd_ref = 0;
1453         mb_flag = 1;
1454     } else {
1455         fwd_ref = vme_context->ref_index_in_mb[0];
1456         bwd_ref = vme_context->ref_index_in_mb[1];
1457         mb_flag = 0;
1458     }
1459
1460     if (width_in_mbs >= 100) {
1461         number_mb_cmds = width_in_mbs / 5;
1462     } else if (width_in_mbs >= 80) {
1463         number_mb_cmds = width_in_mbs / 4;
1464     } else if (width_in_mbs >= 60) {
1465         number_mb_cmds = width_in_mbs / 3;
1466     } else if (width_in_mbs >= 40) {
1467         number_mb_cmds = width_in_mbs / 2;
1468     } else {
1469         number_mb_cmds = width_in_mbs;
1470     }
1471
1472     do {
1473         if (number_mb_cmds >= remaining_mb) {
1474                 number_mb_cmds = remaining_mb;
1475         }
1476         mb_x = (slice_param->macroblock_address + starting_offset) % width_in_mbs;
1477         mb_y = (slice_param->macroblock_address + starting_offset) / width_in_mbs;
1478
1479         gen75_mfc_batchbuffer_emit_object_command(batch,
1480                                                   mb_flag,
1481                                                   head_offset,
1482                                                   number_mb_cmds,
1483                                                   slice_end_x,
1484                                                   slice_end_y,
1485                                                   mb_x,
1486                                                   mb_y,
1487                                                   width_in_mbs,
1488                                                   qp,
1489                                                   fwd_ref,
1490                                                   bwd_ref);
1491
1492         head_offset += (number_mb_cmds * AVC_PAK_LEN_IN_BYTE);
1493         remaining_mb -= number_mb_cmds;
1494         starting_offset += number_mb_cmds;
1495     } while (remaining_mb > 0);
1496 }
1497                           
1498 /*
1499  * return size in Owords (16bytes)
1500  */         
1501 static void
1502 gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1503                                 struct encode_state *encode_state,
1504                                 struct intel_encoder_context *encoder_context,
1505                                 int slice_index)
1506 {
1507     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1508     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1509     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1510     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1511     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1512     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1513     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1514     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1515     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1516     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1517     unsigned char *slice_header = NULL;
1518     int slice_header_length_in_bits = 0;
1519     unsigned int tail_data[] = { 0x0, 0x0 };
1520     long head_offset;
1521     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1522
1523     if (rate_control_mode == VA_RC_CBR) {
1524         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1525         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1526     }
1527
1528     /* only support for 8-bit pixel bit-depth */
1529     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1530     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1531     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1532     assert(qp >= 0 && qp < 52);
1533
1534     gen75_mfc_avc_slice_state(ctx,
1535                               pPicParameter,
1536                               pSliceParameter,
1537                               encode_state,
1538                               encoder_context,
1539                               (rate_control_mode == VA_RC_CBR),
1540                               qp,
1541                               slice_batch);
1542
1543     if (slice_index == 0)
1544         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1545
1546     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1547
1548     // slice hander
1549     mfc_context->insert_object(ctx,
1550                                encoder_context,
1551                                (unsigned int *)slice_header,
1552                                ALIGN(slice_header_length_in_bits, 32) >> 5,
1553                                slice_header_length_in_bits & 0x1f,
1554                                5,  /* first 5 bytes are start code + nal unit type */
1555                                1,
1556                                0,
1557                                1,
1558                                slice_batch);
1559     free(slice_header);
1560
1561     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1562     head_offset = intel_batchbuffer_used_size(slice_batch);
1563
1564     slice_batch->ptr += pSliceParameter->num_macroblocks * AVC_PAK_LEN_IN_BYTE;
1565
1566     gen75_mfc_avc_batchbuffer_slice_command(ctx,
1567                                             encoder_context,
1568                                             pSliceParameter,
1569                                             head_offset,
1570                                             qp,
1571                                             last_slice);
1572
1573
1574     /* Aligned for tail */
1575     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1576     if (last_slice) {    
1577         mfc_context->insert_object(ctx,
1578                                    encoder_context,
1579                                    tail_data,
1580                                    2,
1581                                    8,
1582                                    2,
1583                                    1,
1584                                    1,
1585                                    0,
1586                                    slice_batch);
1587     } else {
1588         mfc_context->insert_object(ctx,
1589                                    encoder_context,
1590                                    tail_data,
1591                                    1,
1592                                    8,
1593                                    1,
1594                                    1,
1595                                    1,
1596                                    0,
1597                                    slice_batch);
1598     }
1599
1600     return;
1601 }
1602
1603 static void
1604 gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1605                                    struct encode_state *encode_state,
1606                                    struct intel_encoder_context *encoder_context)
1607 {
1608     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1609     struct intel_batchbuffer *batch = encoder_context->base.batch;
1610     int i;
1611     intel_batchbuffer_start_atomic(batch, 0x4000); 
1612     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1613
1614     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1615         gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i);
1616     }
1617     {
1618         struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1619         intel_batchbuffer_align(slice_batch, 8);
1620         BEGIN_BCS_BATCH(slice_batch, 2);
1621         OUT_BCS_BATCH(slice_batch, 0);
1622         OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
1623         ADVANCE_BCS_BATCH(slice_batch);
1624         mfc_context->aux_batchbuffer = NULL;
1625         intel_batchbuffer_free(slice_batch);
1626     }
1627     intel_batchbuffer_end_atomic(batch);
1628     intel_batchbuffer_flush(batch);
1629 }
1630
1631 static void
1632 gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
1633                                 struct encode_state *encode_state,
1634                                 struct intel_encoder_context *encoder_context)
1635 {
1636     gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1637     gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1638     gen75_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1639     gen75_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1640 }
1641
1642 static dri_bo *
1643 gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1644                                    struct encode_state *encode_state,
1645                                    struct intel_encoder_context *encoder_context)
1646 {
1647     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1648
1649     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
1650     gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1651
1652     return mfc_context->aux_batchbuffer_surface.bo;
1653 }
1654
1655 #endif
1656
1657 static void
1658 gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
1659                                   struct encode_state *encode_state,
1660                                   struct intel_encoder_context *encoder_context)
1661 {
1662     struct intel_batchbuffer *batch = encoder_context->base.batch;
1663     dri_bo *slice_batch_bo;
1664
1665     if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1666         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1667         assert(0);
1668         return; 
1669     }
1670
1671 #if MFC_SOFTWARE_HASWELL
1672     slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1673 #else
1674     slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1675 #endif
1676
1677     // begin programing
1678     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1679     intel_batchbuffer_emit_mi_flush(batch);
1680     
1681     // picture level programing
1682     gen75_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1683
1684     BEGIN_BCS_BATCH(batch, 2);
1685     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1686     OUT_BCS_RELOC(batch,
1687                   slice_batch_bo,
1688                   I915_GEM_DOMAIN_COMMAND, 0, 
1689                   0);
1690     ADVANCE_BCS_BATCH(batch);
1691
1692     // end programing
1693     intel_batchbuffer_end_atomic(batch);
1694
1695     dri_bo_unreference(slice_batch_bo);
1696 }
1697
1698
1699 static VAStatus
1700 gen75_mfc_avc_encode_picture(VADriverContextP ctx, 
1701                              struct encode_state *encode_state,
1702                              struct intel_encoder_context *encoder_context)
1703 {
1704     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1705     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1706     int current_frame_bits_size;
1707     int sts;
1708  
1709     for (;;) {
1710         gen75_mfc_init(ctx, encode_state, encoder_context);
1711         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1712         /*Programing bcs pipeline*/
1713         gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);  //filling the pipeline
1714         gen75_mfc_run(ctx, encode_state, encoder_context);
1715         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1716             gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1717             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1718             if (sts == BRC_NO_HRD_VIOLATION) {
1719                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1720                 break;
1721             }
1722             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1723                 if (!mfc_context->hrd.violation_noted) {
1724                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1725                     mfc_context->hrd.violation_noted = 1;
1726                 }
1727                 return VA_STATUS_SUCCESS;
1728             }
1729         } else {
1730             break;
1731         }
1732     }
1733
1734     return VA_STATUS_SUCCESS;
1735 }
1736
1737 /*
1738  * MPEG-2
1739  */
1740
/*
 * Picture-type code written into MFX_MPEG2_PIC_STATE, indexed by
 * pic_param->picture_type.
 */
static const int
va_to_gen75_mpeg2_picture_type[3] = {
    [0] = 1,    /* I */
    [1] = 2,    /* P */
    [2] = 3,    /* B */
};
1747
1748 static void
1749 gen75_mfc_mpeg2_pic_state(VADriverContextP ctx,
1750                           struct intel_encoder_context *encoder_context,
1751                           struct encode_state *encode_state)
1752 {
1753     struct intel_batchbuffer *batch = encoder_context->base.batch;
1754     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1755     VAEncPictureParameterBufferMPEG2 *pic_param;
1756     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1757     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1758     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1759
1760     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1761     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1762     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1763
1764     BEGIN_BCS_BATCH(batch, 13);
1765     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1766     OUT_BCS_BATCH(batch,
1767                   (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1768                   (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1769                   (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1770                   (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1771                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1772                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1773                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1774                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1775                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1776                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1777                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1778                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1779     OUT_BCS_BATCH(batch,
1780                   0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1781                   va_to_gen75_mpeg2_picture_type[pic_param->picture_type] << 9 |
1782                   0);
1783     OUT_BCS_BATCH(batch,
1784                   1 << 31 |     /* slice concealment */
1785                   (height_in_mbs - 1) << 16 |
1786                   (width_in_mbs - 1));
1787     if (slice_param && slice_param->quantiser_scale_code >= 14) 
1788         OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
1789     else
1790         OUT_BCS_BATCH(batch, 0);
1791
1792     OUT_BCS_BATCH(batch, 0);
1793     OUT_BCS_BATCH(batch,
1794                   0xFFF << 16 | /* InterMBMaxSize */
1795                   0xFFF << 0 |  /* IntraMBMaxSize */
1796                   0);
1797     OUT_BCS_BATCH(batch, 0);
1798     OUT_BCS_BATCH(batch, 0);
1799     OUT_BCS_BATCH(batch, 0);
1800     OUT_BCS_BATCH(batch, 0);
1801     OUT_BCS_BATCH(batch, 0);
1802     OUT_BCS_BATCH(batch, 0);
1803     ADVANCE_BCS_BATCH(batch);
1804 }
1805
1806 static void
1807 gen75_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1808 {
1809     unsigned char intra_qm[64] = {
1810         8, 16, 19, 22, 26, 27, 29, 34,
1811         16, 16, 22, 24, 27, 29, 34, 37,
1812         19, 22, 26, 27, 29, 34, 34, 38,
1813         22, 22, 26, 27, 29, 34, 37, 40,
1814         22, 26, 27, 29, 32, 35, 40, 48,
1815         26, 27, 29, 32, 35, 40, 48, 58,
1816         26, 27, 29, 34, 38, 46, 56, 69,
1817         27, 29, 35, 38, 46, 56, 69, 83
1818     };
1819
1820     unsigned char non_intra_qm[64] = {
1821         16, 16, 16, 16, 16, 16, 16, 16,
1822         16, 16, 16, 16, 16, 16, 16, 16,
1823         16, 16, 16, 16, 16, 16, 16, 16,
1824         16, 16, 16, 16, 16, 16, 16, 16,
1825         16, 16, 16, 16, 16, 16, 16, 16,
1826         16, 16, 16, 16, 16, 16, 16, 16,
1827         16, 16, 16, 16, 16, 16, 16, 16,
1828         16, 16, 16, 16, 16, 16, 16, 16
1829     };
1830
1831     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1832     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
1833 }
1834
1835 static void
1836 gen75_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1837 {
1838     unsigned short intra_fqm[64] = {
1839         65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1840         65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1841         65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1842         65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1843         65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1844         65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1845         65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1846         65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1847     };
1848
1849     unsigned short non_intra_fqm[64] = {
1850         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1851         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1852         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1853         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1854         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1855         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1856         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1857         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1858     };
1859
1860     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1861     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
1862 }
1863
1864 static void
1865 gen75_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1866                                  struct intel_encoder_context *encoder_context,
1867                                  int x, int y,
1868                                  int next_x, int next_y,
1869                                  int is_fisrt_slice_group,
1870                                  int is_last_slice_group,
1871                                  int intra_slice,
1872                                  int qp,
1873                                  struct intel_batchbuffer *batch)
1874 {
1875     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1876
1877     if (batch == NULL)
1878         batch = encoder_context->base.batch;
1879
1880     BEGIN_BCS_BATCH(batch, 8);
1881
1882     OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1883     OUT_BCS_BATCH(batch,
1884                   0 << 31 |                             /* MbRateCtrlFlag */
1885                   !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
1886                   1 << 17 |                             /* Insert Header before the first slice group data */
1887                   1 << 16 |                             /* SliceData PresentFlag: always 1 */
1888                   1 << 15 |                             /* TailPresentFlag: always 1 */
1889                   0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
1890                   !!intra_slice << 13 |                 /* IntraSlice */
1891                   !!intra_slice << 12 |                 /* IntraSliceFlag */
1892                   0);
1893     OUT_BCS_BATCH(batch,
1894                   next_y << 24 |
1895                   next_x << 16 |
1896                   y << 8 |
1897                   x << 0 |
1898                   0);
1899     OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
1900     /* bitstream pointer is only loaded once for the first slice of a frame when 
1901      * LoadSlicePointerFlag is 0
1902      */
1903     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1904     OUT_BCS_BATCH(batch, 0);    /* FIXME: */
1905     OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
1906     OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
1907
1908     ADVANCE_BCS_BATCH(batch);
1909 }
1910
1911 static int
1912 gen75_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1913                                  struct intel_encoder_context *encoder_context,
1914                                  int x, int y,
1915                                  int first_mb_in_slice,
1916                                  int last_mb_in_slice,
1917                                  int first_mb_in_slice_group,
1918                                  int last_mb_in_slice_group,
1919                                  int mb_type,
1920                                  int qp_scale_code,
1921                                  int coded_block_pattern,
1922                                  unsigned char target_size_in_word,
1923                                  unsigned char max_size_in_word,
1924                                  struct intel_batchbuffer *batch)
1925 {
1926     int len_in_dwords = 9;
1927
1928     if (batch == NULL)
1929         batch = encoder_context->base.batch;
1930
1931     BEGIN_BCS_BATCH(batch, len_in_dwords);
1932
1933     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1934     OUT_BCS_BATCH(batch,
1935                   0 << 24 |     /* PackedMvNum */
1936                   0 << 20 |     /* MvFormat */
1937                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1938                   0 << 15 |     /* TransformFlag: frame DCT */
1939                   0 << 14 |     /* FieldMbFlag */
1940                   1 << 13 |     /* IntraMbFlag */
1941                   mb_type << 8 |   /* MbType: Intra */
1942                   0 << 2 |      /* SkipMbFlag */
1943                   0 << 0 |      /* InterMbMode */
1944                   0);
1945     OUT_BCS_BATCH(batch, y << 16 | x);
1946     OUT_BCS_BATCH(batch,
1947                   max_size_in_word << 24 |
1948                   target_size_in_word << 16 |
1949                   coded_block_pattern << 6 |      /* CBP */
1950                   0);
1951     OUT_BCS_BATCH(batch,
1952                   last_mb_in_slice << 31 |
1953                   first_mb_in_slice << 30 |
1954                   0 << 27 |     /* EnableCoeffClamp */
1955                   last_mb_in_slice_group << 26 |
1956                   0 << 25 |     /* MbSkipConvDisable */
1957                   first_mb_in_slice_group << 24 |
1958                   0 << 16 |     /* MvFieldSelect */
1959                   qp_scale_code << 0 |
1960                   0);
1961     OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
1962     OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
1963     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1964     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1965
1966     ADVANCE_BCS_BATCH(batch);
1967
1968     return len_in_dwords;
1969 }
1970
/*
 * Offset of the motion vectors inside the per-macroblock message handed to
 * gen75_mfc_mpeg2_pak_object_inter(), counted in elements of that message
 * pointer (unsigned int).
 */
#define MPEG2_INTER_MV_OFFSET   12

/*
 * Motion vector range allowed for each f_code, indexed by f_code.
 * Entry 0 is a placeholder; mpeg2_motion_vector() only uses indices 1-9.
 */
static struct _mv_ranges
{
    int low;    /* lower bound, in half-pixel units */
    int high;   /* upper bound, in half-pixel units */
} mv_ranges[] = {
    {0, 0},
    {-16, 15},
    {-32, 31},
    {-64, 63},
    {-128, 127},
    {-256, 255},
    {-512, 511},
    {-1024, 1023},
    {-2048, 2047},
    {-4096, 4095}
};

/*
 * Sanitise one motion vector component (half-pixel units).
 *
 * mv          - vector component
 * pos         - macroblock coordinate along the same axis, in macroblocks
 * display_max - picture size along that axis, in pixels
 * f_code      - selects the allowed range from mv_ranges
 *
 * A vector that would move the 16-pixel block outside [0, display_max] is
 * replaced by 0.  The result is then clamped to the f_code range when
 * f_code is between 1 and 9; other f_code values leave it unclamped.
 */
static int
mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
{
    int result = mv;

    if ((mv + pos * 16 * 2 < 0) ||
        (mv + (pos + 1) * 16 * 2 > display_max * 2)) {
        result = 0;
    }

    /* no range table entry for this f_code: nothing to clamp against */
    if (f_code <= 0 || f_code >= 10)
        return result;

    if (result < mv_ranges[f_code].low)
        return mv_ranges[f_code].low;

    if (result > mv_ranges[f_code].high)
        return mv_ranges[f_code].high;

    return result;
}
2007
2008 static int
2009 gen75_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
2010                                  struct encode_state *encode_state,
2011                                  struct intel_encoder_context *encoder_context,
2012                                  unsigned int *msg,
2013                                  int width_in_mbs, int height_in_mbs,
2014                                  int x, int y,
2015                                  int first_mb_in_slice,
2016                                  int last_mb_in_slice,
2017                                  int first_mb_in_slice_group,
2018                                  int last_mb_in_slice_group,
2019                                  int qp_scale_code,
2020                                  unsigned char target_size_in_word,
2021                                  unsigned char max_size_in_word,
2022                                  struct intel_batchbuffer *batch)
2023 {
2024     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
2025     int len_in_dwords = 9;
2026     short *mvptr, mvx0, mvy0, mvx1, mvy1;
2027     
2028     if (batch == NULL)
2029         batch = encoder_context->base.batch;
2030
2031     mvptr = (short *)(msg + MPEG2_INTER_MV_OFFSET);
2032     mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
2033     mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
2034     mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
2035     mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
2036
2037     BEGIN_BCS_BATCH(batch, len_in_dwords);
2038
2039     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
2040     OUT_BCS_BATCH(batch,
2041                   2 << 24 |     /* PackedMvNum */
2042                   7 << 20 |     /* MvFormat */
2043                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
2044                   0 << 15 |     /* TransformFlag: frame DCT */
2045                   0 << 14 |     /* FieldMbFlag */
2046                   0 << 13 |     /* IntraMbFlag */
2047                   1 << 8 |      /* MbType: Frame-based */
2048                   0 << 2 |      /* SkipMbFlag */
2049                   0 << 0 |      /* InterMbMode */
2050                   0);
2051     OUT_BCS_BATCH(batch, y << 16 | x);
2052     OUT_BCS_BATCH(batch,
2053                   max_size_in_word << 24 |
2054                   target_size_in_word << 16 |
2055                   0x3f << 6 |   /* CBP */
2056                   0);
2057     OUT_BCS_BATCH(batch,
2058                   last_mb_in_slice << 31 |
2059                   first_mb_in_slice << 30 |
2060                   0 << 27 |     /* EnableCoeffClamp */
2061                   last_mb_in_slice_group << 26 |
2062                   0 << 25 |     /* MbSkipConvDisable */
2063                   first_mb_in_slice_group << 24 |
2064                   0 << 16 |     /* MvFieldSelect */
2065                   qp_scale_code << 0 |
2066                   0);
2067
2068     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
2069     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
2070     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
2071     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
2072
2073     ADVANCE_BCS_BATCH(batch);
2074
2075     return len_in_dwords;
2076 }
2077
2078 static void
2079 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
2080                                            struct encode_state *encode_state,
2081                                            struct intel_encoder_context *encoder_context,
2082                                            struct intel_batchbuffer *slice_batch)
2083 {
2084     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2085     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
2086
2087     if (encode_state->packed_header_data[idx]) {
2088         VAEncPackedHeaderParameterBuffer *param = NULL;
2089         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2090         unsigned int length_in_bits;
2091
2092         assert(encode_state->packed_header_param[idx]);
2093         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2094         length_in_bits = param->bit_length;
2095
2096         mfc_context->insert_object(ctx,
2097                                    encoder_context,
2098                                    header_data,
2099                                    ALIGN(length_in_bits, 32) >> 5,
2100                                    length_in_bits & 0x1f,
2101                                    5,   /* FIXME: check it */
2102                                    0,
2103                                    0,
2104                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2105                                    slice_batch);
2106     }
2107
2108     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2109
2110     if (encode_state->packed_header_data[idx]) {
2111         VAEncPackedHeaderParameterBuffer *param = NULL;
2112         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2113         unsigned int length_in_bits;
2114
2115         assert(encode_state->packed_header_param[idx]);
2116         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2117         length_in_bits = param->bit_length;
2118
2119         mfc_context->insert_object(ctx,
2120                                    encoder_context,
2121                                    header_data,
2122                                    ALIGN(length_in_bits, 32) >> 5,
2123                                    length_in_bits & 0x1f,
2124                                    5,   /* FIXME: check it */
2125                                    0,
2126                                    0,
2127                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2128                                    slice_batch);
2129     }
2130 }
2131
2132 static void 
2133 gen75_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2134                                      struct encode_state *encode_state,
2135                                      struct intel_encoder_context *encoder_context,
2136                                      int slice_index,
2137                                      VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2138                                      struct intel_batchbuffer *slice_batch)
2139 {
2140     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2141     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2142     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2143     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2144     unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2145     unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2146     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2147     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2148     int i, j;
2149     int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2150     unsigned int *msg = NULL;
2151     unsigned char *msg_ptr = NULL;
2152
2153     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
2154     h_start_pos = slice_param->macroblock_address % width_in_mbs;
2155     v_start_pos = slice_param->macroblock_address / width_in_mbs;
2156     assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
2157
2158     dri_bo_map(vme_context->vme_output.bo , 0);
2159     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
2160
2161     if (next_slice_group_param) {
2162         h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2163         v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
2164     } else {
2165         h_next_start_pos = 0;
2166         v_next_start_pos = height_in_mbs;
2167     }
2168
2169     gen75_mfc_mpeg2_slicegroup_state(ctx,
2170                                      encoder_context,
2171                                      h_start_pos,
2172                                      v_start_pos,
2173                                      h_next_start_pos,
2174                                      v_next_start_pos,
2175                                      slice_index == 0,
2176                                      next_slice_group_param == NULL,
2177                                      slice_param->is_intra_slice,
2178                                      slice_param->quantiser_scale_code,
2179                                      slice_batch);
2180
2181     if (slice_index == 0) 
2182         intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2183
2184     /* Insert '00' to make sure the header is valid */
2185     mfc_context->insert_object(ctx,
2186                                encoder_context,
2187                                (unsigned int*)section_delimiter,
2188                                1,
2189                                8,   /* 8bits in the last DWORD */
2190                                1,   /* 1 byte */
2191                                1,
2192                                0,
2193                                0,
2194                                slice_batch);
2195
2196     for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2197         /* PAK for each macroblocks */
2198         for (j = 0; j < slice_param->num_macroblocks; j++) {
2199             int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2200             int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2201             int first_mb_in_slice = (j == 0);
2202             int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2203             int first_mb_in_slice_group = (i == 0 && j == 0);
2204             int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2205                                           j == slice_param->num_macroblocks - 1);
2206
2207             msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
2208
2209             if (slice_param->is_intra_slice) {
2210                 gen75_mfc_mpeg2_pak_object_intra(ctx,
2211                                                  encoder_context,
2212                                                  h_pos, v_pos,
2213                                                  first_mb_in_slice,
2214                                                  last_mb_in_slice,
2215                                                  first_mb_in_slice_group,
2216                                                  last_mb_in_slice_group,
2217                                                  0x1a,
2218                                                  slice_param->quantiser_scale_code,
2219                                                  0x3f,
2220                                                  0,
2221                                                  0xff,
2222                                                  slice_batch);
2223             } else {
2224                 int inter_rdo, intra_rdo;
2225                 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
2226                 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
2227
2228                 if (intra_rdo < inter_rdo) 
2229                     gen75_mfc_mpeg2_pak_object_intra(ctx,
2230                                                      encoder_context,
2231                                                      h_pos, v_pos,
2232                                                      first_mb_in_slice,
2233                                                      last_mb_in_slice,
2234                                                      first_mb_in_slice_group,
2235                                                      last_mb_in_slice_group,
2236                                                      0x1a,
2237                                                      slice_param->quantiser_scale_code,
2238                                                      0x3f,
2239                                                      0,
2240                                                      0xff,
2241                                                      slice_batch);
2242                 else
2243                     gen75_mfc_mpeg2_pak_object_inter(ctx,
2244                                                      encode_state,
2245                                                      encoder_context,
2246                                                      msg,
2247                                                      width_in_mbs, height_in_mbs,
2248                                                      h_pos, v_pos,
2249                                                      first_mb_in_slice,
2250                                                      last_mb_in_slice,
2251                                                      first_mb_in_slice_group,
2252                                                      last_mb_in_slice_group,
2253                                                      slice_param->quantiser_scale_code,
2254                                                      0,
2255                                                      0xff,
2256                                                      slice_batch);
2257             }
2258         }
2259
2260         slice_param++;
2261     }
2262
2263     dri_bo_unmap(vme_context->vme_output.bo);
2264
2265     /* tail data */
2266     if (next_slice_group_param == NULL) { /* end of a picture */
2267         mfc_context->insert_object(ctx,
2268                                    encoder_context,
2269                                    (unsigned int *)tail_delimiter,
2270                                    2,
2271                                    8,   /* 8bits in the last DWORD */
2272                                    5,   /* 5 bytes */
2273                                    1,
2274                                    1,
2275                                    0,
2276                                    slice_batch);
2277     } else {        /* end of a lsice group */
2278         mfc_context->insert_object(ctx,
2279                                    encoder_context,
2280                                    (unsigned int *)section_delimiter,
2281                                    1,
2282                                    8,   /* 8bits in the last DWORD */
2283                                    1,   /* 1 byte */
2284                                    1,
2285                                    1,
2286                                    0,
2287                                    slice_batch);
2288     }
2289 }
2290
2291 /* 
2292  * A batch buffer for all slices, including slice state, 
2293  * slice insert object and slice pak object commands
2294  *
2295  */
2296 static dri_bo *
2297 gen75_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2298                                            struct encode_state *encode_state,
2299                                            struct intel_encoder_context *encoder_context)
2300 {
2301     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2302     struct intel_batchbuffer *batch;
2303     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2304     dri_bo *batch_bo;
2305     int i;
2306
2307     batch = mfc_context->aux_batchbuffer;
2308     batch_bo = batch->buffer;
2309
2310     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2311         if (i == encode_state->num_slice_params_ext - 1)
2312             next_slice_group_param = NULL;
2313         else
2314             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2315
2316         gen75_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2317     }
2318
2319     intel_batchbuffer_align(batch, 8);
2320     
2321     BEGIN_BCS_BATCH(batch, 2);
2322     OUT_BCS_BATCH(batch, 0);
2323     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2324     ADVANCE_BCS_BATCH(batch);
2325
2326     dri_bo_reference(batch_bo);
2327     intel_batchbuffer_free(batch);
2328     mfc_context->aux_batchbuffer = NULL;
2329
2330     return batch_bo;
2331 }
2332
2333 static void
2334 gen75_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2335                                             struct encode_state *encode_state,
2336                                             struct intel_encoder_context *encoder_context)
2337 {
2338     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2339
2340     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2341     mfc_context->set_surface_state(ctx, encoder_context);
2342     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2343     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
2344     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2345     gen75_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2346     gen75_mfc_mpeg2_qm_state(ctx, encoder_context);
2347     gen75_mfc_mpeg2_fqm_state(ctx, encoder_context);
2348 }
2349
2350 static void
2351 gen75_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2352                                     struct encode_state *encode_state,
2353                                     struct intel_encoder_context *encoder_context)
2354 {
2355     struct intel_batchbuffer *batch = encoder_context->base.batch;
2356     dri_bo *slice_batch_bo;
2357
2358     slice_batch_bo = gen75_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2359
2360     // begin programing
2361     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
2362     intel_batchbuffer_emit_mi_flush(batch);
2363     
2364     // picture level programing
2365     gen75_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
2366
2367     BEGIN_BCS_BATCH(batch, 2);
2368     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
2369     OUT_BCS_RELOC(batch,
2370                   slice_batch_bo,
2371                   I915_GEM_DOMAIN_COMMAND, 0, 
2372                   0);
2373     ADVANCE_BCS_BATCH(batch);
2374
2375     // end programing
2376     intel_batchbuffer_end_atomic(batch);
2377
2378     dri_bo_unreference(slice_batch_bo);
2379 }
2380
2381 static VAStatus
2382 intel_mfc_mpeg2_prepare(VADriverContextP ctx, 
2383                         struct encode_state *encode_state,
2384                         struct intel_encoder_context *encoder_context)
2385 {
2386     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2387     struct object_surface *obj_surface; 
2388     struct object_buffer *obj_buffer;
2389     struct i965_coded_buffer_segment *coded_buffer_segment;
2390     VAStatus vaStatus = VA_STATUS_SUCCESS;
2391     dri_bo *bo;
2392     int i;
2393
2394     /* reconstructed surface */
2395     obj_surface = encode_state->reconstructed_object;
2396     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2397     mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2398     dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2399     mfc_context->surface_state.width = obj_surface->orig_width;
2400     mfc_context->surface_state.height = obj_surface->orig_height;
2401     mfc_context->surface_state.w_pitch = obj_surface->width;
2402     mfc_context->surface_state.h_pitch = obj_surface->height;
2403
2404     /* forward reference */
2405     obj_surface = encode_state->reference_objects[0];
2406
2407     if (obj_surface && obj_surface->bo) {
2408         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2409         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2410     } else
2411         mfc_context->reference_surfaces[0].bo = NULL;
2412
2413     /* backward reference */
2414     obj_surface = encode_state->reference_objects[1];
2415
2416     if (obj_surface && obj_surface->bo) {
2417         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2418         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2419     } else {
2420         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2421
2422         if (mfc_context->reference_surfaces[1].bo)
2423             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2424     }
2425
2426     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2427         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2428
2429         if (mfc_context->reference_surfaces[i].bo)
2430             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2431     }
2432     
2433     /* input YUV surface */
2434     obj_surface = encode_state->input_yuv_object;
2435     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2436     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2437
2438     /* coded buffer */
2439     obj_buffer = encode_state->coded_buf_object;
2440     bo = obj_buffer->buffer_store->bo;
2441     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2442     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2443     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2444     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2445
2446     /* set the internal flag to 0 to indicate the coded size is unknown */
2447     dri_bo_map(bo, 1);
2448     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2449     coded_buffer_segment->mapped = 0;
2450     coded_buffer_segment->codec = encoder_context->codec;
2451     dri_bo_unmap(bo);
2452
2453     return vaStatus;
2454 }
2455
2456 static VAStatus
2457 gen75_mfc_mpeg2_encode_picture(VADriverContextP ctx, 
2458                                struct encode_state *encode_state,
2459                                struct intel_encoder_context *encoder_context)
2460 {
2461     gen75_mfc_init(ctx, encode_state, encoder_context);
2462     intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2463     /*Programing bcs pipeline*/
2464     gen75_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2465     gen75_mfc_run(ctx, encode_state, encoder_context);
2466
2467     return VA_STATUS_SUCCESS;
2468 }
2469
2470 static void
2471 gen75_mfc_context_destroy(void *context)
2472 {
2473     struct gen6_mfc_context *mfc_context = context;
2474     int i;
2475
2476     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2477     mfc_context->post_deblocking_output.bo = NULL;
2478
2479     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2480     mfc_context->pre_deblocking_output.bo = NULL;
2481
2482     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2483     mfc_context->uncompressed_picture_source.bo = NULL;
2484
2485     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
2486     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2487
2488     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2489         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2490         mfc_context->direct_mv_buffers[i].bo = NULL;
2491     }
2492
2493     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2494     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
2495
2496     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2497     mfc_context->macroblock_status_buffer.bo = NULL;
2498
2499     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2500     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2501
2502     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2503     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2504
2505     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2506         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2507         mfc_context->reference_surfaces[i].bo = NULL;  
2508     }
2509
2510     i965_gpe_context_destroy(&mfc_context->gpe_context);
2511
2512     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2513     mfc_context->mfc_batchbuffer_surface.bo = NULL;
2514
2515     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2516     mfc_context->aux_batchbuffer_surface.bo = NULL;
2517
2518     if (mfc_context->aux_batchbuffer)
2519         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2520
2521     mfc_context->aux_batchbuffer = NULL;
2522
2523     free(mfc_context);
2524 }
2525
2526 static VAStatus gen75_mfc_pipeline(VADriverContextP ctx,
2527                                    VAProfile profile,
2528                                    struct encode_state *encode_state,
2529                                    struct intel_encoder_context *encoder_context)
2530 {
2531     VAStatus vaStatus;
2532
2533     switch (profile) {
2534     case VAProfileH264ConstrainedBaseline:
2535     case VAProfileH264Main:
2536     case VAProfileH264High:
2537         vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
2538         break;
2539
2540         /* FIXME: add for other profile */
2541     case VAProfileMPEG2Simple:
2542     case VAProfileMPEG2Main:
2543         vaStatus = gen75_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
2544         break;
2545
2546     default:
2547         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2548         break;
2549     }
2550
2551     return vaStatus;
2552 }
2553
2554 Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2555 {
2556     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
2557
2558     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2559
2560     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
2561     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
2562
2563     mfc_context->gpe_context.curbe.length = 32 * 4;
2564
2565     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2566     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
2567     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
2568     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
2569     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
2570
2571     i965_gpe_load_kernels(ctx,
2572                           &mfc_context->gpe_context,
2573                           gen75_mfc_kernels,
2574                           1);
2575
2576     mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
2577     mfc_context->set_surface_state = gen75_mfc_surface_state;
2578     mfc_context->ind_obj_base_addr_state = gen75_mfc_ind_obj_base_addr_state;
2579     mfc_context->avc_img_state = gen75_mfc_avc_img_state;
2580     mfc_context->avc_qm_state = gen75_mfc_avc_qm_state;
2581     mfc_context->avc_fqm_state = gen75_mfc_avc_fqm_state;
2582     mfc_context->insert_object = gen75_mfc_avc_insert_object;
2583     mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;
2584
2585     encoder_context->mfc_context = mfc_context;
2586     encoder_context->mfc_context_destroy = gen75_mfc_context_destroy;
2587     encoder_context->mfc_pipeline = gen75_mfc_pipeline;
2588     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
2589
2590     return True;
2591 }