Follow the input Picture/Slice parameters to generate slice header/data
[platform/upstream/libva-intel-driver.git] / src / gen75_mfc.c
1 /*
2  * Copyright © 2010-2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45
#define MFC_SOFTWARE_HASWELL    1

/* First hardware revision that IS_STEPPING_BPLUS() treats as "B0 or later". */
#define B0_STEP_REV             2

/*
 * Non-zero when the device revision stored in i965->intel.revision is at
 * least B0_STEP_REV.  The argument is parenthesized so that any pointer
 * expression (e.g. "&dev" or "cond ? a : b") expands correctly.
 */
#define IS_STEPPING_BPLUS(i965) (((i965)->intel.revision) >= B0_STEP_REV)
50
51 static const uint32_t gen75_mfc_batchbuffer_avc_intra[][4] = {
52 #include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
53 };
54
55 static const uint32_t gen75_mfc_batchbuffer_avc_inter[][4] = {
56 #include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
57 };
58
59 static struct i965_kernel gen75_mfc_kernels[] = {
60     {
61         "MFC AVC INTRA BATCHBUFFER ",
62         MFC_BATCHBUFFER_AVC_INTRA,
63         gen75_mfc_batchbuffer_avc_intra,
64         sizeof(gen75_mfc_batchbuffer_avc_intra),
65         NULL
66     },
67
68     {
69         "MFC AVC INTER BATCHBUFFER ",
70         MFC_BATCHBUFFER_AVC_INTER,
71         gen75_mfc_batchbuffer_avc_inter,
72         sizeof(gen75_mfc_batchbuffer_avc_inter),
73         NULL
74     },
75 };
76
/*
 * Bit-field constants for inter macroblock messages.
 * NOTE(review): the users of these values are outside this part of the
 * file; the names suggest partition mode / sub-MB shape / MV count fields
 * of the VME output -- confirm against the consumers.
 */
#define INTER_MODE_MASK     0x03
#define INTER_16X8          0x01
#define INTER_8X16          0x02
#define INTER_8X8           0x03
#define SUBMB_SHAPE_MASK    0x00FF00

#define INTER_MV8           (4 << 20)
#define INTER_MV32          (6 << 20)
85
86
87 static void
88 gen75_mfc_pipe_mode_select(VADriverContextP ctx,
89                            int standard_select,
90                            struct intel_encoder_context *encoder_context)
91 {
92     struct intel_batchbuffer *batch = encoder_context->base.batch;
93     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
94     assert(standard_select == MFX_FORMAT_MPEG2 ||
95            standard_select == MFX_FORMAT_AVC);
96
97     BEGIN_BCS_BATCH(batch, 5);
98
99     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
100     OUT_BCS_BATCH(batch,
101                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
102                   (MFD_MODE_VLD << 15) | /* VLD mode */
103                   (0 << 10) | /* Stream-Out Enable */
104                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
105                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
106                   (0 << 5)  | /* not in stitch mode */
107                   (1 << 4)  | /* encoding mode */
108                   (standard_select << 0));  /* standard select: avc or mpeg2 */
109     OUT_BCS_BATCH(batch,
110                   (0 << 7)  | /* expand NOA bus flag */
111                   (0 << 6)  | /* disable slice-level clock gating */
112                   (0 << 5)  | /* disable clock gating for NOA */
113                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
114                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
115                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
116                   (0 << 1)  |
117                   (0 << 0));
118     OUT_BCS_BATCH(batch, 0);
119     OUT_BCS_BATCH(batch, 0);
120
121     ADVANCE_BCS_BATCH(batch);
122 }
123
124 static void
125 gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
126 {
127     struct intel_batchbuffer *batch = encoder_context->base.batch;
128     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
129
130     BEGIN_BCS_BATCH(batch, 6);
131
132     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
133     OUT_BCS_BATCH(batch, 0);
134     OUT_BCS_BATCH(batch,
135                   ((mfc_context->surface_state.height - 1) << 18) |
136                   ((mfc_context->surface_state.width - 1) << 4));
137     OUT_BCS_BATCH(batch,
138                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
139                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
140                   (0 << 22) | /* surface object control state, FIXME??? */
141                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
142                   (0 << 2)  | /* must be 0 for interleave U/V */
143                   (1 << 1)  | /* must be tiled */
144                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
145     OUT_BCS_BATCH(batch,
146                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
147                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
148     OUT_BCS_BATCH(batch, 0);
149
150     ADVANCE_BCS_BATCH(batch);
151 }
152
153 static void
154 gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
155                                         struct intel_encoder_context *encoder_context)
156 {
157     struct intel_batchbuffer *batch = encoder_context->base.batch;
158     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
159     struct gen6_vme_context *vme_context = encoder_context->vme_context;
160
161     BEGIN_BCS_BATCH(batch, 26);
162
163     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
164     /* the DW1-3 is for the MFX indirect bistream offset */
165     OUT_BCS_BATCH(batch, 0);
166     OUT_BCS_BATCH(batch, 0);
167     OUT_BCS_BATCH(batch, 0);
168     /* the DW4-5 is the MFX upper bound */
169     OUT_BCS_BATCH(batch, 0);
170     OUT_BCS_BATCH(batch, 0);
171
172     /* the DW6-10 is for MFX Indirect MV Object Base Address */
173     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
174     OUT_BCS_BATCH(batch, 0);
175     OUT_BCS_BATCH(batch, 0);
176     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
177     OUT_BCS_BATCH(batch, 0);
178
179     /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
180     OUT_BCS_BATCH(batch, 0);
181     OUT_BCS_BATCH(batch, 0);
182     OUT_BCS_BATCH(batch, 0);
183     OUT_BCS_BATCH(batch, 0);
184     OUT_BCS_BATCH(batch, 0);
185
186     /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
187     OUT_BCS_BATCH(batch, 0);
188     OUT_BCS_BATCH(batch, 0);
189     OUT_BCS_BATCH(batch, 0);
190     OUT_BCS_BATCH(batch, 0);
191     OUT_BCS_BATCH(batch, 0);
192
193     /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/        
194     OUT_BCS_RELOC(batch,
195                   mfc_context->mfc_indirect_pak_bse_object.bo,
196                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
197                   0);
198     OUT_BCS_BATCH(batch, 0);
199     OUT_BCS_BATCH(batch, 0);
200         
201     OUT_BCS_RELOC(batch,
202                   mfc_context->mfc_indirect_pak_bse_object.bo,
203                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
204                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
205     OUT_BCS_BATCH(batch, 0);
206
207     ADVANCE_BCS_BATCH(batch);
208 }
209
210 static void
211 gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
212 {
213     struct intel_batchbuffer *batch = encoder_context->base.batch;
214     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
215     struct gen6_vme_context *vme_context = encoder_context->vme_context;
216     struct i965_driver_data *i965 = i965_driver_data(ctx);
217
218     if (IS_STEPPING_BPLUS(i965)) {
219         gen75_mfc_ind_obj_base_addr_state_bplus(ctx, encoder_context);
220         return;
221     }
222
223     BEGIN_BCS_BATCH(batch, 11);
224
225     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
226     OUT_BCS_BATCH(batch, 0);
227     OUT_BCS_BATCH(batch, 0);
228     /* MFX Indirect MV Object Base Address */
229     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
230     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
231     OUT_BCS_BATCH(batch, 0);
232     OUT_BCS_BATCH(batch, 0);
233     OUT_BCS_BATCH(batch, 0);
234     OUT_BCS_BATCH(batch, 0);
235     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
236     OUT_BCS_RELOC(batch,
237                   mfc_context->mfc_indirect_pak_bse_object.bo,
238                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
239                   0);
240     OUT_BCS_RELOC(batch,
241                   mfc_context->mfc_indirect_pak_bse_object.bo,
242                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
243                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
244
245     ADVANCE_BCS_BATCH(batch);
246 }
247
248 static void
249 gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,  
250                         struct intel_encoder_context *encoder_context)
251 {
252     struct intel_batchbuffer *batch = encoder_context->base.batch;
253     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
254     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
255
256     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
257     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
258
259     BEGIN_BCS_BATCH(batch, 16);
260
261     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
262     /*DW1. MB setting of frame */
263     OUT_BCS_BATCH(batch,
264                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
265     OUT_BCS_BATCH(batch, 
266                   ((height_in_mbs - 1) << 16) | 
267                   ((width_in_mbs - 1) << 0));
268     /* DW3 QP setting */
269     OUT_BCS_BATCH(batch, 
270                   (0 << 24) |   /* Second Chroma QP Offset */
271                   (0 << 16) |   /* Chroma QP Offset */
272                   (0 << 14) |   /* Max-bit conformance Intra flag */
273                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
274                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
275                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
276                   (0 << 8)  |   /* FIXME: Image Structure */
277                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
278     OUT_BCS_BATCH(batch,
279                   (0 << 16) |   /* Mininum Frame size */
280                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
281                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
282                   (0 << 13) |   /* CABAC 0 word insertion test enable */
283                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
284                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
285                   (0 << 8)  |   /* FIXME: MbMvFormatFlag */
286                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
287                   (0 << 6)  |   /* Only valid for VLD decoding mode */
288                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
289                   (0 << 4)  |   /* Direct 8x8 inference flag */
290                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
291                   (1 << 2)  |   /* Frame MB only flag */
292                   (0 << 1)  |   /* MBAFF mode is in active */
293                   (0 << 0));    /* Field picture flag */
294     /* DW5 Trellis quantization */
295     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
296     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
297                   (0xBB8 << 16) |       /* InterMbMaxSz */
298                   (0xEE8) );            /* IntraMbMaxSz */
299     OUT_BCS_BATCH(batch, 0);            /* Reserved */
300     /* DW8. QP delta */
301     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
302     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
303     /* DW10. Bit setting for MB */
304     OUT_BCS_BATCH(batch, 0x8C000000);
305     OUT_BCS_BATCH(batch, 0x00010000);
306     /* DW12. */
307     OUT_BCS_BATCH(batch, 0);
308     OUT_BCS_BATCH(batch, 0x02010100);
309     /* DW14. For short format */
310     OUT_BCS_BATCH(batch, 0);
311     OUT_BCS_BATCH(batch, 0);
312
313     ADVANCE_BCS_BATCH(batch);
314 }
315
316 static void
317 gen75_mfc_qm_state(VADriverContextP ctx,
318                    int qm_type,
319                    unsigned int *qm,
320                    int qm_length,
321                    struct intel_encoder_context *encoder_context)
322 {
323     struct intel_batchbuffer *batch = encoder_context->base.batch;
324     unsigned int qm_buffer[16];
325
326     assert(qm_length <= 16);
327     assert(sizeof(*qm) == 4);
328     memcpy(qm_buffer, qm, qm_length * 4);
329
330     BEGIN_BCS_BATCH(batch, 18);
331     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
332     OUT_BCS_BATCH(batch, qm_type << 0);
333     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
334     ADVANCE_BCS_BATCH(batch);
335 }
336
337 static void
338 gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
339 {
340     unsigned int qm[16] = {
341         0x10101010, 0x10101010, 0x10101010, 0x10101010,
342         0x10101010, 0x10101010, 0x10101010, 0x10101010,
343         0x10101010, 0x10101010, 0x10101010, 0x10101010,
344         0x10101010, 0x10101010, 0x10101010, 0x10101010
345     };
346
347     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
348     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
349     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
350     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
351 }
352
353 static void
354 gen75_mfc_fqm_state(VADriverContextP ctx,
355                     int fqm_type,
356                     unsigned int *fqm,
357                     int fqm_length,
358                     struct intel_encoder_context *encoder_context)
359 {
360     struct intel_batchbuffer *batch = encoder_context->base.batch;
361     unsigned int fqm_buffer[32];
362
363     assert(fqm_length <= 32);
364     assert(sizeof(*fqm) == 4);
365     memcpy(fqm_buffer, fqm, fqm_length * 4);
366
367     BEGIN_BCS_BATCH(batch, 34);
368     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
369     OUT_BCS_BATCH(batch, fqm_type << 0);
370     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
371     ADVANCE_BCS_BATCH(batch);
372 }
373
374 static void
375 gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
376 {
377     unsigned int qm[32] = {
378         0x10001000, 0x10001000, 0x10001000, 0x10001000,
379         0x10001000, 0x10001000, 0x10001000, 0x10001000,
380         0x10001000, 0x10001000, 0x10001000, 0x10001000,
381         0x10001000, 0x10001000, 0x10001000, 0x10001000,
382         0x10001000, 0x10001000, 0x10001000, 0x10001000,
383         0x10001000, 0x10001000, 0x10001000, 0x10001000,
384         0x10001000, 0x10001000, 0x10001000, 0x10001000,
385         0x10001000, 0x10001000, 0x10001000, 0x10001000
386     };
387
388     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
389     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
390     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
391     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
392 }
393
394 static void
395 gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
396                             unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
397                             int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
398                             struct intel_batchbuffer *batch)
399 {
400     if (batch == NULL)
401         batch = encoder_context->base.batch;
402
403     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
404
405     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
406     OUT_BCS_BATCH(batch,
407                   (0 << 16) |   /* always start at offset 0 */
408                   (data_bits_in_last_dw << 8) |
409                   (skip_emul_byte_count << 4) |
410                   (!!emulation_flag << 3) |
411                   ((!!is_last_header) << 2) |
412                   ((!!is_end_of_slice) << 1) |
413                   (0 << 0));    /* FIXME: ??? */
414     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
415
416     ADVANCE_BCS_BATCH(batch);
417 }
418
419
420 static void gen75_mfc_init(VADriverContextP ctx,
421                            struct encode_state *encode_state,
422                            struct intel_encoder_context *encoder_context)
423 {
424     struct i965_driver_data *i965 = i965_driver_data(ctx);
425     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
426     dri_bo *bo;
427     int i;
428     int width_in_mbs = 0;
429     int height_in_mbs = 0;
430
431     if (encoder_context->codec == CODEC_H264) {
432         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
433         width_in_mbs = pSequenceParameter->picture_width_in_mbs;
434         height_in_mbs = pSequenceParameter->picture_height_in_mbs;
435     } else {
436         VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
437
438         assert(encoder_context->codec == CODEC_MPEG2);
439
440         width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
441         height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
442     }
443
444     /*Encode common setup for MFC*/
445     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
446     mfc_context->post_deblocking_output.bo = NULL;
447
448     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
449     mfc_context->pre_deblocking_output.bo = NULL;
450
451     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
452     mfc_context->uncompressed_picture_source.bo = NULL;
453
454     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
455     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
456
457     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
458         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
459         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
460         mfc_context->direct_mv_buffers[i].bo = NULL;
461     }
462
463     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
464         if (mfc_context->reference_surfaces[i].bo != NULL)
465             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
466         mfc_context->reference_surfaces[i].bo = NULL;  
467     }
468
469     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
470     bo = dri_bo_alloc(i965->intel.bufmgr,
471                       "Buffer",
472                       width_in_mbs * 64,
473                       64);
474     assert(bo);
475     mfc_context->intra_row_store_scratch_buffer.bo = bo;
476
477     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
478     bo = dri_bo_alloc(i965->intel.bufmgr,
479                       "Buffer",
480                       width_in_mbs * height_in_mbs * 16,
481                       64);
482     assert(bo);
483     mfc_context->macroblock_status_buffer.bo = bo;
484
485     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
486     bo = dri_bo_alloc(i965->intel.bufmgr,
487                       "Buffer",
488                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
489                       64);
490     assert(bo);
491     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
492
493     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
494     bo = dri_bo_alloc(i965->intel.bufmgr,
495                       "Buffer",
496                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
497                       0x1000);
498     assert(bo);
499     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
500
501     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
502     mfc_context->mfc_batchbuffer_surface.bo = NULL;
503
504     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
505     mfc_context->aux_batchbuffer_surface.bo = NULL;
506
507     if (mfc_context->aux_batchbuffer)
508         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
509
510     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0);
511     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
512     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
513     mfc_context->aux_batchbuffer_surface.pitch = 16;
514     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
515     mfc_context->aux_batchbuffer_surface.size_block = 16;
516
517     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
518 }
519
520 static void
521 gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
522                                     struct intel_encoder_context *encoder_context)
523 {
524     struct intel_batchbuffer *batch = encoder_context->base.batch;
525     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
526     int i;
527
528     BEGIN_BCS_BATCH(batch, 61);
529
530     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
531
532     /* the DW1-3 is for pre_deblocking */
533     if (mfc_context->pre_deblocking_output.bo)
534         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
535                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
536                       0);
537     else
538         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
539
540     OUT_BCS_BATCH(batch, 0);
541     OUT_BCS_BATCH(batch, 0);
542     /* the DW4-6 is for the post_deblocking */
543
544     if (mfc_context->post_deblocking_output.bo)
545         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
546                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
547                       0);                                                                                       /* post output addr  */ 
548     else
549         OUT_BCS_BATCH(batch, 0);
550     OUT_BCS_BATCH(batch, 0);
551     OUT_BCS_BATCH(batch, 0);
552
553     /* the DW7-9 is for the uncompressed_picture */
554     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
555                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
556                   0); /* uncompressed data */
557
558     OUT_BCS_BATCH(batch, 0);
559     OUT_BCS_BATCH(batch, 0);
560
561     /* the DW10-12 is for the mb status */
562     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
563                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
564                   0); /* StreamOut data*/
565     OUT_BCS_BATCH(batch, 0);
566     OUT_BCS_BATCH(batch, 0);
567
568     /* the DW13-15 is for the intra_row_store_scratch */
569     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
570                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
571                   0);   
572     OUT_BCS_BATCH(batch, 0);
573     OUT_BCS_BATCH(batch, 0);
574
575     /* the DW16-18 is for the deblocking filter */
576     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
577                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
578                   0);
579     OUT_BCS_BATCH(batch, 0);
580     OUT_BCS_BATCH(batch, 0);
581
582     /* the DW 19-50 is for Reference pictures*/
583     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
584         if ( mfc_context->reference_surfaces[i].bo != NULL) {
585             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
586                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
587                           0);                   
588         } else {
589             OUT_BCS_BATCH(batch, 0);
590         }
591         OUT_BCS_BATCH(batch, 0);
592     }
593     OUT_BCS_BATCH(batch, 0);
594
595     /* The DW 52-54 is for the MB status buffer */
596     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
597                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
598                   0);                                                                                   /* Macroblock status buffer*/
599         
600     OUT_BCS_BATCH(batch, 0);
601     OUT_BCS_BATCH(batch, 0);
602
603     /* the DW 55-57 is the ILDB buffer */
604     OUT_BCS_BATCH(batch, 0);
605     OUT_BCS_BATCH(batch, 0);
606     OUT_BCS_BATCH(batch, 0);
607
608     /* the DW 58-60 is the second ILDB buffer */
609     OUT_BCS_BATCH(batch, 0);
610     OUT_BCS_BATCH(batch, 0);
611     OUT_BCS_BATCH(batch, 0);
612     ADVANCE_BCS_BATCH(batch);
613 }
614
615 static void
616 gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
617 {
618     struct intel_batchbuffer *batch = encoder_context->base.batch;
619     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
620     struct i965_driver_data *i965 = i965_driver_data(ctx);
621     int i;
622
623     if (IS_STEPPING_BPLUS(i965)) {
624         gen75_mfc_pipe_buf_addr_state_bplus(ctx, encoder_context);
625         return;
626     }
627
628     BEGIN_BCS_BATCH(batch, 25);
629
630     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
631
632     if (mfc_context->pre_deblocking_output.bo)
633         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
634                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
635                       0);
636     else
637         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
638
639     if (mfc_context->post_deblocking_output.bo)
640         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
641                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
642                       0);                                                                                       /* post output addr  */ 
643     else
644         OUT_BCS_BATCH(batch, 0);
645
646     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
647                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
648                   0);                                                                                   /* uncompressed data */
649     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
650                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
651                   0);                                                                                   /* StreamOut data*/
652     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
653                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
654                   0);   
655     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
656                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
657                   0);
658     /* 7..22 Reference pictures*/
659     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
660         if ( mfc_context->reference_surfaces[i].bo != NULL) {
661             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
662                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
663                           0);                   
664         } else {
665             OUT_BCS_BATCH(batch, 0);
666         }
667     }
668     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
669                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
670                   0);                                                                                   /* Macroblock status buffer*/
671
672     OUT_BCS_BATCH(batch, 0);
673
674     ADVANCE_BCS_BATCH(batch);
675 }
676
677 static void
678 gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
679                                      struct intel_encoder_context *encoder_context)
680 {
681     struct intel_batchbuffer *batch = encoder_context->base.batch;
682     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
683
684     int i;
685
686     BEGIN_BCS_BATCH(batch, 71);
687
688     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
689
690     /* Reference frames and Current frames */
691     /* the DW1-32 is for the direct MV for reference */
692     for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
693         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
694             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
695                           I915_GEM_DOMAIN_INSTRUCTION, 0,
696                           0);
697             OUT_BCS_BATCH(batch, 0);
698         } else {
699             OUT_BCS_BATCH(batch, 0);
700             OUT_BCS_BATCH(batch, 0);
701         }
702     }
703     OUT_BCS_BATCH(batch, 0);
704
705     /* the DW34-36 is the MV for the current reference */
706     OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
707                   I915_GEM_DOMAIN_INSTRUCTION, 0,
708                   0);
709
710     OUT_BCS_BATCH(batch, 0);
711     OUT_BCS_BATCH(batch, 0);
712
713     /* POL list */
714     for(i = 0; i < 32; i++) {
715         OUT_BCS_BATCH(batch, i/2);
716     }
717     OUT_BCS_BATCH(batch, 0);
718     OUT_BCS_BATCH(batch, 0);
719
720     ADVANCE_BCS_BATCH(batch);
721 }
722
723 static void
724 gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
725 {
726     struct intel_batchbuffer *batch = encoder_context->base.batch;
727     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
728     struct i965_driver_data *i965 = i965_driver_data(ctx);
729     int i;
730
731     if (IS_STEPPING_BPLUS(i965)) {
732         gen75_mfc_avc_directmode_state_bplus(ctx, encoder_context);
733         return;
734     }
735
736     BEGIN_BCS_BATCH(batch, 69);
737
738     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
739
740     /* Reference frames and Current frames */
741     for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
742         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
743             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
744                           I915_GEM_DOMAIN_INSTRUCTION, 0,
745                           0);
746         } else {
747             OUT_BCS_BATCH(batch, 0);
748         }
749     }
750
751     /* POL list */
752     for(i = 0; i < 32; i++) {
753         OUT_BCS_BATCH(batch, i/2);
754     }
755     OUT_BCS_BATCH(batch, 0);
756     OUT_BCS_BATCH(batch, 0);
757
758     ADVANCE_BCS_BATCH(batch);
759 }
760
761
762 static void
763 gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
764                                         struct intel_encoder_context *encoder_context)
765 {
766     struct intel_batchbuffer *batch = encoder_context->base.batch;
767     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
768
769     BEGIN_BCS_BATCH(batch, 10);
770
771     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
772     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
773                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
774                   0);
775     OUT_BCS_BATCH(batch, 0);
776     OUT_BCS_BATCH(batch, 0);
777         
778     /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
779     OUT_BCS_BATCH(batch, 0);
780     OUT_BCS_BATCH(batch, 0);
781     OUT_BCS_BATCH(batch, 0);
782
783     /* the DW7-9 is for Bitplane Read Buffer Base Address */
784     OUT_BCS_BATCH(batch, 0);
785     OUT_BCS_BATCH(batch, 0);
786     OUT_BCS_BATCH(batch, 0);
787
788     ADVANCE_BCS_BATCH(batch);
789 }
790
791 static void
792 gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
793 {
794     struct intel_batchbuffer *batch = encoder_context->base.batch;
795     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
796     struct i965_driver_data *i965 = i965_driver_data(ctx);
797
798     if (IS_STEPPING_BPLUS(i965)) {
799         gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, encoder_context);
800         return;
801     }
802
803     BEGIN_BCS_BATCH(batch, 4);
804
805     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
806     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
807                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
808                   0);
809     OUT_BCS_BATCH(batch, 0);
810     OUT_BCS_BATCH(batch, 0);
811
812     ADVANCE_BCS_BATCH(batch);
813 }
814
815
816 static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
817                                                        struct encode_state *encode_state,
818                                                        struct intel_encoder_context *encoder_context)
819 {
820     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
821
822     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
823     mfc_context->set_surface_state(ctx, encoder_context);
824     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
825     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
826     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
827     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
828     mfc_context->avc_qm_state(ctx, encoder_context);
829     mfc_context->avc_fqm_state(ctx, encoder_context);
830     gen75_mfc_avc_directmode_state(ctx, encoder_context); 
831     intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
832 }
833
834
835 static VAStatus gen75_mfc_run(VADriverContextP ctx, 
836                               struct encode_state *encode_state,
837                               struct intel_encoder_context *encoder_context)
838 {
839     struct intel_batchbuffer *batch = encoder_context->base.batch;
840
841     intel_batchbuffer_flush(batch);             //run the pipeline
842
843     return VA_STATUS_SUCCESS;
844 }
845
846
847 static VAStatus
848 gen75_mfc_stop(VADriverContextP ctx, 
849                struct encode_state *encode_state,
850                struct intel_encoder_context *encoder_context,
851                int *encoded_bits_size)
852 {
853     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
854     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
855     VACodedBufferSegment *coded_buffer_segment;
856     
857     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
858     assert(vaStatus == VA_STATUS_SUCCESS);
859     *encoded_bits_size = coded_buffer_segment->size * 8;
860     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
861
862     return VA_STATUS_SUCCESS;
863 }
864
865
866 static void
867 gen75_mfc_avc_slice_state(VADriverContextP ctx,
868                           VAEncPictureParameterBufferH264 *pic_param,
869                           VAEncSliceParameterBufferH264 *slice_param,
870                           struct encode_state *encode_state,
871                           struct intel_encoder_context *encoder_context,
872                           int rate_control_enable,
873                           int qp,
874                           struct intel_batchbuffer *batch)
875 {
876     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
877     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
878     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
879     int beginmb = slice_param->macroblock_address;
880     int endmb = beginmb + slice_param->num_macroblocks;
881     int beginx = beginmb % width_in_mbs;
882     int beginy = beginmb / width_in_mbs;
883     int nextx =  endmb % width_in_mbs;
884     int nexty = endmb / width_in_mbs;
885     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
886     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
887     int maxQpN, maxQpP;
888     unsigned char correct[6], grow, shrink;
889     int i;
890     int weighted_pred_idc = 0;
891     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
892     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
893     int num_ref_l0 = 0, num_ref_l1 = 0;
894
895     if (batch == NULL)
896         batch = encoder_context->base.batch;
897
898     if (slice_type == SLICE_TYPE_I) {
899         luma_log2_weight_denom = 0;
900         chroma_log2_weight_denom = 0;
901     } else if (slice_type == SLICE_TYPE_P) {
902         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
903         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
904
905         if (slice_param->num_ref_idx_active_override_flag)
906             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
907     } else if (slice_type == SLICE_TYPE_B) {
908         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
909         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
910         num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
911
912         if (slice_param->num_ref_idx_active_override_flag) {
913             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
914             num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
915         }
916
917         if (weighted_pred_idc == 2) {
918             /* 8.4.3 - Derivation process for prediction weights (8-279) */
919             luma_log2_weight_denom = 5;
920             chroma_log2_weight_denom = 5;
921         }
922     }
923
924     maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
925     maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
926
927     for (i = 0; i < 6; i++)
928         correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
929
930     grow = mfc_context->bit_rate_control_context[slice_type].GrowInit + 
931         (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
932     shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit + 
933         (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
934
935     BEGIN_BCS_BATCH(batch, 11);;
936
937     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
938     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
939
940     OUT_BCS_BATCH(batch,
941                   (num_ref_l0 << 16) |
942                   (num_ref_l1 << 24) |
943                   (chroma_log2_weight_denom << 8) |
944                   (luma_log2_weight_denom << 0));
945
946     OUT_BCS_BATCH(batch, 
947                   (weighted_pred_idc << 30) |
948                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
949                   (slice_param->disable_deblocking_filter_idc << 27) |
950                   (slice_param->cabac_init_idc << 24) |
951                   (qp<<16) |                    /*Slice Quantization Parameter*/
952                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
953                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
954     OUT_BCS_BATCH(batch,
955                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
956                   (beginx << 16) |
957                   slice_param->macroblock_address );
958     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
959     OUT_BCS_BATCH(batch, 
960                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
961                   (1 << 30) |           /*ResetRateControlCounter*/
962                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
963                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
964                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
965                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
966                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
967                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
968                   (last_slice << 19) |     /*IsLastSlice*/
969                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
970                   (1 << 17) |       /*HeaderPresentFlag*/       
971                   (1 << 16) |       /*SliceData PresentFlag*/
972                   (1 << 15) |       /*TailPresentFlag*/
973                   (1 << 13) |       /*RBSP NAL TYPE*/   
974                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
975     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
976     OUT_BCS_BATCH(batch,
977                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
978                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
979                   (shrink << 8)  |
980                   (grow << 0));   
981     OUT_BCS_BATCH(batch,
982                   (correct[5] << 20) |
983                   (correct[4] << 16) |
984                   (correct[3] << 12) |
985                   (correct[2] << 8) |
986                   (correct[1] << 4) |
987                   (correct[0] << 0));
988     OUT_BCS_BATCH(batch, 0);
989
990     ADVANCE_BCS_BATCH(batch);
991 }
992
993
994 #ifdef MFC_SOFTWARE_HASWELL
995
996 static int
997 gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
998                                int qp,unsigned int *msg,
999                                struct intel_encoder_context *encoder_context,
1000                                unsigned char target_mb_size, unsigned char max_mb_size,
1001                                struct intel_batchbuffer *batch)
1002 {
1003     int len_in_dwords = 12;
1004     unsigned int intra_msg;
1005 #define         INTRA_MSG_FLAG          (1 << 13)
1006 #define         INTRA_MBTYPE_MASK       (0x1F0000)
1007     if (batch == NULL)
1008         batch = encoder_context->base.batch;
1009
1010     BEGIN_BCS_BATCH(batch, len_in_dwords);
1011
1012     intra_msg = msg[0] & 0xC0FF;
1013     intra_msg |= INTRA_MSG_FLAG;
1014     intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
1015     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1016     OUT_BCS_BATCH(batch, 0);
1017     OUT_BCS_BATCH(batch, 0);
1018     OUT_BCS_BATCH(batch, 
1019                   (0 << 24) |           /* PackedMvNum, Debug*/
1020                   (0 << 20) |           /* No motion vector */
1021                   (1 << 19) |           /* CbpDcY */
1022                   (1 << 18) |           /* CbpDcU */
1023                   (1 << 17) |           /* CbpDcV */
1024                   intra_msg);
1025
1026     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
1027     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
1028     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
1029
1030     /*Stuff for Intra MB*/
1031     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
1032     OUT_BCS_BATCH(batch, msg[2]);       
1033     OUT_BCS_BATCH(batch, msg[3]&0xFF);  
1034     
1035     /*MaxSizeInWord and TargetSzieInWord*/
1036     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1037                   (target_mb_size << 16) );
1038
1039     OUT_BCS_BATCH(batch, 0);
1040
1041     ADVANCE_BCS_BATCH(batch);
1042
1043     return len_in_dwords;
1044 }
1045
1046 static int
1047 gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1048                                unsigned int *msg, unsigned int offset,
1049                                struct intel_encoder_context *encoder_context,
1050                                unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1051                                struct intel_batchbuffer *batch)
1052 {
1053     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1054     int len_in_dwords = 12;
1055     unsigned int inter_msg = 0;
1056     if (batch == NULL)
1057         batch = encoder_context->base.batch;
1058     {
1059 #define MSG_MV_OFFSET   4
1060         unsigned int *mv_ptr;
1061         mv_ptr = msg + MSG_MV_OFFSET;
1062         /* MV of VME output is based on 16 sub-blocks. So it is necessary
1063          * to convert them to be compatible with the format of AVC_PAK
1064          * command.
1065          */
1066         if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
1067             /* MV[0] and MV[2] are replicated */
1068             mv_ptr[4] = mv_ptr[0];
1069             mv_ptr[5] = mv_ptr[1];
1070             mv_ptr[2] = mv_ptr[8];
1071             mv_ptr[3] = mv_ptr[9];
1072             mv_ptr[6] = mv_ptr[8];
1073             mv_ptr[7] = mv_ptr[9];
1074         } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
1075             /* MV[0] and MV[1] are replicated */
1076             mv_ptr[2] = mv_ptr[0];
1077             mv_ptr[3] = mv_ptr[1];
1078             mv_ptr[4] = mv_ptr[16];
1079             mv_ptr[5] = mv_ptr[17];
1080             mv_ptr[6] = mv_ptr[24];
1081             mv_ptr[7] = mv_ptr[25];
1082         } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1083                    !(msg[1] & SUBMB_SHAPE_MASK)) {
1084             /* Don't touch MV[0] or MV[1] */
1085             mv_ptr[2] = mv_ptr[8];
1086             mv_ptr[3] = mv_ptr[9];
1087             mv_ptr[4] = mv_ptr[16];
1088             mv_ptr[5] = mv_ptr[17];
1089             mv_ptr[6] = mv_ptr[24];
1090             mv_ptr[7] = mv_ptr[25];
1091         }
1092     }
1093
1094     BEGIN_BCS_BATCH(batch, len_in_dwords);
1095
1096     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1097
1098     inter_msg = 32;
1099     /* MV quantity */
1100     if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
1101         if (msg[1] & SUBMB_SHAPE_MASK)
1102             inter_msg = 128;
1103     }
1104     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
1105     OUT_BCS_BATCH(batch, offset);
1106     inter_msg = msg[0] & (0x1F00FFFF);
1107     inter_msg |= INTER_MV8;
1108     inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
1109     if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1110         (msg[1] & SUBMB_SHAPE_MASK)) {
1111         inter_msg |= INTER_MV32;
1112     }
1113
1114     OUT_BCS_BATCH(batch, inter_msg);
1115
1116     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
1117     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
1118 #if 0 
1119     if ( slice_type == SLICE_TYPE_B) {
1120         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
1121     } else {
1122         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
1123     }
1124 #else
1125     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
1126 #endif
1127
1128     inter_msg = msg[1] >> 8;
1129     /*Stuff for Inter MB*/
1130     OUT_BCS_BATCH(batch, inter_msg);        
1131     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
1132     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);
1133
1134     /*MaxSizeInWord and TargetSzieInWord*/
1135     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1136                   (target_mb_size << 16) );
1137
1138     OUT_BCS_BATCH(batch, 0x0);    
1139
1140     ADVANCE_BCS_BATCH(batch);
1141
1142     return len_in_dwords;
1143 }
1144
1145 #define         AVC_INTRA_RDO_OFFSET    4
1146 #define         AVC_INTER_RDO_OFFSET    10
1147 #define         AVC_INTER_MSG_OFFSET    8       
1148 #define         AVC_INTER_MV_OFFSET             48
1149 #define         AVC_RDO_MASK            0xFFFF
1150
1151 static void 
1152 gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1153                                         struct encode_state *encode_state,
1154                                         struct intel_encoder_context *encoder_context,
1155                                         int slice_index,
1156                                         struct intel_batchbuffer *slice_batch)
1157 {
1158     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1159     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1160     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1161     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1162     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1163     unsigned int *msg = NULL, offset = 0;
1164     unsigned char *msg_ptr = NULL;
1165     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1166     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1167     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1168     int i,x,y;
1169     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1170     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1171     unsigned char *slice_header = NULL;
1172     int slice_header_length_in_bits = 0;
1173     unsigned int tail_data[] = { 0x0, 0x0 };
1174     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1175     int is_intra = slice_type == SLICE_TYPE_I;
1176
1177     if (rate_control_mode == VA_RC_CBR) {
1178         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1179         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1180     }
1181
1182     /* only support for 8-bit pixel bit-depth */
1183     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1184     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1185     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1186     assert(qp >= 0 && qp < 52);
1187
1188     gen75_mfc_avc_slice_state(ctx, 
1189                               pPicParameter,
1190                               pSliceParameter,
1191                               encode_state, encoder_context,
1192                               (rate_control_mode == VA_RC_CBR), qp, slice_batch);
1193
1194     if ( slice_index == 0) 
1195         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1196
1197     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1198
1199     // slice hander
1200     mfc_context->insert_object(ctx, encoder_context,
1201                                (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1202                                5,  /* first 5 bytes are start code + nal unit type */
1203                                1, 0, 1, slice_batch);
1204
1205     dri_bo_map(vme_context->vme_output.bo , 1);
1206     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1207
1208     if (is_intra) {
1209         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1210     } else {
1211         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1212     }
1213    
1214     for (i = pSliceParameter->macroblock_address; 
1215          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1216         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1217         x = i % width_in_mbs;
1218         y = i / width_in_mbs;
1219         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1220
1221         if (is_intra) {
1222             assert(msg);
1223             gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1224         } else {
1225             int inter_rdo, intra_rdo;
1226             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1227             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1228             offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1229             if (intra_rdo < inter_rdo) { 
1230                 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1231             } else {
1232                 msg += AVC_INTER_MSG_OFFSET;
1233                 gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, slice_type, slice_batch);
1234             }
1235         }
1236     }
1237    
1238     dri_bo_unmap(vme_context->vme_output.bo);
1239
1240     if ( last_slice ) {    
1241         mfc_context->insert_object(ctx, encoder_context,
1242                                    tail_data, 2, 8,
1243                                    2, 1, 1, 0, slice_batch);
1244     } else {
1245         mfc_context->insert_object(ctx, encoder_context,
1246                                    tail_data, 1, 8,
1247                                    1, 1, 1, 0, slice_batch);
1248     }
1249
1250     free(slice_header);
1251
1252 }
1253
1254 static dri_bo *
1255 gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1256                                    struct encode_state *encode_state,
1257                                    struct intel_encoder_context *encoder_context)
1258 {
1259     struct i965_driver_data *i965 = i965_driver_data(ctx);
1260     struct intel_batchbuffer *batch;
1261     dri_bo *batch_bo;
1262     int i;
1263     int buffer_size;
1264     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1265     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1266     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1267
1268     buffer_size = width_in_mbs * height_in_mbs * 64;
1269     batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
1270     batch_bo = batch->buffer;
1271     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1272         gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1273     }
1274
1275     intel_batchbuffer_align(batch, 8);
1276     
1277     BEGIN_BCS_BATCH(batch, 2);
1278     OUT_BCS_BATCH(batch, 0);
1279     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1280     ADVANCE_BCS_BATCH(batch);
1281
1282     dri_bo_reference(batch_bo);
1283     intel_batchbuffer_free(batch);
1284
1285     return batch_bo;
1286 }
1287
1288 #else
1289
1290 static void
1291 gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1292                                      struct encode_state *encode_state,
1293                                      struct intel_encoder_context *encoder_context)
1294
1295 {
1296     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1297     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1298
1299     assert(vme_context->vme_output.bo);
1300     mfc_context->buffer_suface_setup(ctx,
1301                                      &mfc_context->gpe_context,
1302                                      &vme_context->vme_output,
1303                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1304                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1305     assert(mfc_context->aux_batchbuffer_surface.bo);
1306     mfc_context->buffer_suface_setup(ctx,
1307                                      &mfc_context->gpe_context,
1308                                      &mfc_context->aux_batchbuffer_surface,
1309                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1310                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
1311 }
1312
1313 static void
1314 gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1315                                       struct encode_state *encode_state,
1316                                       struct intel_encoder_context *encoder_context)
1317
1318 {
1319     struct i965_driver_data *i965 = i965_driver_data(ctx);
1320     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1321     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1322     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1323     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1324     mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1325     mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1326     mfc_context->mfc_batchbuffer_surface.pitch = 16;
1327     mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, 
1328                                                            "MFC batchbuffer",
1329                                                            mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1330                                                            0x1000);
1331     mfc_context->buffer_suface_setup(ctx,
1332                                      &mfc_context->gpe_context,
1333                                      &mfc_context->mfc_batchbuffer_surface,
1334                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1335                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1336 }
1337
1338 static void
1339 gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
1340                                      struct encode_state *encode_state,
1341                                      struct intel_encoder_context *encoder_context)
1342 {
1343     gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1344     gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1345 }
1346
1347 static void
1348 gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
1349                                  struct encode_state *encode_state,
1350                                  struct intel_encoder_context *encoder_context)
1351 {
1352     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1353     struct gen6_interface_descriptor_data *desc;   
1354     int i;
1355     dri_bo *bo;
1356
1357     bo = mfc_context->gpe_context.idrt.bo;
1358     dri_bo_map(bo, 1);
1359     assert(bo->virtual);
1360     desc = bo->virtual;
1361
1362     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1363         struct i965_kernel *kernel;
1364
1365         kernel = &mfc_context->gpe_context.kernels[i];
1366         assert(sizeof(*desc) == 32);
1367
1368         /*Setup the descritor table*/
1369         memset(desc, 0, sizeof(*desc));
1370         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1371         desc->desc2.sampler_count = 0;
1372         desc->desc2.sampler_state_pointer = 0;
1373         desc->desc3.binding_table_entry_count = 2;
1374         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1375         desc->desc4.constant_urb_entry_read_offset = 0;
1376         desc->desc4.constant_urb_entry_read_length = 4;
1377                 
1378         /*kernel start*/
1379         dri_bo_emit_reloc(bo,   
1380                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1381                           0,
1382                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1383                           kernel->bo);
1384         desc++;
1385     }
1386
1387     dri_bo_unmap(bo);
1388 }
1389
1390 static void
1391 gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1392                                      struct encode_state *encode_state,
1393                                      struct intel_encoder_context *encoder_context)
1394 {
1395     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1396     
1397     (void)mfc_context;
1398 }
1399
1400 static void
1401 gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1402                                           int index,
1403                                           int head_offset,
1404                                           int batchbuffer_offset,
1405                                           int head_size,
1406                                           int tail_size,
1407                                           int number_mb_cmds,
1408                                           int first_object,
1409                                           int last_object,
1410                                           int last_slice,
1411                                           int mb_x,
1412                                           int mb_y,
1413                                           int width_in_mbs,
1414                                           int qp)
1415 {
1416     BEGIN_BATCH(batch, 12);
1417     
1418     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1419     OUT_BATCH(batch, index);
1420     OUT_BATCH(batch, 0);
1421     OUT_BATCH(batch, 0);
1422     OUT_BATCH(batch, 0);
1423     OUT_BATCH(batch, 0);
1424    
1425     /*inline data */
1426     OUT_BATCH(batch, head_offset);
1427     OUT_BATCH(batch, batchbuffer_offset);
1428     OUT_BATCH(batch, 
1429               head_size << 16 |
1430               tail_size);
1431     OUT_BATCH(batch,
1432               number_mb_cmds << 16 |
1433               first_object << 2 |
1434               last_object << 1 |
1435               last_slice);
1436     OUT_BATCH(batch,
1437               mb_y << 8 |
1438               mb_x);
1439     OUT_BATCH(batch,
1440               qp << 16 |
1441               width_in_mbs);
1442
1443     ADVANCE_BATCH(batch);
1444 }
1445
1446 static void
1447 gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1448                                         struct intel_encoder_context *encoder_context,
1449                                         VAEncSliceParameterBufferH264 *slice_param,
1450                                         int head_offset,
1451                                         unsigned short head_size,
1452                                         unsigned short tail_size,
1453                                         int batchbuffer_offset,
1454                                         int qp,
1455                                         int last_slice)
1456 {
1457     struct intel_batchbuffer *batch = encoder_context->base.batch;
1458     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1459     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1460     int total_mbs = slice_param->num_macroblocks;
1461     int number_mb_cmds = 128;
1462     int starting_mb = 0;
1463     int last_object = 0;
1464     int first_object = 1;
1465     int i;
1466     int mb_x, mb_y;
1467     int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1468
1469     for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1470         last_object = (total_mbs - starting_mb) == number_mb_cmds;
1471         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1472         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1473         assert(mb_x <= 255 && mb_y <= 255);
1474
1475         starting_mb += number_mb_cmds;
1476
1477         gen75_mfc_batchbuffer_emit_object_command(batch,
1478                                                   index,
1479                                                   head_offset,
1480                                                   batchbuffer_offset,
1481                                                   head_size,
1482                                                   tail_size,
1483                                                   number_mb_cmds,
1484                                                   first_object,
1485                                                   last_object,
1486                                                   last_slice,
1487                                                   mb_x,
1488                                                   mb_y,
1489                                                   width_in_mbs,
1490                                                   qp);
1491
1492         if (first_object) {
1493             head_offset += head_size;
1494             batchbuffer_offset += head_size;
1495         }
1496
1497         if (last_object) {
1498             head_offset += tail_size;
1499             batchbuffer_offset += tail_size;
1500         }
1501
1502         batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
1503
1504         first_object = 0;
1505     }
1506
1507     if (!last_object) {
1508         last_object = 1;
1509         number_mb_cmds = total_mbs % number_mb_cmds;
1510         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1511         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1512         assert(mb_x <= 255 && mb_y <= 255);
1513         starting_mb += number_mb_cmds;
1514
1515         gen75_mfc_batchbuffer_emit_object_command(batch,
1516                                                   index,
1517                                                   head_offset,
1518                                                   batchbuffer_offset,
1519                                                   head_size,
1520                                                   tail_size,
1521                                                   number_mb_cmds,
1522                                                   first_object,
1523                                                   last_object,
1524                                                   last_slice,
1525                                                   mb_x,
1526                                                   mb_y,
1527                                                   width_in_mbs,
1528                                                   qp);
1529     }
1530 }
1531                           
1532 /*
1533  * return size in Owords (16bytes)
1534  */         
1535 static int
1536 gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1537                                 struct encode_state *encode_state,
1538                                 struct intel_encoder_context *encoder_context,
1539                                 int slice_index,
1540                                 int batchbuffer_offset)
1541 {
1542     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1543     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1544     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1545     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1546     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1547     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1548     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1549     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1550     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1551     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1552     unsigned char *slice_header = NULL;
1553     int slice_header_length_in_bits = 0;
1554     unsigned int tail_data[] = { 0x0, 0x0 };
1555     long head_offset;
1556     int old_used = intel_batchbuffer_used_size(slice_batch), used;
1557     unsigned short head_size, tail_size;
1558     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1559
1560     if (rate_control_mode == VA_RC_CBR) {
1561         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1562         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1563     }
1564
1565     /* only support for 8-bit pixel bit-depth */
1566     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1567     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1568     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1569     assert(qp >= 0 && qp < 52);
1570
1571     head_offset = old_used / 16;
1572     gen75_mfc_avc_slice_state(ctx,
1573                               pPicParameter,
1574                               pSliceParameter,
1575                               encode_state,
1576                               encoder_context,
1577                               (rate_control_mode == VA_RC_CBR),
1578                               qp,
1579                               slice_batch);
1580
1581     if (slice_index == 0)
1582         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1583
1584     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1585
1586     // slice hander
1587     mfc_context->insert_object(ctx,
1588                                encoder_context,
1589                                (unsigned int *)slice_header,
1590                                ALIGN(slice_header_length_in_bits, 32) >> 5,
1591                                slice_header_length_in_bits & 0x1f,
1592                                5,  /* first 5 bytes are start code + nal unit type */
1593                                1,
1594                                0,
1595                                1,
1596                                slice_batch);
1597     free(slice_header);
1598
1599     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1600     used = intel_batchbuffer_used_size(slice_batch);
1601     head_size = (used - old_used) / 16;
1602     old_used = used;
1603
1604     /* tail */
1605     if (last_slice) {    
1606         mfc_context->insert_object(ctx,
1607                                    encoder_context,
1608                                    tail_data,
1609                                    2,
1610                                    8,
1611                                    2,
1612                                    1,
1613                                    1,
1614                                    0,
1615                                    slice_batch);
1616     } else {
1617         mfc_context->insert_object(ctx,
1618                                    encoder_context,
1619                                    tail_data,
1620                                    1,
1621                                    8,
1622                                    1,
1623                                    1,
1624                                    1,
1625                                    0,
1626                                    slice_batch);
1627     }
1628
1629     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1630     used = intel_batchbuffer_used_size(slice_batch);
1631     tail_size = (used - old_used) / 16;
1632
1633    
1634     gen75_mfc_avc_batchbuffer_slice_command(ctx,
1635                                             encoder_context,
1636                                             pSliceParameter,
1637                                             head_offset,
1638                                             head_size,
1639                                             tail_size,
1640                                             batchbuffer_offset,
1641                                             qp,
1642                                             last_slice);
1643
1644     return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
1645 }
1646
1647 static void
1648 gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1649                                    struct encode_state *encode_state,
1650                                    struct intel_encoder_context *encoder_context)
1651 {
1652     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1653     struct intel_batchbuffer *batch = encoder_context->base.batch;
1654     int i, size, offset = 0;
1655     intel_batchbuffer_start_atomic(batch, 0x4000); 
1656     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1657
1658     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1659         size = gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1660         offset += size;
1661     }
1662
1663     intel_batchbuffer_end_atomic(batch);
1664     intel_batchbuffer_flush(batch);
1665 }
1666
1667 static void
1668 gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
1669                                 struct encode_state *encode_state,
1670                                 struct intel_encoder_context *encoder_context)
1671 {
1672     gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1673     gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1674     gen75_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1675     gen75_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1676 }
1677
1678 static dri_bo *
1679 gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1680                                    struct encode_state *encode_state,
1681                                    struct intel_encoder_context *encoder_context)
1682 {
1683     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1684
1685     gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1686     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1687
1688     return mfc_context->mfc_batchbuffer_surface.bo;
1689 }
1690
1691 #endif
1692
1693 static void
1694 gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
1695                                   struct encode_state *encode_state,
1696                                   struct intel_encoder_context *encoder_context)
1697 {
1698     struct intel_batchbuffer *batch = encoder_context->base.batch;
1699     dri_bo *slice_batch_bo;
1700
1701     if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1702         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1703         assert(0);
1704         return; 
1705     }
1706
1707 #ifdef MFC_SOFTWARE_HASWELL
1708     slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1709 #else
1710     slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1711 #endif
1712
1713     // begin programing
1714     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1715     intel_batchbuffer_emit_mi_flush(batch);
1716     
1717     // picture level programing
1718     gen75_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1719
1720     BEGIN_BCS_BATCH(batch, 2);
1721     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1722     OUT_BCS_RELOC(batch,
1723                   slice_batch_bo,
1724                   I915_GEM_DOMAIN_COMMAND, 0, 
1725                   0);
1726     ADVANCE_BCS_BATCH(batch);
1727
1728     // end programing
1729     intel_batchbuffer_end_atomic(batch);
1730
1731     dri_bo_unreference(slice_batch_bo);
1732 }
1733
1734
1735 static VAStatus
1736 gen75_mfc_avc_encode_picture(VADriverContextP ctx, 
1737                              struct encode_state *encode_state,
1738                              struct intel_encoder_context *encoder_context)
1739 {
1740     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1741     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1742     int current_frame_bits_size;
1743     int sts;
1744  
1745     for (;;) {
1746         gen75_mfc_init(ctx, encode_state, encoder_context);
1747         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1748         /*Programing bcs pipeline*/
1749         gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);  //filling the pipeline
1750         gen75_mfc_run(ctx, encode_state, encoder_context);
1751         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1752             gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1753             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1754             if (sts == BRC_NO_HRD_VIOLATION) {
1755                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1756                 break;
1757             }
1758             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1759                 if (!mfc_context->hrd.violation_noted) {
1760                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1761                     mfc_context->hrd.violation_noted = 1;
1762                 }
1763                 return VA_STATUS_SUCCESS;
1764             }
1765         } else {
1766             break;
1767         }
1768     }
1769
1770     return VA_STATUS_SUCCESS;
1771 }
1772
1773 /*
1774  * MPEG-2
1775  */
1776
/*
 * Picture-type codes written into MFX_MPEG2_PIC_STATE, indexed by the
 * picture_type field of the MPEG-2 picture parameter buffer
 * (entries in order: I, P, B).
 */
static const int
va_to_gen75_mpeg2_picture_type[3] = { 1 /* I */, 2 /* P */, 3 /* B */ };
1783
1784 static void
1785 gen75_mfc_mpeg2_pic_state(VADriverContextP ctx,
1786                           struct intel_encoder_context *encoder_context,
1787                           struct encode_state *encode_state)
1788 {
1789     struct intel_batchbuffer *batch = encoder_context->base.batch;
1790     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1791     VAEncPictureParameterBufferMPEG2 *pic_param;
1792     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1793     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1794     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1795
1796     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1797     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1798     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1799
1800     BEGIN_BCS_BATCH(batch, 13);
1801     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1802     OUT_BCS_BATCH(batch,
1803                   (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1804                   (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1805                   (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1806                   (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1807                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1808                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1809                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1810                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1811                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1812                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1813                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1814                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1815     OUT_BCS_BATCH(batch,
1816                   0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1817                   va_to_gen75_mpeg2_picture_type[pic_param->picture_type] << 9 |
1818                   0);
1819     OUT_BCS_BATCH(batch,
1820                   1 << 31 |     /* slice concealment */
1821                   (height_in_mbs - 1) << 16 |
1822                   (width_in_mbs - 1));
1823     if (slice_param && slice_param->quantiser_scale_code >= 14) 
1824         OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
1825     else
1826         OUT_BCS_BATCH(batch, 0);
1827
1828     OUT_BCS_BATCH(batch, 0);
1829     OUT_BCS_BATCH(batch,
1830                   0xFFF << 16 | /* InterMBMaxSize */
1831                   0xFFF << 0 |  /* IntraMBMaxSize */
1832                   0);
1833     OUT_BCS_BATCH(batch, 0);
1834     OUT_BCS_BATCH(batch, 0);
1835     OUT_BCS_BATCH(batch, 0);
1836     OUT_BCS_BATCH(batch, 0);
1837     OUT_BCS_BATCH(batch, 0);
1838     OUT_BCS_BATCH(batch, 0);
1839     ADVANCE_BCS_BATCH(batch);
1840 }
1841
1842 static void
1843 gen75_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1844 {
1845     unsigned char intra_qm[64] = {
1846         8, 16, 19, 22, 26, 27, 29, 34,
1847         16, 16, 22, 24, 27, 29, 34, 37,
1848         19, 22, 26, 27, 29, 34, 34, 38,
1849         22, 22, 26, 27, 29, 34, 37, 40,
1850         22, 26, 27, 29, 32, 35, 40, 48,
1851         26, 27, 29, 32, 35, 40, 48, 58,
1852         26, 27, 29, 34, 38, 46, 56, 69,
1853         27, 29, 35, 38, 46, 56, 69, 83
1854     };
1855
1856     unsigned char non_intra_qm[64] = {
1857         16, 16, 16, 16, 16, 16, 16, 16,
1858         16, 16, 16, 16, 16, 16, 16, 16,
1859         16, 16, 16, 16, 16, 16, 16, 16,
1860         16, 16, 16, 16, 16, 16, 16, 16,
1861         16, 16, 16, 16, 16, 16, 16, 16,
1862         16, 16, 16, 16, 16, 16, 16, 16,
1863         16, 16, 16, 16, 16, 16, 16, 16,
1864         16, 16, 16, 16, 16, 16, 16, 16
1865     };
1866
1867     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1868     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
1869 }
1870
1871 static void
1872 gen75_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1873 {
1874     unsigned short intra_fqm[64] = {
1875         65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1876         65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1877         65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1878         65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1879         65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1880         65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1881         65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1882         65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1883     };
1884
1885     unsigned short non_intra_fqm[64] = {
1886         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1887         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1888         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1889         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1890         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1891         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1892         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1893         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1894     };
1895
1896     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1897     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
1898 }
1899
1900 static void
1901 gen75_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1902                                  struct intel_encoder_context *encoder_context,
1903                                  int x, int y,
1904                                  int next_x, int next_y,
1905                                  int is_fisrt_slice_group,
1906                                  int is_last_slice_group,
1907                                  int intra_slice,
1908                                  int qp,
1909                                  struct intel_batchbuffer *batch)
1910 {
1911     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1912
1913     if (batch == NULL)
1914         batch = encoder_context->base.batch;
1915
1916     BEGIN_BCS_BATCH(batch, 8);
1917
1918     OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1919     OUT_BCS_BATCH(batch,
1920                   0 << 31 |                             /* MbRateCtrlFlag */
1921                   !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
1922                   1 << 17 |                             /* Insert Header before the first slice group data */
1923                   1 << 16 |                             /* SliceData PresentFlag: always 1 */
1924                   1 << 15 |                             /* TailPresentFlag: always 1 */
1925                   0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
1926                   !!intra_slice << 13 |                 /* IntraSlice */
1927                   !!intra_slice << 12 |                 /* IntraSliceFlag */
1928                   0);
1929     OUT_BCS_BATCH(batch,
1930                   next_y << 24 |
1931                   next_x << 16 |
1932                   y << 8 |
1933                   x << 0 |
1934                   0);
1935     OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
1936     /* bitstream pointer is only loaded once for the first slice of a frame when 
1937      * LoadSlicePointerFlag is 0
1938      */
1939     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1940     OUT_BCS_BATCH(batch, 0);    /* FIXME: */
1941     OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
1942     OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
1943
1944     ADVANCE_BCS_BATCH(batch);
1945 }
1946
1947 static int
1948 gen75_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1949                                  struct intel_encoder_context *encoder_context,
1950                                  int x, int y,
1951                                  int first_mb_in_slice,
1952                                  int last_mb_in_slice,
1953                                  int first_mb_in_slice_group,
1954                                  int last_mb_in_slice_group,
1955                                  int mb_type,
1956                                  int qp_scale_code,
1957                                  int coded_block_pattern,
1958                                  unsigned char target_size_in_word,
1959                                  unsigned char max_size_in_word,
1960                                  struct intel_batchbuffer *batch)
1961 {
1962     int len_in_dwords = 9;
1963
1964     if (batch == NULL)
1965         batch = encoder_context->base.batch;
1966
1967     BEGIN_BCS_BATCH(batch, len_in_dwords);
1968
1969     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1970     OUT_BCS_BATCH(batch,
1971                   0 << 24 |     /* PackedMvNum */
1972                   0 << 20 |     /* MvFormat */
1973                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1974                   0 << 15 |     /* TransformFlag: frame DCT */
1975                   0 << 14 |     /* FieldMbFlag */
1976                   1 << 13 |     /* IntraMbFlag */
1977                   mb_type << 8 |   /* MbType: Intra */
1978                   0 << 2 |      /* SkipMbFlag */
1979                   0 << 0 |      /* InterMbMode */
1980                   0);
1981     OUT_BCS_BATCH(batch, y << 16 | x);
1982     OUT_BCS_BATCH(batch,
1983                   max_size_in_word << 24 |
1984                   target_size_in_word << 16 |
1985                   coded_block_pattern << 6 |      /* CBP */
1986                   0);
1987     OUT_BCS_BATCH(batch,
1988                   last_mb_in_slice << 31 |
1989                   first_mb_in_slice << 30 |
1990                   0 << 27 |     /* EnableCoeffClamp */
1991                   last_mb_in_slice_group << 26 |
1992                   0 << 25 |     /* MbSkipConvDisable */
1993                   first_mb_in_slice_group << 24 |
1994                   0 << 16 |     /* MvFieldSelect */
1995                   qp_scale_code << 0 |
1996                   0);
1997     OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
1998     OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
1999     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
2000     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
2001
2002     ADVANCE_BCS_BATCH(batch);
2003
2004     return len_in_dwords;
2005 }
2006
/*
 * Offset of the motion vectors inside the per-macroblock message passed to
 * gen75_mfc_mpeg2_pak_object_inter().  It is added to an `unsigned int *`,
 * so the unit is dwords.
 */
#define MPEG2_INTER_MV_OFFSET   12 

/*
 * Allowed motion vector range for each f_code value, indexed by f_code.
 * Entry 0 is a placeholder: f_code 0 is never looked up.
 */
static const struct _mv_ranges
{
    int low;    /* in the unit of 1/2 pixel */
    int high;   /* in the unit of 1/2 pixel */
} mv_ranges[] = {
    {0, 0},
    {-16, 15},
    {-32, 31},
    {-64, 63},
    {-128, 127},
    {-256, 255},
    {-512, 511},
    {-1024, 1023},
    {-2048, 2047},
    {-4096, 4095}
};

/*
 * Sanitise one motion vector component.
 *
 * mv          - vector component, in half-pixel units
 * pos         - macroblock index along the same axis
 * display_max - picture size along that axis, in pixels
 * f_code      - selects the legal range from mv_ranges
 *
 * The vector is reset to 0 when the referenced 16-pixel block would start
 * before the picture or end beyond display_max.  It is then clamped to the
 * range of f_code; f_code values without a table entry (<= 0 or past the
 * end of mv_ranges) leave it unclamped.
 *
 * Returns the sanitised component.
 */
static int
mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
{
    /* Derive the bound from the table so the two can never drift apart. */
    const int num_ranges = (int)(sizeof(mv_ranges) / sizeof(mv_ranges[0]));

    /* Positions are compared in half-pixel units: 16 pixels * 2 per macroblock. */
    if (mv + pos * 16 * 2 < 0 ||
        mv + (pos + 1) * 16 * 2 > display_max * 2)
        mv = 0;

    if (f_code > 0 && f_code < num_ranges) {
        if (mv < mv_ranges[f_code].low)
            mv = mv_ranges[f_code].low;

        if (mv > mv_ranges[f_code].high)
            mv = mv_ranges[f_code].high;
    }

    return mv;
}
2043
2044 static int
2045 gen75_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
2046                                  struct encode_state *encode_state,
2047                                  struct intel_encoder_context *encoder_context,
2048                                  unsigned int *msg,
2049                                  int width_in_mbs, int height_in_mbs,
2050                                  int x, int y,
2051                                  int first_mb_in_slice,
2052                                  int last_mb_in_slice,
2053                                  int first_mb_in_slice_group,
2054                                  int last_mb_in_slice_group,
2055                                  int qp_scale_code,
2056                                  unsigned char target_size_in_word,
2057                                  unsigned char max_size_in_word,
2058                                  struct intel_batchbuffer *batch)
2059 {
2060     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
2061     int len_in_dwords = 9;
2062     short *mvptr, mvx0, mvy0, mvx1, mvy1;
2063     
2064     if (batch == NULL)
2065         batch = encoder_context->base.batch;
2066
2067     mvptr = (short *)(msg + MPEG2_INTER_MV_OFFSET);
2068     mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
2069     mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
2070     mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
2071     mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
2072
2073     BEGIN_BCS_BATCH(batch, len_in_dwords);
2074
2075     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
2076     OUT_BCS_BATCH(batch,
2077                   2 << 24 |     /* PackedMvNum */
2078                   7 << 20 |     /* MvFormat */
2079                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
2080                   0 << 15 |     /* TransformFlag: frame DCT */
2081                   0 << 14 |     /* FieldMbFlag */
2082                   0 << 13 |     /* IntraMbFlag */
2083                   1 << 8 |      /* MbType: Frame-based */
2084                   0 << 2 |      /* SkipMbFlag */
2085                   0 << 0 |      /* InterMbMode */
2086                   0);
2087     OUT_BCS_BATCH(batch, y << 16 | x);
2088     OUT_BCS_BATCH(batch,
2089                   max_size_in_word << 24 |
2090                   target_size_in_word << 16 |
2091                   0x3f << 6 |   /* CBP */
2092                   0);
2093     OUT_BCS_BATCH(batch,
2094                   last_mb_in_slice << 31 |
2095                   first_mb_in_slice << 30 |
2096                   0 << 27 |     /* EnableCoeffClamp */
2097                   last_mb_in_slice_group << 26 |
2098                   0 << 25 |     /* MbSkipConvDisable */
2099                   first_mb_in_slice_group << 24 |
2100                   0 << 16 |     /* MvFieldSelect */
2101                   qp_scale_code << 0 |
2102                   0);
2103
2104     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
2105     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
2106     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
2107     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
2108
2109     ADVANCE_BCS_BATCH(batch);
2110
2111     return len_in_dwords;
2112 }
2113
2114 static void
2115 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
2116                                            struct encode_state *encode_state,
2117                                            struct intel_encoder_context *encoder_context,
2118                                            struct intel_batchbuffer *slice_batch)
2119 {
2120     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2121     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
2122
2123     if (encode_state->packed_header_data[idx]) {
2124         VAEncPackedHeaderParameterBuffer *param = NULL;
2125         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2126         unsigned int length_in_bits;
2127
2128         assert(encode_state->packed_header_param[idx]);
2129         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2130         length_in_bits = param->bit_length;
2131
2132         mfc_context->insert_object(ctx,
2133                                    encoder_context,
2134                                    header_data,
2135                                    ALIGN(length_in_bits, 32) >> 5,
2136                                    length_in_bits & 0x1f,
2137                                    5,   /* FIXME: check it */
2138                                    0,
2139                                    0,
2140                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2141                                    slice_batch);
2142     }
2143
2144     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2145
2146     if (encode_state->packed_header_data[idx]) {
2147         VAEncPackedHeaderParameterBuffer *param = NULL;
2148         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2149         unsigned int length_in_bits;
2150
2151         assert(encode_state->packed_header_param[idx]);
2152         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2153         length_in_bits = param->bit_length;
2154
2155         mfc_context->insert_object(ctx,
2156                                    encoder_context,
2157                                    header_data,
2158                                    ALIGN(length_in_bits, 32) >> 5,
2159                                    length_in_bits & 0x1f,
2160                                    5,   /* FIXME: check it */
2161                                    0,
2162                                    0,
2163                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2164                                    slice_batch);
2165     }
2166 }
2167
2168 static void 
2169 gen75_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2170                                      struct encode_state *encode_state,
2171                                      struct intel_encoder_context *encoder_context,
2172                                      int slice_index,
2173                                      VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2174                                      struct intel_batchbuffer *slice_batch)
2175 {
2176     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2177     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2178     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2179     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2180     unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2181     unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2182     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2183     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2184     int i, j;
2185     int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2186     unsigned int *msg = NULL;
2187     unsigned char *msg_ptr = NULL;
2188
2189     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
2190     h_start_pos = slice_param->macroblock_address % width_in_mbs;
2191     v_start_pos = slice_param->macroblock_address / width_in_mbs;
2192     assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
2193
2194     dri_bo_map(vme_context->vme_output.bo , 0);
2195     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
2196
2197     if (next_slice_group_param) {
2198         h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2199         v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
2200     } else {
2201         h_next_start_pos = 0;
2202         v_next_start_pos = height_in_mbs;
2203     }
2204
2205     gen75_mfc_mpeg2_slicegroup_state(ctx,
2206                                      encoder_context,
2207                                      h_start_pos,
2208                                      v_start_pos,
2209                                      h_next_start_pos,
2210                                      v_next_start_pos,
2211                                      slice_index == 0,
2212                                      next_slice_group_param == NULL,
2213                                      slice_param->is_intra_slice,
2214                                      slice_param->quantiser_scale_code,
2215                                      slice_batch);
2216
2217     if (slice_index == 0) 
2218         intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2219
2220     /* Insert '00' to make sure the header is valid */
2221     mfc_context->insert_object(ctx,
2222                                encoder_context,
2223                                (unsigned int*)section_delimiter,
2224                                1,
2225                                8,   /* 8bits in the last DWORD */
2226                                1,   /* 1 byte */
2227                                1,
2228                                0,
2229                                0,
2230                                slice_batch);
2231
2232     for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2233         /* PAK for each macroblocks */
2234         for (j = 0; j < slice_param->num_macroblocks; j++) {
2235             int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2236             int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2237             int first_mb_in_slice = (j == 0);
2238             int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2239             int first_mb_in_slice_group = (i == 0 && j == 0);
2240             int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2241                                           j == slice_param->num_macroblocks - 1);
2242
2243             msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
2244
2245             if (slice_param->is_intra_slice) {
2246                 gen75_mfc_mpeg2_pak_object_intra(ctx,
2247                                                  encoder_context,
2248                                                  h_pos, v_pos,
2249                                                  first_mb_in_slice,
2250                                                  last_mb_in_slice,
2251                                                  first_mb_in_slice_group,
2252                                                  last_mb_in_slice_group,
2253                                                  0x1a,
2254                                                  slice_param->quantiser_scale_code,
2255                                                  0x3f,
2256                                                  0,
2257                                                  0xff,
2258                                                  slice_batch);
2259             } else {
2260                 int inter_rdo, intra_rdo;
2261                 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
2262                 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
2263
2264                 if (intra_rdo < inter_rdo) 
2265                     gen75_mfc_mpeg2_pak_object_intra(ctx,
2266                                                      encoder_context,
2267                                                      h_pos, v_pos,
2268                                                      first_mb_in_slice,
2269                                                      last_mb_in_slice,
2270                                                      first_mb_in_slice_group,
2271                                                      last_mb_in_slice_group,
2272                                                      0x1a,
2273                                                      slice_param->quantiser_scale_code,
2274                                                      0x3f,
2275                                                      0,
2276                                                      0xff,
2277                                                      slice_batch);
2278                 else
2279                     gen75_mfc_mpeg2_pak_object_inter(ctx,
2280                                                      encode_state,
2281                                                      encoder_context,
2282                                                      msg,
2283                                                      width_in_mbs, height_in_mbs,
2284                                                      h_pos, v_pos,
2285                                                      first_mb_in_slice,
2286                                                      last_mb_in_slice,
2287                                                      first_mb_in_slice_group,
2288                                                      last_mb_in_slice_group,
2289                                                      slice_param->quantiser_scale_code,
2290                                                      0,
2291                                                      0xff,
2292                                                      slice_batch);
2293             }
2294         }
2295
2296         slice_param++;
2297     }
2298
2299     dri_bo_unmap(vme_context->vme_output.bo);
2300
2301     /* tail data */
2302     if (next_slice_group_param == NULL) { /* end of a picture */
2303         mfc_context->insert_object(ctx,
2304                                    encoder_context,
2305                                    (unsigned int *)tail_delimiter,
2306                                    2,
2307                                    8,   /* 8bits in the last DWORD */
2308                                    5,   /* 5 bytes */
2309                                    1,
2310                                    1,
2311                                    0,
2312                                    slice_batch);
2313     } else {        /* end of a lsice group */
2314         mfc_context->insert_object(ctx,
2315                                    encoder_context,
2316                                    (unsigned int *)section_delimiter,
2317                                    1,
2318                                    8,   /* 8bits in the last DWORD */
2319                                    1,   /* 1 byte */
2320                                    1,
2321                                    1,
2322                                    0,
2323                                    slice_batch);
2324     }
2325 }
2326
2327 /* 
2328  * A batch buffer for all slices, including slice state, 
2329  * slice insert object and slice pak object commands
2330  *
2331  */
2332 static dri_bo *
2333 gen75_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2334                                            struct encode_state *encode_state,
2335                                            struct intel_encoder_context *encoder_context)
2336 {
2337     struct i965_driver_data *i965 = i965_driver_data(ctx);
2338     struct intel_batchbuffer *batch;
2339     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2340     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2341     dri_bo *batch_bo;
2342     int i;
2343     int buffer_size;
2344     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2345     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2346
2347     buffer_size = width_in_mbs * height_in_mbs * 64;
2348     batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
2349     batch_bo = batch->buffer;
2350
2351     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2352         if (i == encode_state->num_slice_params_ext - 1)
2353             next_slice_group_param = NULL;
2354         else
2355             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2356
2357         gen75_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2358     }
2359
2360     intel_batchbuffer_align(batch, 8);
2361     
2362     BEGIN_BCS_BATCH(batch, 2);
2363     OUT_BCS_BATCH(batch, 0);
2364     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2365     ADVANCE_BCS_BATCH(batch);
2366
2367     dri_bo_reference(batch_bo);
2368     intel_batchbuffer_free(batch);
2369
2370     return batch_bo;
2371 }
2372
2373 static void
2374 gen75_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2375                                             struct encode_state *encode_state,
2376                                             struct intel_encoder_context *encoder_context)
2377 {
2378     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2379
2380     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2381     mfc_context->set_surface_state(ctx, encoder_context);
2382     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2383     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
2384     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2385     gen75_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2386     gen75_mfc_mpeg2_qm_state(ctx, encoder_context);
2387     gen75_mfc_mpeg2_fqm_state(ctx, encoder_context);
2388 }
2389
2390 static void
2391 gen75_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2392                                     struct encode_state *encode_state,
2393                                     struct intel_encoder_context *encoder_context)
2394 {
2395     struct intel_batchbuffer *batch = encoder_context->base.batch;
2396     dri_bo *slice_batch_bo;
2397
2398     slice_batch_bo = gen75_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2399
2400     // begin programing
2401     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
2402     intel_batchbuffer_emit_mi_flush(batch);
2403     
2404     // picture level programing
2405     gen75_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
2406
2407     BEGIN_BCS_BATCH(batch, 2);
2408     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
2409     OUT_BCS_RELOC(batch,
2410                   slice_batch_bo,
2411                   I915_GEM_DOMAIN_COMMAND, 0, 
2412                   0);
2413     ADVANCE_BCS_BATCH(batch);
2414
2415     // end programing
2416     intel_batchbuffer_end_atomic(batch);
2417
2418     dri_bo_unreference(slice_batch_bo);
2419 }
2420
2421 static VAStatus
2422 intel_mfc_mpeg2_prepare(VADriverContextP ctx, 
2423                         struct encode_state *encode_state,
2424                         struct intel_encoder_context *encoder_context)
2425 {
2426     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2427     struct object_surface *obj_surface; 
2428     struct object_buffer *obj_buffer;
2429     struct i965_coded_buffer_segment *coded_buffer_segment;
2430     VAStatus vaStatus = VA_STATUS_SUCCESS;
2431     dri_bo *bo;
2432     int i;
2433
2434     /* reconstructed surface */
2435     obj_surface = encode_state->reconstructed_object;
2436     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2437     mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2438     dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2439     mfc_context->surface_state.width = obj_surface->orig_width;
2440     mfc_context->surface_state.height = obj_surface->orig_height;
2441     mfc_context->surface_state.w_pitch = obj_surface->width;
2442     mfc_context->surface_state.h_pitch = obj_surface->height;
2443
2444     /* forward reference */
2445     obj_surface = encode_state->reference_objects[0];
2446
2447     if (obj_surface && obj_surface->bo) {
2448         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2449         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2450     } else
2451         mfc_context->reference_surfaces[0].bo = NULL;
2452
2453     /* backward reference */
2454     obj_surface = encode_state->reference_objects[1];
2455
2456     if (obj_surface && obj_surface->bo) {
2457         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2458         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2459     } else {
2460         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2461
2462         if (mfc_context->reference_surfaces[1].bo)
2463             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2464     }
2465
2466     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2467         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2468
2469         if (mfc_context->reference_surfaces[i].bo)
2470             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2471     }
2472     
2473     /* input YUV surface */
2474     obj_surface = encode_state->input_yuv_object;
2475     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2476     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2477
2478     /* coded buffer */
2479     obj_buffer = encode_state->coded_buf_object;
2480     bo = obj_buffer->buffer_store->bo;
2481     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2482     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2483     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2484     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2485
2486     /* set the internal flag to 0 to indicate the coded size is unknown */
2487     dri_bo_map(bo, 1);
2488     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2489     coded_buffer_segment->mapped = 0;
2490     coded_buffer_segment->codec = encoder_context->codec;
2491     dri_bo_unmap(bo);
2492
2493     return vaStatus;
2494 }
2495
2496 static VAStatus
2497 gen75_mfc_mpeg2_encode_picture(VADriverContextP ctx, 
2498                                struct encode_state *encode_state,
2499                                struct intel_encoder_context *encoder_context)
2500 {
2501     gen75_mfc_init(ctx, encode_state, encoder_context);
2502     intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2503     /*Programing bcs pipeline*/
2504     gen75_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2505     gen75_mfc_run(ctx, encode_state, encoder_context);
2506
2507     return VA_STATUS_SUCCESS;
2508 }
2509
2510 static void
2511 gen75_mfc_context_destroy(void *context)
2512 {
2513     struct gen6_mfc_context *mfc_context = context;
2514     int i;
2515
2516     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2517     mfc_context->post_deblocking_output.bo = NULL;
2518
2519     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2520     mfc_context->pre_deblocking_output.bo = NULL;
2521
2522     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2523     mfc_context->uncompressed_picture_source.bo = NULL;
2524
2525     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
2526     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2527
2528     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2529         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2530         mfc_context->direct_mv_buffers[i].bo = NULL;
2531     }
2532
2533     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2534     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
2535
2536     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2537     mfc_context->macroblock_status_buffer.bo = NULL;
2538
2539     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2540     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2541
2542     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2543     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2544
2545     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2546         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2547         mfc_context->reference_surfaces[i].bo = NULL;  
2548     }
2549
2550     i965_gpe_context_destroy(&mfc_context->gpe_context);
2551
2552     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2553     mfc_context->mfc_batchbuffer_surface.bo = NULL;
2554
2555     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2556     mfc_context->aux_batchbuffer_surface.bo = NULL;
2557
2558     if (mfc_context->aux_batchbuffer)
2559         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2560
2561     mfc_context->aux_batchbuffer = NULL;
2562
2563     free(mfc_context);
2564 }
2565
2566 static VAStatus gen75_mfc_pipeline(VADriverContextP ctx,
2567                                    VAProfile profile,
2568                                    struct encode_state *encode_state,
2569                                    struct intel_encoder_context *encoder_context)
2570 {
2571     VAStatus vaStatus;
2572
2573     switch (profile) {
2574     case VAProfileH264Baseline:
2575     case VAProfileH264Main:
2576     case VAProfileH264High:
2577         vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
2578         break;
2579
2580         /* FIXME: add for other profile */
2581     case VAProfileMPEG2Simple:
2582     case VAProfileMPEG2Main:
2583         vaStatus = gen75_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
2584         break;
2585
2586     default:
2587         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2588         break;
2589     }
2590
2591     return vaStatus;
2592 }
2593
2594 Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2595 {
2596     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
2597
2598     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2599
2600     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
2601     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
2602
2603     mfc_context->gpe_context.curbe.length = 32 * 4;
2604
2605     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2606     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
2607     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
2608     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
2609     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
2610
2611     i965_gpe_load_kernels(ctx,
2612                           &mfc_context->gpe_context,
2613                           gen75_mfc_kernels,
2614                           NUM_MFC_KERNEL);
2615
2616     mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
2617     mfc_context->set_surface_state = gen75_mfc_surface_state;
2618     mfc_context->ind_obj_base_addr_state = gen75_mfc_ind_obj_base_addr_state;
2619     mfc_context->avc_img_state = gen75_mfc_avc_img_state;
2620     mfc_context->avc_qm_state = gen75_mfc_avc_qm_state;
2621     mfc_context->avc_fqm_state = gen75_mfc_avc_fqm_state;
2622     mfc_context->insert_object = gen75_mfc_avc_insert_object;
2623     mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;
2624
2625     encoder_context->mfc_context = mfc_context;
2626     encoder_context->mfc_context_destroy = gen75_mfc_context_destroy;
2627     encoder_context->mfc_pipeline = gen75_mfc_pipeline;
2628     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
2629
2630     return True;
2631 }