Encoder: directly use the objects for the reconstructed picture and coded buffer
[platform/upstream/libva-intel-driver.git] / src / gen75_mfc.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45
/* NOTE(review): presumably selects a software-built slice batchbuffer path;
 * its use is outside this view -- confirm. */
#define MFC_SOFTWARE_HASWELL    1

/* Lowest device revision for which the "bplus" command layouts are used. */
#define B0_STEP_REV             2
/*
 * Non-zero when the device revision is at least B0_STEP_REV.
 * `i965` is a pointer to the driver data; the argument is parenthesized so
 * that any pointer-valued expression (e.g. `&data`) expands correctly.
 */
#define IS_STEPPING_BPLUS(i965) (((i965)->intel.revision) >= B0_STEP_REV)
50
51 static const uint32_t gen75_mfc_batchbuffer_avc_intra[][4] = {
52 #include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
53 };
54
55 static const uint32_t gen75_mfc_batchbuffer_avc_inter[][4] = {
56 #include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
57 };
58
59 static struct i965_kernel gen75_mfc_kernels[] = {
60     {
61         "MFC AVC INTRA BATCHBUFFER ",
62         MFC_BATCHBUFFER_AVC_INTRA,
63         gen75_mfc_batchbuffer_avc_intra,
64         sizeof(gen75_mfc_batchbuffer_avc_intra),
65         NULL
66     },
67
68     {
69         "MFC AVC INTER BATCHBUFFER ",
70         MFC_BATCHBUFFER_AVC_INTER,
71         gen75_mfc_batchbuffer_avc_inter,
72         sizeof(gen75_mfc_batchbuffer_avc_inter),
73         NULL
74     },
75 };
76
/* Inter macroblock mode: mask followed by the individual mode values. */
#define INTER_MODE_MASK     0x03
#define INTER_16X8          0x01
#define INTER_8X16          0x02
#define INTER_8X8           0x03
/* Mask for the sub-macroblock shape bits. */
#define SUBMB_SHAPE_MASK    0x00FF00

/* Motion-vector count selectors, positioned at bit 20. */
#define INTER_MV8           (4 << 20)
#define INTER_MV32          (6 << 20)
85
86
87 static void
88 gen75_mfc_pipe_mode_select(VADriverContextP ctx,
89                           int standard_select,
90                           struct intel_encoder_context *encoder_context)
91 {
92     struct intel_batchbuffer *batch = encoder_context->base.batch;
93     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
94
95     assert(standard_select == MFX_FORMAT_MPEG2 ||
96            standard_select == MFX_FORMAT_AVC);
97
98     BEGIN_BCS_BATCH(batch, 5);
99
100     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
101     OUT_BCS_BATCH(batch,
102                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
103                   (MFD_MODE_VLD << 15) | /* VLD mode */
104                   (0 << 10) | /* Stream-Out Enable */
105                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
106                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
107                   (0 << 5)  | /* not in stitch mode */
108                   (1 << 4)  | /* encoding mode */
109                   (standard_select << 0));  /* standard select: avc or mpeg2 */
110     OUT_BCS_BATCH(batch,
111                   (0 << 7)  | /* expand NOA bus flag */
112                   (0 << 6)  | /* disable slice-level clock gating */
113                   (0 << 5)  | /* disable clock gating for NOA */
114                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
115                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
116                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
117                   (0 << 1)  |
118                   (0 << 0));
119     OUT_BCS_BATCH(batch, 0);
120     OUT_BCS_BATCH(batch, 0);
121
122     ADVANCE_BCS_BATCH(batch);
123 }
124
125 static void
126 gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
127 {
128     struct intel_batchbuffer *batch = encoder_context->base.batch;
129     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
130
131     BEGIN_BCS_BATCH(batch, 6);
132
133     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
134     OUT_BCS_BATCH(batch, 0);
135     OUT_BCS_BATCH(batch,
136                   ((mfc_context->surface_state.height - 1) << 18) |
137                   ((mfc_context->surface_state.width - 1) << 4));
138     OUT_BCS_BATCH(batch,
139                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
140                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
141                   (0 << 22) | /* surface object control state, FIXME??? */
142                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
143                   (0 << 2)  | /* must be 0 for interleave U/V */
144                   (1 << 1)  | /* must be tiled */
145                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
146     OUT_BCS_BATCH(batch,
147                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
148                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
149     OUT_BCS_BATCH(batch, 0);
150
151     ADVANCE_BCS_BATCH(batch);
152 }
153
154 static void
155 gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
156                                 struct intel_encoder_context *encoder_context)
157 {
158     struct intel_batchbuffer *batch = encoder_context->base.batch;
159     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
160     struct gen6_vme_context *vme_context = encoder_context->vme_context;
161
162     BEGIN_BCS_BATCH(batch, 26);
163
164     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
165         /* the DW1-3 is for the MFX indirect bistream offset */
166     OUT_BCS_BATCH(batch, 0);
167     OUT_BCS_BATCH(batch, 0);
168     OUT_BCS_BATCH(batch, 0);
169         /* the DW4-5 is the MFX upper bound */
170     OUT_BCS_BATCH(batch, 0);
171     OUT_BCS_BATCH(batch, 0);
172
173     /* the DW6-10 is for MFX Indirect MV Object Base Address */
174     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
175     OUT_BCS_BATCH(batch, 0);
176     OUT_BCS_BATCH(batch, 0);
177     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
178     OUT_BCS_BATCH(batch, 0);
179
180      /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
181     OUT_BCS_BATCH(batch, 0);
182     OUT_BCS_BATCH(batch, 0);
183     OUT_BCS_BATCH(batch, 0);
184     OUT_BCS_BATCH(batch, 0);
185     OUT_BCS_BATCH(batch, 0);
186
187      /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */    
188     OUT_BCS_BATCH(batch, 0);
189     OUT_BCS_BATCH(batch, 0);
190     OUT_BCS_BATCH(batch, 0);
191     OUT_BCS_BATCH(batch, 0);
192     OUT_BCS_BATCH(batch, 0);
193
194     /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/        
195     OUT_BCS_RELOC(batch,
196                   mfc_context->mfc_indirect_pak_bse_object.bo,
197                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
198                   0);
199     OUT_BCS_BATCH(batch, 0);
200     OUT_BCS_BATCH(batch, 0);
201         
202     OUT_BCS_RELOC(batch,
203                   mfc_context->mfc_indirect_pak_bse_object.bo,
204                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
205                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
206     OUT_BCS_BATCH(batch, 0);
207
208     ADVANCE_BCS_BATCH(batch);
209 }
210
211 static void
212 gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
213 {
214     struct intel_batchbuffer *batch = encoder_context->base.batch;
215     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
216     struct gen6_vme_context *vme_context = encoder_context->vme_context;
217     struct i965_driver_data *i965 = i965_driver_data(ctx);
218
219     if (IS_STEPPING_BPLUS(i965)) {
220         gen75_mfc_ind_obj_base_addr_state_bplus(ctx, encoder_context);
221         return;
222     }
223     BEGIN_BCS_BATCH(batch, 11);
224
225     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
226     OUT_BCS_BATCH(batch, 0);
227     OUT_BCS_BATCH(batch, 0);
228     /* MFX Indirect MV Object Base Address */
229     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
230     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
231     OUT_BCS_BATCH(batch, 0);
232     OUT_BCS_BATCH(batch, 0);
233     OUT_BCS_BATCH(batch, 0);
234     OUT_BCS_BATCH(batch, 0);
235     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
236     OUT_BCS_RELOC(batch,
237                   mfc_context->mfc_indirect_pak_bse_object.bo,
238                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
239                   0);
240     OUT_BCS_RELOC(batch,
241                   mfc_context->mfc_indirect_pak_bse_object.bo,
242                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
243                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
244
245     ADVANCE_BCS_BATCH(batch);
246 }
247
248 static void
249 gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,  
250                        struct intel_encoder_context *encoder_context)
251 {
252     struct intel_batchbuffer *batch = encoder_context->base.batch;
253     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
254     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
255
256     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
257     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
258
259     BEGIN_BCS_BATCH(batch, 16);
260
261     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
262         /*DW1. MB setting of frame */
263     OUT_BCS_BATCH(batch,
264                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
265     OUT_BCS_BATCH(batch, 
266                   ((height_in_mbs - 1) << 16) | 
267                   ((width_in_mbs - 1) << 0));
268         /* DW3 QP setting */
269     OUT_BCS_BATCH(batch, 
270                   (0 << 24) |   /* Second Chroma QP Offset */
271                   (0 << 16) |   /* Chroma QP Offset */
272                   (0 << 14) |   /* Max-bit conformance Intra flag */
273                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
274                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
275                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
276                   (0 << 8)  |   /* FIXME: Image Structure */
277                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
278     OUT_BCS_BATCH(batch,
279                   (0 << 16) |   /* Mininum Frame size */
280                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
281                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
282                   (0 << 13) |   /* CABAC 0 word insertion test enable */
283                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
284                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
285                   (0 << 8)  |   /* FIXME: MbMvFormatFlag */
286                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
287                   (0 << 6)  |   /* Only valid for VLD decoding mode */
288                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
289                   (0 << 4)  |   /* Direct 8x8 inference flag */
290                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
291                   (1 << 2)  |   /* Frame MB only flag */
292                   (0 << 1)  |   /* MBAFF mode is in active */
293                   (0 << 0));    /* Field picture flag */
294         /* DW5 Trellis quantization */
295     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
296     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
297                   (0xBB8 << 16) |       /* InterMbMaxSz */
298                   (0xEE8) );            /* IntraMbMaxSz */
299     OUT_BCS_BATCH(batch, 0);            /* Reserved */
300         /* DW8. QP delta */
301     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
302     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
303         /* DW10. Bit setting for MB */  
304     OUT_BCS_BATCH(batch, 0x8C000000);
305     OUT_BCS_BATCH(batch, 0x00010000);
306         /* DW12. */
307     OUT_BCS_BATCH(batch, 0);
308     OUT_BCS_BATCH(batch, 0x02010100);
309         /* DW14. For short format */
310     OUT_BCS_BATCH(batch, 0);
311     OUT_BCS_BATCH(batch, 0);
312
313     ADVANCE_BCS_BATCH(batch);
314 }
315
316 static void
317 gen75_mfc_qm_state(VADriverContextP ctx,
318                   int qm_type,
319                   unsigned int *qm,
320                   int qm_length,
321                   struct intel_encoder_context *encoder_context)
322 {
323     struct intel_batchbuffer *batch = encoder_context->base.batch;
324     unsigned int qm_buffer[16];
325
326     assert(qm_length <= 16);
327     assert(sizeof(*qm) == 4);
328     memcpy(qm_buffer, qm, qm_length * 4);
329
330     BEGIN_BCS_BATCH(batch, 18);
331     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
332     OUT_BCS_BATCH(batch, qm_type << 0);
333     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
334     ADVANCE_BCS_BATCH(batch);
335 }
336
337 static void
338 gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
339 {
340     unsigned int qm[16] = {
341         0x10101010, 0x10101010, 0x10101010, 0x10101010,
342         0x10101010, 0x10101010, 0x10101010, 0x10101010,
343         0x10101010, 0x10101010, 0x10101010, 0x10101010,
344         0x10101010, 0x10101010, 0x10101010, 0x10101010
345     };
346
347     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
348     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
349     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
350     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
351 }
352
353 static void
354 gen75_mfc_fqm_state(VADriverContextP ctx,
355                    int fqm_type,
356                    unsigned int *fqm,
357                    int fqm_length,
358                    struct intel_encoder_context *encoder_context)
359 {
360     struct intel_batchbuffer *batch = encoder_context->base.batch;
361     unsigned int fqm_buffer[32];
362
363     assert(fqm_length <= 32);
364     assert(sizeof(*fqm) == 4);
365     memcpy(fqm_buffer, fqm, fqm_length * 4);
366
367     BEGIN_BCS_BATCH(batch, 34);
368     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
369     OUT_BCS_BATCH(batch, fqm_type << 0);
370     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
371     ADVANCE_BCS_BATCH(batch);
372 }
373
374 static void
375 gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
376 {
377     unsigned int qm[32] = {
378         0x10001000, 0x10001000, 0x10001000, 0x10001000,
379         0x10001000, 0x10001000, 0x10001000, 0x10001000,
380         0x10001000, 0x10001000, 0x10001000, 0x10001000,
381         0x10001000, 0x10001000, 0x10001000, 0x10001000,
382         0x10001000, 0x10001000, 0x10001000, 0x10001000,
383         0x10001000, 0x10001000, 0x10001000, 0x10001000,
384         0x10001000, 0x10001000, 0x10001000, 0x10001000,
385         0x10001000, 0x10001000, 0x10001000, 0x10001000
386     };
387
388     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
389     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
390     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
391     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
392 }
393
394 static void
395 gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
396                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
397                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
398                            struct intel_batchbuffer *batch)
399 {
400     if (batch == NULL)
401         batch = encoder_context->base.batch;
402
403     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
404
405     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
406     OUT_BCS_BATCH(batch,
407                   (0 << 16) |   /* always start at offset 0 */
408                   (data_bits_in_last_dw << 8) |
409                   (skip_emul_byte_count << 4) |
410                   (!!emulation_flag << 3) |
411                   ((!!is_last_header) << 2) |
412                   ((!!is_end_of_slice) << 1) |
413                   (0 << 0));    /* FIXME: ??? */
414     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
415
416     ADVANCE_BCS_BATCH(batch);
417 }
418
419
420 static void gen75_mfc_init(VADriverContextP ctx,
421                         struct encode_state *encode_state,
422                         struct intel_encoder_context *encoder_context)
423 {
424     struct i965_driver_data *i965 = i965_driver_data(ctx);
425     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
426     dri_bo *bo;
427     int i;
428     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
429     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
430     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
431
432     /*Encode common setup for MFC*/
433     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
434     mfc_context->post_deblocking_output.bo = NULL;
435
436     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
437     mfc_context->pre_deblocking_output.bo = NULL;
438
439     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
440     mfc_context->uncompressed_picture_source.bo = NULL;
441
442     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
443     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
444
445     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
446         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
447         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
448         mfc_context->direct_mv_buffers[i].bo = NULL;
449     }
450
451     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
452         if (mfc_context->reference_surfaces[i].bo != NULL)
453             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
454         mfc_context->reference_surfaces[i].bo = NULL;  
455     }
456
457     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
458     bo = dri_bo_alloc(i965->intel.bufmgr,
459                       "Buffer",
460                       width_in_mbs * 64,
461                       64);
462     assert(bo);
463     mfc_context->intra_row_store_scratch_buffer.bo = bo;
464
465     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
466     bo = dri_bo_alloc(i965->intel.bufmgr,
467                       "Buffer",
468                       width_in_mbs * height_in_mbs * 16,
469                       64);
470     assert(bo);
471     mfc_context->macroblock_status_buffer.bo = bo;
472
473     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
474     bo = dri_bo_alloc(i965->intel.bufmgr,
475                       "Buffer",
476                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
477                       64);
478     assert(bo);
479     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
480
481     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
482     bo = dri_bo_alloc(i965->intel.bufmgr,
483                       "Buffer",
484                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
485                       0x1000);
486     assert(bo);
487     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
488
489     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
490     mfc_context->mfc_batchbuffer_surface.bo = NULL;
491
492     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
493     mfc_context->aux_batchbuffer_surface.bo = NULL;
494
495     if (mfc_context->aux_batchbuffer)
496         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
497
498     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0);
499     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
500     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
501     mfc_context->aux_batchbuffer_surface.pitch = 16;
502     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
503     mfc_context->aux_batchbuffer_surface.size_block = 16;
504
505     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
506 }
507
508 static void
509 gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
510                                 struct intel_encoder_context *encoder_context)
511 {
512     struct intel_batchbuffer *batch = encoder_context->base.batch;
513     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
514     int i;
515
516     BEGIN_BCS_BATCH(batch, 61);
517
518     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
519
520     /* the DW1-3 is for pre_deblocking */
521     if (mfc_context->pre_deblocking_output.bo)
522         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
523                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
524                       0);
525     else
526         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
527
528         OUT_BCS_BATCH(batch, 0);
529         OUT_BCS_BATCH(batch, 0);
530      /* the DW4-6 is for the post_deblocking */
531
532     if (mfc_context->post_deblocking_output.bo)
533         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
534                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
535                       0);                                                                                       /* post output addr  */ 
536     else
537         OUT_BCS_BATCH(batch, 0);
538         OUT_BCS_BATCH(batch, 0);
539         OUT_BCS_BATCH(batch, 0);
540
541      /* the DW7-9 is for the uncompressed_picture */
542     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
543                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
544                   0); /* uncompressed data */
545
546         OUT_BCS_BATCH(batch, 0);
547         OUT_BCS_BATCH(batch, 0);
548
549      /* the DW10-12 is for the mb status */
550     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
551                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
552                   0); /* StreamOut data*/
553         OUT_BCS_BATCH(batch, 0);
554         OUT_BCS_BATCH(batch, 0);
555
556      /* the DW13-15 is for the intra_row_store_scratch */
557     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
558                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
559                   0);   
560         OUT_BCS_BATCH(batch, 0);
561         OUT_BCS_BATCH(batch, 0);
562
563      /* the DW16-18 is for the deblocking filter */
564     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
565                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
566                   0);
567         OUT_BCS_BATCH(batch, 0);
568         OUT_BCS_BATCH(batch, 0);
569
570     /* the DW 19-50 is for Reference pictures*/
571     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
572         if ( mfc_context->reference_surfaces[i].bo != NULL) {
573             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
574                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
575                           0);                   
576         } else {
577             OUT_BCS_BATCH(batch, 0);
578         }
579         OUT_BCS_BATCH(batch, 0);
580     }
581         OUT_BCS_BATCH(batch, 0);
582
583         /* The DW 52-54 is for the MB status buffer */
584     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
585                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
586                   0);                                                                                   /* Macroblock status buffer*/
587         
588         OUT_BCS_BATCH(batch, 0);
589         OUT_BCS_BATCH(batch, 0);
590
591         /* the DW 55-57 is the ILDB buffer */
592         OUT_BCS_BATCH(batch, 0);
593         OUT_BCS_BATCH(batch, 0);
594         OUT_BCS_BATCH(batch, 0);
595
596         /* the DW 58-60 is the second ILDB buffer */
597         OUT_BCS_BATCH(batch, 0);
598         OUT_BCS_BATCH(batch, 0);
599         OUT_BCS_BATCH(batch, 0);
600     ADVANCE_BCS_BATCH(batch);
601 }
602
603 static void
604 gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
605 {
606     struct intel_batchbuffer *batch = encoder_context->base.batch;
607     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
608     struct i965_driver_data *i965 = i965_driver_data(ctx);
609     int i;
610
611     if (IS_STEPPING_BPLUS(i965)) {
612         gen75_mfc_pipe_buf_addr_state_bplus(ctx, encoder_context);
613         return;
614     }
615
616     BEGIN_BCS_BATCH(batch, 25);
617
618     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
619
620     if (mfc_context->pre_deblocking_output.bo)
621         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
622                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
623                       0);
624     else
625         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
626
627     if (mfc_context->post_deblocking_output.bo)
628         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
629                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
630                       0);                                                                                       /* post output addr  */ 
631     else
632         OUT_BCS_BATCH(batch, 0);
633
634     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
635                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
636                   0);                                                                                   /* uncompressed data */
637     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
638                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
639                   0);                                                                                   /* StreamOut data*/
640     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
641                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
642                   0);   
643     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
644                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
645                   0);
646     /* 7..22 Reference pictures*/
647     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
648         if ( mfc_context->reference_surfaces[i].bo != NULL) {
649             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
650                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
651                           0);                   
652         } else {
653             OUT_BCS_BATCH(batch, 0);
654         }
655     }
656     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
657                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
658                   0);                                                                                   /* Macroblock status buffer*/
659
660         OUT_BCS_BATCH(batch, 0);
661
662     ADVANCE_BCS_BATCH(batch);
663 }
664
665 static void
666 gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
667                                 struct intel_encoder_context *encoder_context)
668 {
669     struct intel_batchbuffer *batch = encoder_context->base.batch;
670     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
671
672     int i;
673
674     BEGIN_BCS_BATCH(batch, 71);
675
676     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
677
678     /* Reference frames and Current frames */
679     /* the DW1-32 is for the direct MV for reference */
680     for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
681         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
682             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
683                           I915_GEM_DOMAIN_INSTRUCTION, 0,
684                           0);
685             OUT_BCS_BATCH(batch, 0);
686         } else {
687             OUT_BCS_BATCH(batch, 0);
688             OUT_BCS_BATCH(batch, 0);
689         }
690     }
691         OUT_BCS_BATCH(batch, 0);
692
693         /* the DW34-36 is the MV for the current reference */
694         OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
695                           I915_GEM_DOMAIN_INSTRUCTION, 0,
696                           0);
697
698         OUT_BCS_BATCH(batch, 0);
699         OUT_BCS_BATCH(batch, 0);
700
701     /* POL list */
702     for(i = 0; i < 32; i++) {
703         OUT_BCS_BATCH(batch, i/2);
704     }
705     OUT_BCS_BATCH(batch, 0);
706     OUT_BCS_BATCH(batch, 0);
707
708     ADVANCE_BCS_BATCH(batch);
709 }
710
711 static void
712 gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
713 {
714     struct intel_batchbuffer *batch = encoder_context->base.batch;
715     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
716     struct i965_driver_data *i965 = i965_driver_data(ctx);
717     int i;
718
719     if (IS_STEPPING_BPLUS(i965)) {
720         gen75_mfc_avc_directmode_state_bplus(ctx, encoder_context);
721         return;
722     }
723
724     BEGIN_BCS_BATCH(batch, 69);
725
726     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
727
728     /* Reference frames and Current frames */
729     for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
730         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
731             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
732                           I915_GEM_DOMAIN_INSTRUCTION, 0,
733                           0);
734         } else {
735             OUT_BCS_BATCH(batch, 0);
736         }
737     }
738
739     /* POL list */
740     for(i = 0; i < 32; i++) {
741         OUT_BCS_BATCH(batch, i/2);
742     }
743     OUT_BCS_BATCH(batch, 0);
744     OUT_BCS_BATCH(batch, 0);
745
746     ADVANCE_BCS_BATCH(batch);
747 }
748
749 static void
750 gen75_mfc_avc_ref_idx_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
751 {
752     struct intel_batchbuffer *batch = encoder_context->base.batch;
753     int i;
754
755     BEGIN_BCS_BATCH(batch, 10);
756     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
757     OUT_BCS_BATCH(batch, 0);                  //Select L0
758     OUT_BCS_BATCH(batch, 0x80808020);         //Only 1 reference
759     for(i = 0; i < 7; i++) {
760         OUT_BCS_BATCH(batch, 0x80808080);
761     }   
762     ADVANCE_BCS_BATCH(batch);
763
764     BEGIN_BCS_BATCH(batch, 10);
765     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
766     OUT_BCS_BATCH(batch, 1);                  //Select L1
767     OUT_BCS_BATCH(batch, 0x80808022);         //Only 1 reference
768     for(i = 0; i < 7; i++) {
769         OUT_BCS_BATCH(batch, 0x80808080);
770     }   
771     ADVANCE_BCS_BATCH(batch);
772 }
773
774
775 static void
776 gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
777                                 struct intel_encoder_context *encoder_context)
778 {
779     struct intel_batchbuffer *batch = encoder_context->base.batch;
780     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
781
782     BEGIN_BCS_BATCH(batch, 10);
783
784     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
785     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
786                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
787                   0);
788     OUT_BCS_BATCH(batch, 0);
789     OUT_BCS_BATCH(batch, 0);
790         
791         /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
792     OUT_BCS_BATCH(batch, 0);
793     OUT_BCS_BATCH(batch, 0);
794     OUT_BCS_BATCH(batch, 0);
795
796         /* the DW7-9 is for Bitplane Read Buffer Base Address */
797     OUT_BCS_BATCH(batch, 0);
798     OUT_BCS_BATCH(batch, 0);
799     OUT_BCS_BATCH(batch, 0);
800
801     ADVANCE_BCS_BATCH(batch);
802 }
803
804 static void
805 gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
806 {
807     struct intel_batchbuffer *batch = encoder_context->base.batch;
808     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
809     struct i965_driver_data *i965 = i965_driver_data(ctx);
810
811     if (IS_STEPPING_BPLUS(i965)) {
812         gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, encoder_context);
813         return;
814     }
815
816     BEGIN_BCS_BATCH(batch, 4);
817
818     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
819     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
820                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
821                   0);
822     OUT_BCS_BATCH(batch, 0);
823     OUT_BCS_BATCH(batch, 0);
824
825     ADVANCE_BCS_BATCH(batch);
826 }
827
828
829 static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
830                                       struct encode_state *encode_state,
831                                       struct intel_encoder_context *encoder_context)
832 {
833     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
834
835     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
836     mfc_context->set_surface_state(ctx, encoder_context);
837     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
838     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
839     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
840     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
841     mfc_context->avc_qm_state(ctx, encoder_context);
842     mfc_context->avc_fqm_state(ctx, encoder_context);
843     gen75_mfc_avc_directmode_state(ctx, encoder_context); 
844     gen75_mfc_avc_ref_idx_state(ctx, encoder_context);
845 }
846
847
848 static VAStatus gen75_mfc_run(VADriverContextP ctx, 
849                              struct encode_state *encode_state,
850                              struct intel_encoder_context *encoder_context)
851 {
852     struct intel_batchbuffer *batch = encoder_context->base.batch;
853
854     intel_batchbuffer_flush(batch);             //run the pipeline
855
856     return VA_STATUS_SUCCESS;
857 }
858
859
860 static VAStatus
861 gen75_mfc_stop(VADriverContextP ctx, 
862               struct encode_state *encode_state,
863               struct intel_encoder_context *encoder_context,
864               int *encoded_bits_size)
865 {
866     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
867     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
868     VACodedBufferSegment *coded_buffer_segment;
869     
870     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
871     assert(vaStatus == VA_STATUS_SUCCESS);
872     *encoded_bits_size = coded_buffer_segment->size * 8;
873     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
874
875     return VA_STATUS_SUCCESS;
876 }
877
878
879 static void
880 gen75_mfc_avc_slice_state(VADriverContextP ctx,
881                          VAEncPictureParameterBufferH264 *pic_param,
882                          VAEncSliceParameterBufferH264 *slice_param,
883                          struct encode_state *encode_state,
884                          struct intel_encoder_context *encoder_context,
885                          int rate_control_enable,
886                          int qp,
887                          struct intel_batchbuffer *batch)
888 {
889     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
890     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
891     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
892     int beginmb = slice_param->macroblock_address;
893     int endmb = beginmb + slice_param->num_macroblocks;
894     int beginx = beginmb % width_in_mbs;
895     int beginy = beginmb / width_in_mbs;
896     int nextx =  endmb % width_in_mbs;
897     int nexty = endmb / width_in_mbs;
898     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
899     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
900     int maxQpN, maxQpP;
901     unsigned char correct[6], grow, shrink;
902     int i;
903     int bslice = 0;
904     int weighted_pred_idc = 0;
905     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
906     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
907
908     if (batch == NULL)
909         batch = encoder_context->base.batch;
910
911     if (slice_type == SLICE_TYPE_P) {
912         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
913     } else if (slice_type == SLICE_TYPE_B) {
914         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
915         bslice = 1;
916
917         if (weighted_pred_idc == 2) {
918             /* 8.4.3 - Derivation process for prediction weights (8-279) */
919             luma_log2_weight_denom = 5;
920             chroma_log2_weight_denom = 5;
921         }
922     }
923
924     maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
925     maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
926
927     for (i = 0; i < 6; i++)
928         correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
929
930     grow = mfc_context->bit_rate_control_context[slice_type].GrowInit + 
931         (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
932     shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit + 
933         (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
934
935     BEGIN_BCS_BATCH(batch, 11);;
936
937     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
938     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
939
940     if (slice_type == SLICE_TYPE_I) {
941         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
942     } else {
943         OUT_BCS_BATCH(batch,
944                       (1 << 16) | (bslice << 24) |                      /*1 reference frame*/
945                       (chroma_log2_weight_denom << 8) |
946                       (luma_log2_weight_denom << 0));
947     }
948
949     OUT_BCS_BATCH(batch, 
950                   (weighted_pred_idc << 30) |
951                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
952                   (slice_param->disable_deblocking_filter_idc << 27) |
953                   (slice_param->cabac_init_idc << 24) |
954                   (qp<<16) |                    /*Slice Quantization Parameter*/
955                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
956                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
957     OUT_BCS_BATCH(batch,
958                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
959                   (beginx << 16) |
960                   slice_param->macroblock_address );
961     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
962     OUT_BCS_BATCH(batch, 
963                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
964                   (1 << 30) |           /*ResetRateControlCounter*/
965                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
966                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
967                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
968                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
969                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
970                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
971                   (last_slice << 19) |     /*IsLastSlice*/
972                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
973                   (1 << 17) |       /*HeaderPresentFlag*/       
974                   (1 << 16) |       /*SliceData PresentFlag*/
975                   (1 << 15) |       /*TailPresentFlag*/
976                   (1 << 13) |       /*RBSP NAL TYPE*/   
977                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
978     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
979     OUT_BCS_BATCH(batch,
980                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
981                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
982                   (shrink << 8)  |
983                   (grow << 0));   
984     OUT_BCS_BATCH(batch,
985                   (correct[5] << 20) |
986                   (correct[4] << 16) |
987                   (correct[3] << 12) |
988                   (correct[2] << 8) |
989                   (correct[1] << 4) |
990                   (correct[0] << 0));
991     OUT_BCS_BATCH(batch, 0);
992
993     ADVANCE_BCS_BATCH(batch);
994 }
995
996
997 #ifdef MFC_SOFTWARE_HASWELL
998
999 static int
1000 gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
1001                                 int qp,unsigned int *msg,
1002                               struct intel_encoder_context *encoder_context,
1003                               unsigned char target_mb_size, unsigned char max_mb_size,
1004                               struct intel_batchbuffer *batch)
1005 {
1006     int len_in_dwords = 12;
1007     unsigned int intra_msg;
1008 #define         INTRA_MSG_FLAG          (1 << 13)
1009 #define         INTRA_MBTYPE_MASK       (0x1F0000)
1010     if (batch == NULL)
1011         batch = encoder_context->base.batch;
1012
1013     BEGIN_BCS_BATCH(batch, len_in_dwords);
1014
1015     intra_msg = msg[0] & 0xC0FF;
1016     intra_msg |= INTRA_MSG_FLAG;
1017     intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
1018     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1019     OUT_BCS_BATCH(batch, 0);
1020     OUT_BCS_BATCH(batch, 0);
1021     OUT_BCS_BATCH(batch, 
1022                   (0 << 24) |           /* PackedMvNum, Debug*/
1023                   (0 << 20) |           /* No motion vector */
1024                   (1 << 19) |           /* CbpDcY */
1025                   (1 << 18) |           /* CbpDcU */
1026                   (1 << 17) |           /* CbpDcV */
1027                   intra_msg);
1028
1029     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
1030     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
1031     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
1032
1033     /*Stuff for Intra MB*/
1034     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
1035     OUT_BCS_BATCH(batch, msg[2]);       
1036     OUT_BCS_BATCH(batch, msg[3]&0xFF);  
1037     
1038     /*MaxSizeInWord and TargetSzieInWord*/
1039     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1040                   (target_mb_size << 16) );
1041
1042     OUT_BCS_BATCH(batch, 0);
1043
1044     ADVANCE_BCS_BATCH(batch);
1045
1046     return len_in_dwords;
1047 }
1048
1049 static int
1050 gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1051                               unsigned int *msg, unsigned int offset,
1052                               struct intel_encoder_context *encoder_context,
1053                               unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1054                               struct intel_batchbuffer *batch)
1055 {
1056     int len_in_dwords = 12;
1057         unsigned int inter_msg = 0;
1058     if (batch == NULL)
1059         batch = encoder_context->base.batch;
1060     {
1061 #define MSG_MV_OFFSET   4
1062         unsigned int *mv_ptr;
1063         mv_ptr = msg + MSG_MV_OFFSET;
1064         /* MV of VME output is based on 16 sub-blocks. So it is necessary
1065          * to convert them to be compatible with the format of AVC_PAK
1066          * command.
1067          */
1068         if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
1069                 /* MV[0] and MV[2] are replicated */
1070                 mv_ptr[4] = mv_ptr[0];
1071                 mv_ptr[5] = mv_ptr[1];
1072                 mv_ptr[2] = mv_ptr[8];
1073                 mv_ptr[3] = mv_ptr[9];
1074                 mv_ptr[6] = mv_ptr[8]; 
1075                 mv_ptr[7] = mv_ptr[9]; 
1076         } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
1077                 /* MV[0] and MV[1] are replicated */
1078                 mv_ptr[2] = mv_ptr[0];  
1079                 mv_ptr[3] = mv_ptr[1];
1080                 mv_ptr[4] = mv_ptr[16]; 
1081                 mv_ptr[5] = mv_ptr[17]; 
1082                 mv_ptr[6] = mv_ptr[24];
1083                 mv_ptr[7] = mv_ptr[25];
1084         } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1085                         !(msg[1] & SUBMB_SHAPE_MASK)) {
1086                 /* Don't touch MV[0] or MV[1] */
1087                 mv_ptr[2] = mv_ptr[8];
1088                 mv_ptr[3] = mv_ptr[9];
1089                 mv_ptr[4] = mv_ptr[16];
1090                 mv_ptr[5] = mv_ptr[17];
1091                 mv_ptr[6] = mv_ptr[24];
1092                 mv_ptr[7] = mv_ptr[25];
1093         }
1094     }
1095
1096     BEGIN_BCS_BATCH(batch, len_in_dwords);
1097
1098     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1099
1100         inter_msg = 32;
1101         /* MV quantity */
1102         if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
1103                 if (msg[1] & SUBMB_SHAPE_MASK)
1104                         inter_msg = 128;
1105         }
1106     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
1107     OUT_BCS_BATCH(batch, offset);
1108         inter_msg = msg[0] & (0x1F00FFFF);
1109         inter_msg |= INTER_MV8;
1110         inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
1111         if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1112                         (msg[1] & SUBMB_SHAPE_MASK)) {
1113                 inter_msg |= INTER_MV32;
1114         }
1115
1116     OUT_BCS_BATCH(batch, inter_msg);
1117
1118     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
1119     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
1120 #if 0 
1121     if ( slice_type == SLICE_TYPE_B) {
1122         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
1123     } else {
1124         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
1125     }
1126 #else
1127     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
1128 #endif
1129
1130         inter_msg = msg[1] >> 8;
1131     /*Stuff for Inter MB*/
1132     OUT_BCS_BATCH(batch, inter_msg);        
1133     OUT_BCS_BATCH(batch, 0x0);    
1134     OUT_BCS_BATCH(batch, 0x0);        
1135
1136     /*MaxSizeInWord and TargetSzieInWord*/
1137     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1138                   (target_mb_size << 16) );
1139
1140     OUT_BCS_BATCH(batch, 0x0);    
1141
1142     ADVANCE_BCS_BATCH(batch);
1143
1144     return len_in_dwords;
1145 }
1146
/* Layout of one VME output record as consumed by the slice programming code. */
#define AVC_INTRA_RDO_OFFSET    4       /* dword index of the intra RDO cost */
#define AVC_INTER_RDO_OFFSET    10      /* dword index of the inter RDO cost */
#define AVC_INTER_MSG_OFFSET    8       /* dword offset of the inter message */
#define AVC_INTER_MV_OFFSET     48      /* byte offset of the inter motion vectors */
#define AVC_RDO_MASK            0xFFFF  /* RDO cost lives in the low 16 bits */
1152
1153 static void 
1154 gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1155                                        struct encode_state *encode_state,
1156                                        struct intel_encoder_context *encoder_context,
1157                                        int slice_index,
1158                                        struct intel_batchbuffer *slice_batch)
1159 {
1160     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1161     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1162     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1163     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1164     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1165     unsigned int *msg = NULL, offset = 0;
1166     unsigned char *msg_ptr = NULL;
1167     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1168     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1169     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1170     int i,x,y;
1171     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1172     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1173     unsigned char *slice_header = NULL;
1174     int slice_header_length_in_bits = 0;
1175     unsigned int tail_data[] = { 0x0, 0x0 };
1176     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1177     int is_intra = slice_type == SLICE_TYPE_I;
1178
1179     if (rate_control_mode == VA_RC_CBR) {
1180         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1181         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1182     }
1183
1184     /* only support for 8-bit pixel bit-depth */
1185     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1186     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1187     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1188     assert(qp >= 0 && qp < 52);
1189
1190     gen75_mfc_avc_slice_state(ctx, 
1191                              pPicParameter,
1192                              pSliceParameter,
1193                              encode_state, encoder_context,
1194                              (rate_control_mode == VA_RC_CBR), qp, slice_batch);
1195
1196     if ( slice_index == 0) 
1197         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1198
1199     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1200
1201     // slice hander
1202     mfc_context->insert_object(ctx, encoder_context,
1203                                (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1204                                5,  /* first 5 bytes are start code + nal unit type */
1205                                1, 0, 1, slice_batch);
1206
1207     dri_bo_map(vme_context->vme_output.bo , 1);
1208     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1209
1210     if (is_intra) {
1211         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1212     } else {
1213         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1214     }
1215    
1216     for (i = pSliceParameter->macroblock_address; 
1217          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1218         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1219         x = i % width_in_mbs;
1220         y = i / width_in_mbs;
1221         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1222
1223         if (is_intra) {
1224             assert(msg);
1225             gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1226         } else {
1227             int inter_rdo, intra_rdo;
1228             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1229             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1230             offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1231             if (intra_rdo < inter_rdo) { 
1232                 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1233             } else {
1234                 msg += AVC_INTER_MSG_OFFSET;
1235                 gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, slice_type, slice_batch);
1236             }
1237         }
1238     }
1239    
1240     dri_bo_unmap(vme_context->vme_output.bo);
1241
1242     if ( last_slice ) {    
1243         mfc_context->insert_object(ctx, encoder_context,
1244                                    tail_data, 2, 8,
1245                                    2, 1, 1, 0, slice_batch);
1246     } else {
1247         mfc_context->insert_object(ctx, encoder_context,
1248                                    tail_data, 1, 8,
1249                                    1, 1, 1, 0, slice_batch);
1250     }
1251
1252     free(slice_header);
1253
1254 }
1255
1256 static dri_bo *
1257 gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1258                                   struct encode_state *encode_state,
1259                                   struct intel_encoder_context *encoder_context)
1260 {
1261     struct i965_driver_data *i965 = i965_driver_data(ctx);
1262     struct intel_batchbuffer *batch;
1263     dri_bo *batch_bo;
1264     int i;
1265     int buffer_size;
1266     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1267     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1268     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1269
1270     buffer_size = width_in_mbs * height_in_mbs * 64;
1271     batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
1272     batch_bo = batch->buffer;
1273     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1274         gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1275     }
1276
1277     intel_batchbuffer_align(batch, 8);
1278     
1279     BEGIN_BCS_BATCH(batch, 2);
1280     OUT_BCS_BATCH(batch, 0);
1281     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1282     ADVANCE_BCS_BATCH(batch);
1283
1284     dri_bo_reference(batch_bo);
1285     intel_batchbuffer_free(batch);
1286
1287     return batch_bo;
1288 }
1289
1290 #else
1291
1292 static void
1293 gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1294                                     struct encode_state *encode_state,
1295                                     struct intel_encoder_context *encoder_context)
1296
1297 {
1298     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1299     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1300
1301     assert(vme_context->vme_output.bo);
1302     mfc_context->buffer_suface_setup(ctx,
1303                                      &mfc_context->gpe_context,
1304                                      &vme_context->vme_output,
1305                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1306                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1307     assert(mfc_context->aux_batchbuffer_surface.bo);
1308     mfc_context->buffer_suface_setup(ctx,
1309                                      &mfc_context->gpe_context,
1310                                      &mfc_context->aux_batchbuffer_surface,
1311                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1312                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
1313 }
1314
1315 static void
1316 gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1317                                      struct encode_state *encode_state,
1318                                      struct intel_encoder_context *encoder_context)
1319
1320 {
1321     struct i965_driver_data *i965 = i965_driver_data(ctx);
1322     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1323     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1324     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1325     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1326     mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1327     mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1328     mfc_context->mfc_batchbuffer_surface.pitch = 16;
1329     mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, 
1330                                                            "MFC batchbuffer",
1331                                                            mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1332                                                            0x1000);
1333     mfc_context->buffer_suface_setup(ctx,
1334                                      &mfc_context->gpe_context,
1335                                      &mfc_context->mfc_batchbuffer_surface,
1336                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1337                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1338 }
1339
1340 static void
1341 gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
1342                                     struct encode_state *encode_state,
1343                                     struct intel_encoder_context *encoder_context)
1344 {
1345     gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1346     gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1347 }
1348
1349 static void
1350 gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
1351                                 struct encode_state *encode_state,
1352                                 struct intel_encoder_context *encoder_context)
1353 {
1354     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1355     struct gen6_interface_descriptor_data *desc;   
1356     int i;
1357     dri_bo *bo;
1358
1359     bo = mfc_context->gpe_context.idrt.bo;
1360     dri_bo_map(bo, 1);
1361     assert(bo->virtual);
1362     desc = bo->virtual;
1363
1364     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1365         struct i965_kernel *kernel;
1366
1367         kernel = &mfc_context->gpe_context.kernels[i];
1368         assert(sizeof(*desc) == 32);
1369
1370         /*Setup the descritor table*/
1371         memset(desc, 0, sizeof(*desc));
1372         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1373         desc->desc2.sampler_count = 0;
1374         desc->desc2.sampler_state_pointer = 0;
1375         desc->desc3.binding_table_entry_count = 2;
1376         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1377         desc->desc4.constant_urb_entry_read_offset = 0;
1378         desc->desc4.constant_urb_entry_read_length = 4;
1379                 
1380         /*kernel start*/
1381         dri_bo_emit_reloc(bo,   
1382                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1383                           0,
1384                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1385                           kernel->bo);
1386         desc++;
1387     }
1388
1389     dri_bo_unmap(bo);
1390 }
1391
1392 static void
1393 gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1394                                     struct encode_state *encode_state,
1395                                     struct intel_encoder_context *encoder_context)
1396 {
1397     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1398     
1399     (void)mfc_context;
1400 }
1401
1402 static void
1403 gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1404                                          int index,
1405                                          int head_offset,
1406                                          int batchbuffer_offset,
1407                                          int head_size,
1408                                          int tail_size,
1409                                          int number_mb_cmds,
1410                                          int first_object,
1411                                          int last_object,
1412                                          int last_slice,
1413                                          int mb_x,
1414                                          int mb_y,
1415                                          int width_in_mbs,
1416                                          int qp)
1417 {
1418     BEGIN_BATCH(batch, 12);
1419     
1420     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1421     OUT_BATCH(batch, index);
1422     OUT_BATCH(batch, 0);
1423     OUT_BATCH(batch, 0);
1424     OUT_BATCH(batch, 0);
1425     OUT_BATCH(batch, 0);
1426    
1427     /*inline data */
1428     OUT_BATCH(batch, head_offset);
1429     OUT_BATCH(batch, batchbuffer_offset);
1430     OUT_BATCH(batch, 
1431               head_size << 16 |
1432               tail_size);
1433     OUT_BATCH(batch,
1434               number_mb_cmds << 16 |
1435               first_object << 2 |
1436               last_object << 1 |
1437               last_slice);
1438     OUT_BATCH(batch,
1439               mb_y << 8 |
1440               mb_x);
1441     OUT_BATCH(batch,
1442               qp << 16 |
1443               width_in_mbs);
1444
1445     ADVANCE_BATCH(batch);
1446 }
1447
1448 static void
1449 gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1450                                        struct intel_encoder_context *encoder_context,
1451                                        VAEncSliceParameterBufferH264 *slice_param,
1452                                        int head_offset,
1453                                        unsigned short head_size,
1454                                        unsigned short tail_size,
1455                                        int batchbuffer_offset,
1456                                        int qp,
1457                                        int last_slice)
1458 {
1459     struct intel_batchbuffer *batch = encoder_context->base.batch;
1460     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1461     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1462     int total_mbs = slice_param->num_macroblocks;
1463     int number_mb_cmds = 128;
1464     int starting_mb = 0;
1465     int last_object = 0;
1466     int first_object = 1;
1467     int i;
1468     int mb_x, mb_y;
1469     int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1470
1471     for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1472         last_object = (total_mbs - starting_mb) == number_mb_cmds;
1473         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1474         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1475         assert(mb_x <= 255 && mb_y <= 255);
1476
1477         starting_mb += number_mb_cmds;
1478
1479         gen75_mfc_batchbuffer_emit_object_command(batch,
1480                                                  index,
1481                                                  head_offset,
1482                                                  batchbuffer_offset,
1483                                                  head_size,
1484                                                  tail_size,
1485                                                  number_mb_cmds,
1486                                                  first_object,
1487                                                  last_object,
1488                                                  last_slice,
1489                                                  mb_x,
1490                                                  mb_y,
1491                                                  width_in_mbs,
1492                                                  qp);
1493
1494         if (first_object) {
1495             head_offset += head_size;
1496             batchbuffer_offset += head_size;
1497         }
1498
1499         if (last_object) {
1500             head_offset += tail_size;
1501             batchbuffer_offset += tail_size;
1502         }
1503
1504         batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
1505
1506         first_object = 0;
1507     }
1508
1509     if (!last_object) {
1510         last_object = 1;
1511         number_mb_cmds = total_mbs % number_mb_cmds;
1512         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1513         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1514         assert(mb_x <= 255 && mb_y <= 255);
1515         starting_mb += number_mb_cmds;
1516
1517         gen75_mfc_batchbuffer_emit_object_command(batch,
1518                                                  index,
1519                                                  head_offset,
1520                                                  batchbuffer_offset,
1521                                                  head_size,
1522                                                  tail_size,
1523                                                  number_mb_cmds,
1524                                                  first_object,
1525                                                  last_object,
1526                                                  last_slice,
1527                                                  mb_x,
1528                                                  mb_y,
1529                                                  width_in_mbs,
1530                                                  qp);
1531     }
1532 }
1533                           
1534 /*
1535  * return size in Owords (16bytes)
1536  */         
1537 static int
1538 gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1539                                struct encode_state *encode_state,
1540                                struct intel_encoder_context *encoder_context,
1541                                int slice_index,
1542                                int batchbuffer_offset)
1543 {
1544     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1545     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1546     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1547     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1548     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1549     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1550     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1551     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1552     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1553     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1554     unsigned char *slice_header = NULL;
1555     int slice_header_length_in_bits = 0;
1556     unsigned int tail_data[] = { 0x0, 0x0 };
1557     long head_offset;
1558     int old_used = intel_batchbuffer_used_size(slice_batch), used;
1559     unsigned short head_size, tail_size;
1560     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1561
1562     if (rate_control_mode == VA_RC_CBR) {
1563         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1564         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1565     }
1566
1567     /* only support for 8-bit pixel bit-depth */
1568     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1569     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1570     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1571     assert(qp >= 0 && qp < 52);
1572
1573     head_offset = old_used / 16;
1574     gen75_mfc_avc_slice_state(ctx,
1575                              pPicParameter,
1576                              pSliceParameter,
1577                              encode_state,
1578                              encoder_context,
1579                              (rate_control_mode == VA_RC_CBR),
1580                              qp,
1581                              slice_batch);
1582
1583     if (slice_index == 0)
1584         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1585
1586     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1587
1588     // slice hander
1589     mfc_context->insert_object(ctx,
1590                                encoder_context,
1591                                (unsigned int *)slice_header,
1592                                ALIGN(slice_header_length_in_bits, 32) >> 5,
1593                                slice_header_length_in_bits & 0x1f,
1594                                5,  /* first 5 bytes are start code + nal unit type */
1595                                1,
1596                                0,
1597                                1,
1598                                slice_batch);
1599     free(slice_header);
1600
1601     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1602     used = intel_batchbuffer_used_size(slice_batch);
1603     head_size = (used - old_used) / 16;
1604     old_used = used;
1605
1606     /* tail */
1607     if (last_slice) {    
1608         mfc_context->insert_object(ctx,
1609                                    encoder_context,
1610                                    tail_data,
1611                                    2,
1612                                    8,
1613                                    2,
1614                                    1,
1615                                    1,
1616                                    0,
1617                                    slice_batch);
1618     } else {
1619         mfc_context->insert_object(ctx,
1620                                    encoder_context,
1621                                    tail_data,
1622                                    1,
1623                                    8,
1624                                    1,
1625                                    1,
1626                                    1,
1627                                    0,
1628                                    slice_batch);
1629     }
1630
1631     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1632     used = intel_batchbuffer_used_size(slice_batch);
1633     tail_size = (used - old_used) / 16;
1634
1635    
1636     gen75_mfc_avc_batchbuffer_slice_command(ctx,
1637                                            encoder_context,
1638                                            pSliceParameter,
1639                                            head_offset,
1640                                            head_size,
1641                                            tail_size,
1642                                            batchbuffer_offset,
1643                                            qp,
1644                                            last_slice);
1645
1646     return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
1647 }
1648
1649 static void
1650 gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1651                                   struct encode_state *encode_state,
1652                                   struct intel_encoder_context *encoder_context)
1653 {
1654     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1655     struct intel_batchbuffer *batch = encoder_context->base.batch;
1656     int i, size, offset = 0;
1657     intel_batchbuffer_start_atomic(batch, 0x4000); 
1658     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1659
1660     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1661         size = gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1662         offset += size;
1663     }
1664
1665     intel_batchbuffer_end_atomic(batch);
1666     intel_batchbuffer_flush(batch);
1667 }
1668
1669 static void
1670 gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
1671                                struct encode_state *encode_state,
1672                                struct intel_encoder_context *encoder_context)
1673 {
1674     gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1675     gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1676     gen75_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1677     gen75_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1678 }
1679
1680 static dri_bo *
1681 gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1682                                   struct encode_state *encode_state,
1683                                   struct intel_encoder_context *encoder_context)
1684 {
1685     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1686
1687     gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1688     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1689
1690     return mfc_context->mfc_batchbuffer_surface.bo;
1691 }
1692
1693 #endif
1694
1695 static void
1696 gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
1697                                  struct encode_state *encode_state,
1698                                  struct intel_encoder_context *encoder_context)
1699 {
1700     struct intel_batchbuffer *batch = encoder_context->base.batch;
1701     dri_bo *slice_batch_bo;
1702
1703     if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1704         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1705         assert(0);
1706         return; 
1707     }
1708
1709 #ifdef MFC_SOFTWARE_HASWELL
1710     slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1711 #else
1712     slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1713 #endif
1714
1715     // begin programing
1716     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1717     intel_batchbuffer_emit_mi_flush(batch);
1718     
1719     // picture level programing
1720     gen75_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1721
1722     BEGIN_BCS_BATCH(batch, 2);
1723     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1724     OUT_BCS_RELOC(batch,
1725                   slice_batch_bo,
1726                   I915_GEM_DOMAIN_COMMAND, 0, 
1727                   0);
1728     ADVANCE_BCS_BATCH(batch);
1729
1730     // end programing
1731     intel_batchbuffer_end_atomic(batch);
1732
1733     dri_bo_unreference(slice_batch_bo);
1734 }
1735
1736
1737 static VAStatus
1738 gen75_mfc_avc_encode_picture(VADriverContextP ctx, 
1739                             struct encode_state *encode_state,
1740                             struct intel_encoder_context *encoder_context)
1741 {
1742     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1743     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1744     int current_frame_bits_size;
1745     int sts;
1746  
1747     for (;;) {
1748         gen75_mfc_init(ctx, encode_state, encoder_context);
1749         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1750         /*Programing bcs pipeline*/
1751         gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);  //filling the pipeline
1752         gen75_mfc_run(ctx, encode_state, encoder_context);
1753         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1754             gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1755             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1756             if (sts == BRC_NO_HRD_VIOLATION) {
1757                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1758                 break;
1759             }
1760             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1761                 if (!mfc_context->hrd.violation_noted) {
1762                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1763                     mfc_context->hrd.violation_noted = 1;
1764                 }
1765                 return VA_STATUS_SUCCESS;
1766             }
1767         } else {
1768             break;
1769         }
1770     }
1771
1772     return VA_STATUS_SUCCESS;
1773 }
1774
1775 /*
1776  * MPEG-2
1777  */
1778
/*
 * Picture coding type written into MFX_MPEG2_PIC_STATE, indexed by the
 * picture_type field of the MPEG-2 picture parameter buffer.
 */
static const int
va_to_gen75_mpeg2_picture_type[3] = {
    [0] = 1,    /* I */
    [1] = 2,    /* P */
    [2] = 3     /* B */
};
1785
1786 static void
1787 gen75_mfc_mpeg2_pic_state(VADriverContextP ctx,
1788                           struct intel_encoder_context *encoder_context,
1789                           struct encode_state *encode_state)
1790 {
1791     struct intel_batchbuffer *batch = encoder_context->base.batch;
1792     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1793     VAEncPictureParameterBufferMPEG2 *pic_param;
1794     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1795     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1796
1797     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1798     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1799
1800     BEGIN_BCS_BATCH(batch, 13);
1801     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1802     OUT_BCS_BATCH(batch,
1803                   (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1804                   (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1805                   (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1806                   (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1807                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1808                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1809                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1810                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1811                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1812                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1813                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1814                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1815     OUT_BCS_BATCH(batch,
1816                   0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1817                   va_to_gen75_mpeg2_picture_type[pic_param->picture_type] << 9 |
1818                   0);
1819     OUT_BCS_BATCH(batch,
1820                   1 << 31 |     /* slice concealment */
1821                   (height_in_mbs - 1) << 16 |
1822                   (width_in_mbs - 1));
1823     OUT_BCS_BATCH(batch, 0);
1824     OUT_BCS_BATCH(batch, 0);
1825     OUT_BCS_BATCH(batch,
1826                   0xFFF << 16 | /* InterMBMaxSize */
1827                   0xFFF << 0 |  /* IntraMBMaxSize */
1828                   0);
1829     OUT_BCS_BATCH(batch, 0);
1830     OUT_BCS_BATCH(batch, 0);
1831     OUT_BCS_BATCH(batch, 0);
1832     OUT_BCS_BATCH(batch, 0);
1833     OUT_BCS_BATCH(batch, 0);
1834     OUT_BCS_BATCH(batch, 0);
1835     ADVANCE_BCS_BATCH(batch);
1836 }
1837
1838 static void
1839 gen75_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1840 {
1841     unsigned char intra_qm[64] = {
1842          8, 16, 19, 22, 26, 27, 29, 34,
1843         16, 16, 22, 24, 27, 29, 34, 37,
1844         19, 22, 26, 27, 29, 34, 34, 38,
1845         22, 22, 26, 27, 29, 34, 37, 40,
1846         22, 26, 27, 29, 32, 35, 40, 48,
1847         26, 27, 29, 32, 35, 40, 48, 58,
1848         26, 27, 29, 34, 38, 46, 56, 69,
1849         27, 29, 35, 38, 46, 56, 69, 83
1850     };
1851
1852     unsigned char non_intra_qm[64] = {
1853         16, 16, 16, 16, 16, 16, 16, 16,
1854         16, 16, 16, 16, 16, 16, 16, 16,
1855         16, 16, 16, 16, 16, 16, 16, 16,
1856         16, 16, 16, 16, 16, 16, 16, 16,
1857         16, 16, 16, 16, 16, 16, 16, 16,
1858         16, 16, 16, 16, 16, 16, 16, 16,
1859         16, 16, 16, 16, 16, 16, 16, 16,
1860         16, 16, 16, 16, 16, 16, 16, 16
1861     };
1862
1863     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1864     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
1865 }
1866
1867 static void
1868 gen75_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1869 {
1870     unsigned short intra_fqm[64] = {
1871          65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1872          65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1873          65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1874          65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1875          65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1876          65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1877          65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1878          65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1879     };
1880
1881     unsigned short non_intra_fqm[64] = {
1882         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1883         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1884         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1885         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1886         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1887         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1888         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1889         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1890     };
1891
1892     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1893     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
1894 }
1895
1896 static void
1897 gen75_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1898                                  struct intel_encoder_context *encoder_context,
1899                                  int x, int y,
1900                                  int next_x, int next_y,
1901                                  int is_fisrt_slice_group,
1902                                  int is_last_slice_group,
1903                                  int intra_slice,
1904                                  int qp,
1905                                  struct intel_batchbuffer *batch)
1906 {
1907     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1908
1909     if (batch == NULL)
1910         batch = encoder_context->base.batch;
1911
1912     BEGIN_BCS_BATCH(batch, 8);
1913
1914     OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1915     OUT_BCS_BATCH(batch,
1916                   0 << 31 |                             /* MbRateCtrlFlag */
1917                   !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
1918                   1 << 17 |                             /* Insert Header before the first slice group data */
1919                   1 << 16 |                             /* SliceData PresentFlag: always 1 */
1920                   1 << 15 |                             /* TailPresentFlag: always 1 */
1921                   0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
1922                   !!intra_slice << 13 |                 /* IntraSlice */
1923                   !!intra_slice << 12 |                 /* IntraSliceFlag */
1924                   0);
1925     OUT_BCS_BATCH(batch,
1926                   next_y << 24 |
1927                   next_x << 16 |
1928                   y << 8 |
1929                   x << 0 |
1930                   0);
1931     OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
1932     /* bitstream pointer is only loaded once for the first slice of a frame when 
1933      * LoadSlicePointerFlag is 0
1934      */
1935     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1936     OUT_BCS_BATCH(batch, 0);    /* FIXME: */
1937     OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
1938     OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
1939
1940     ADVANCE_BCS_BATCH(batch);
1941 }
1942
1943 static int
1944 gen75_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1945                                  struct intel_encoder_context *encoder_context,
1946                                  int x, int y,
1947                                  int first_mb_in_slice,
1948                                  int last_mb_in_slice,
1949                                  int first_mb_in_slice_group,
1950                                  int last_mb_in_slice_group,
1951                                  int mb_type,
1952                                  int qp_scale_code,
1953                                  int coded_block_pattern,
1954                                  unsigned char target_size_in_word,
1955                                  unsigned char max_size_in_word,
1956                                  struct intel_batchbuffer *batch)
1957 {
1958     int len_in_dwords = 9;
1959
1960     if (batch == NULL)
1961         batch = encoder_context->base.batch;
1962
1963     BEGIN_BCS_BATCH(batch, len_in_dwords);
1964
1965     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1966     OUT_BCS_BATCH(batch,
1967                   0 << 24 |     /* PackedMvNum */
1968                   0 << 20 |     /* MvFormat */
1969                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1970                   0 << 15 |     /* TransformFlag: frame DCT */
1971                   0 << 14 |     /* FieldMbFlag */
1972                   1 << 13 |     /* IntraMbFlag */
1973                   mb_type << 8 |   /* MbType: Intra */
1974                   0 << 2 |      /* SkipMbFlag */
1975                   0 << 0 |      /* InterMbMode */
1976                   0);
1977     OUT_BCS_BATCH(batch, y << 16 | x);
1978     OUT_BCS_BATCH(batch,
1979                   max_size_in_word << 24 |
1980                   target_size_in_word << 16 |
1981                   coded_block_pattern << 6 |      /* CBP */
1982                   0);
1983     OUT_BCS_BATCH(batch,
1984                   last_mb_in_slice << 31 |
1985                   first_mb_in_slice << 30 |
1986                   0 << 27 |     /* EnableCoeffClamp */
1987                   last_mb_in_slice_group << 26 |
1988                   0 << 25 |     /* MbSkipConvDisable */
1989                   first_mb_in_slice_group << 24 |
1990                   0 << 16 |     /* MvFieldSelect */
1991                   qp_scale_code << 0 |
1992                   0);
1993     OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
1994     OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
1995     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1996     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1997
1998     ADVANCE_BCS_BATCH(batch);
1999
2000     return len_in_dwords;
2001 }
2002
2003 #define MPEG2_INTER_MV_OFFSET   12 
2004
/*
 * Motion vector limits, indexed by f_code (1..9).  Entry 0 is a
 * placeholder that is never used for clamping.
 */
static struct _mv_ranges
{
    int low;    /* in the unit of 1/2 pixel */
    int high;   /* in the unit of 1/2 pixel */
} mv_ranges[] = {
    {     0,    0 },    /* f_code 0: unused */
    {   -16,   15 },    /* f_code 1 */
    {   -32,   31 },    /* f_code 2 */
    {   -64,   63 },    /* f_code 3 */
    {  -128,  127 },    /* f_code 4 */
    {  -256,  255 },    /* f_code 5 */
    {  -512,  511 },    /* f_code 6 */
    { -1024, 1023 },    /* f_code 7 */
    { -2048, 2047 },    /* f_code 8 */
    { -4096, 4095 }     /* f_code 9 */
};

/*
 * Sanitize one motion vector component.
 *
 * mv          - vector component in half-pixel units
 * pos         - macroblock index along the same axis
 * display_max - picture size along that axis, in pixels
 * f_code      - selects the allowed range from mv_ranges[]
 *
 * The vector is reset to 0 when it would move the 16-pixel macroblock
 * outside [0, display_max].  It is then clamped to the f_code range;
 * f_code values outside 1..9 leave it unclamped.
 */
static int
mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
{
    const int mb_begin = pos * 16 * 2;          /* half-pel position of the MB start */
    const int mb_end = (pos + 1) * 16 * 2;      /* half-pel position just past the MB */
    const int picture_end = display_max * 2;
    int result = mv;

    if (mv + mb_begin < 0 || mv + mb_end > picture_end)
        result = 0;

    if (f_code <= 0 || f_code >= 10)
        return result;

    if (result < mv_ranges[f_code].low)
        result = mv_ranges[f_code].low;
    else if (result > mv_ranges[f_code].high)
        result = mv_ranges[f_code].high;

    return result;
}
2039
2040 static int
2041 gen75_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
2042                                  struct encode_state *encode_state,
2043                                  struct intel_encoder_context *encoder_context,
2044                                  unsigned int *msg,
2045                                  int width_in_mbs, int height_in_mbs,
2046                                  int x, int y,
2047                                  int first_mb_in_slice,
2048                                  int last_mb_in_slice,
2049                                  int first_mb_in_slice_group,
2050                                  int last_mb_in_slice_group,
2051                                  int qp_scale_code,
2052                                  unsigned char target_size_in_word,
2053                                  unsigned char max_size_in_word,
2054                                  struct intel_batchbuffer *batch)
2055 {
2056     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
2057     int len_in_dwords = 9;
2058     short *mvptr, mvx0, mvy0, mvx1, mvy1;
2059     
2060     if (batch == NULL)
2061         batch = encoder_context->base.batch;
2062
2063     mvptr = (short *)(msg + MPEG2_INTER_MV_OFFSET);
2064     mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
2065     mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
2066     mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
2067     mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
2068
2069     BEGIN_BCS_BATCH(batch, len_in_dwords);
2070
2071     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
2072     OUT_BCS_BATCH(batch,
2073                   2 << 24 |     /* PackedMvNum */
2074                   7 << 20 |     /* MvFormat */
2075                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
2076                   0 << 15 |     /* TransformFlag: frame DCT */
2077                   0 << 14 |     /* FieldMbFlag */
2078                   0 << 13 |     /* IntraMbFlag */
2079                   1 << 8 |      /* MbType: Frame-based */
2080                   0 << 2 |      /* SkipMbFlag */
2081                   0 << 0 |      /* InterMbMode */
2082                   0);
2083     OUT_BCS_BATCH(batch, y << 16 | x);
2084     OUT_BCS_BATCH(batch,
2085                   max_size_in_word << 24 |
2086                   target_size_in_word << 16 |
2087                   0x3f << 6 |   /* CBP */
2088                   0);
2089     OUT_BCS_BATCH(batch,
2090                   last_mb_in_slice << 31 |
2091                   first_mb_in_slice << 30 |
2092                   0 << 27 |     /* EnableCoeffClamp */
2093                   last_mb_in_slice_group << 26 |
2094                   0 << 25 |     /* MbSkipConvDisable */
2095                   first_mb_in_slice_group << 24 |
2096                   0 << 16 |     /* MvFieldSelect */
2097                   qp_scale_code << 0 |
2098                   0);
2099
2100     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
2101     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
2102     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
2103     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
2104
2105     ADVANCE_BCS_BATCH(batch);
2106
2107     return len_in_dwords;
2108 }
2109
2110 static void
2111 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
2112                                            struct encode_state *encode_state,
2113                                            struct intel_encoder_context *encoder_context,
2114                                            struct intel_batchbuffer *slice_batch)
2115 {
2116     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2117     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
2118
2119     if (encode_state->packed_header_data[idx]) {
2120         VAEncPackedHeaderParameterBuffer *param = NULL;
2121         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2122         unsigned int length_in_bits;
2123
2124         assert(encode_state->packed_header_param[idx]);
2125         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2126         length_in_bits = param->bit_length;
2127
2128         mfc_context->insert_object(ctx,
2129                                    encoder_context,
2130                                    header_data,
2131                                    ALIGN(length_in_bits, 32) >> 5,
2132                                    length_in_bits & 0x1f,
2133                                    5,   /* FIXME: check it */
2134                                    0,
2135                                    0,
2136                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2137                                    slice_batch);
2138     }
2139
2140     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2141
2142     if (encode_state->packed_header_data[idx]) {
2143         VAEncPackedHeaderParameterBuffer *param = NULL;
2144         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2145         unsigned int length_in_bits;
2146
2147         assert(encode_state->packed_header_param[idx]);
2148         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2149         length_in_bits = param->bit_length;
2150
2151         mfc_context->insert_object(ctx,
2152                                    encoder_context,
2153                                    header_data,
2154                                    ALIGN(length_in_bits, 32) >> 5,
2155                                    length_in_bits & 0x1f,
2156                                    5,   /* FIXME: check it */
2157                                    0,
2158                                    0,
2159                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2160                                    slice_batch);
2161     }
2162 }
2163
2164 static void 
2165 gen75_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2166                                      struct encode_state *encode_state,
2167                                      struct intel_encoder_context *encoder_context,
2168                                      int slice_index,
2169                                      VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2170                                      struct intel_batchbuffer *slice_batch)
2171 {
2172     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2173     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2174     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2175     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2176     unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2177     unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2178     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2179     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2180     int i, j;
2181     int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2182     unsigned int *msg = NULL;
2183     unsigned char *msg_ptr = NULL;
2184
2185     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
2186     h_start_pos = slice_param->macroblock_address % width_in_mbs;
2187     v_start_pos = slice_param->macroblock_address / width_in_mbs;
2188     assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
2189
2190     dri_bo_map(vme_context->vme_output.bo , 0);
2191     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
2192
2193     if (next_slice_group_param) {
2194         h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2195         v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
2196     } else {
2197         h_next_start_pos = 0;
2198         v_next_start_pos = height_in_mbs;
2199     }
2200
2201     gen75_mfc_mpeg2_slicegroup_state(ctx,
2202                                      encoder_context,
2203                                      h_start_pos,
2204                                      v_start_pos,
2205                                      h_next_start_pos,
2206                                      v_next_start_pos,
2207                                      slice_index == 0,
2208                                      next_slice_group_param == NULL,
2209                                      slice_param->is_intra_slice,
2210                                      slice_param->quantiser_scale_code,
2211                                      slice_batch);
2212
2213     if (slice_index == 0) 
2214         intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2215
2216     /* Insert '00' to make sure the header is valid */
2217     mfc_context->insert_object(ctx,
2218                                encoder_context,
2219                                (unsigned int*)section_delimiter,
2220                                1,
2221                                8,   /* 8bits in the last DWORD */
2222                                1,   /* 1 byte */
2223                                1,
2224                                0,
2225                                0,
2226                                slice_batch);
2227
2228     for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2229         /* PAK for each macroblocks */
2230         for (j = 0; j < slice_param->num_macroblocks; j++) {
2231             int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2232             int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2233             int first_mb_in_slice = (j == 0);
2234             int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2235             int first_mb_in_slice_group = (i == 0 && j == 0);
2236             int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2237                                           j == slice_param->num_macroblocks - 1);
2238
2239             msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
2240
2241             if (slice_param->is_intra_slice) {
2242                 gen75_mfc_mpeg2_pak_object_intra(ctx,
2243                                                  encoder_context,
2244                                                  h_pos, v_pos,
2245                                                  first_mb_in_slice,
2246                                                  last_mb_in_slice,
2247                                                  first_mb_in_slice_group,
2248                                                  last_mb_in_slice_group,
2249                                                  0x1a,
2250                                                  slice_param->quantiser_scale_code,
2251                                                  0x3f,
2252                                                  0,
2253                                                  0xff,
2254                                                  slice_batch);
2255             } else {
2256                 gen75_mfc_mpeg2_pak_object_inter(ctx,
2257                                                  encode_state,
2258                                                  encoder_context,
2259                                                  msg,
2260                                                  width_in_mbs, height_in_mbs,
2261                                                  h_pos, v_pos,
2262                                                  first_mb_in_slice,
2263                                                  last_mb_in_slice,
2264                                                  first_mb_in_slice_group,
2265                                                  last_mb_in_slice_group,
2266                                                  slice_param->quantiser_scale_code,
2267                                                  0,
2268                                                  0xff,
2269                                                  slice_batch);
2270             }
2271         }
2272
2273         slice_param++;
2274     }
2275
2276     dri_bo_unmap(vme_context->vme_output.bo);
2277
2278     /* tail data */
2279     if (next_slice_group_param == NULL) { /* end of a picture */
2280         mfc_context->insert_object(ctx,
2281                                    encoder_context,
2282                                    (unsigned int *)tail_delimiter,
2283                                    2,
2284                                    8,   /* 8bits in the last DWORD */
2285                                    5,   /* 5 bytes */
2286                                    1,
2287                                    1,
2288                                    0,
2289                                    slice_batch);
2290     } else {        /* end of a lsice group */
2291         mfc_context->insert_object(ctx,
2292                                    encoder_context,
2293                                    (unsigned int *)section_delimiter,
2294                                    1,
2295                                    8,   /* 8bits in the last DWORD */
2296                                    1,   /* 1 byte */
2297                                    1,
2298                                    1,
2299                                    0,
2300                                    slice_batch);
2301     }
2302 }
2303
2304 /* 
2305  * A batch buffer for all slices, including slice state, 
2306  * slice insert object and slice pak object commands
2307  *
2308  */
2309 static dri_bo *
2310 gen75_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2311                                            struct encode_state *encode_state,
2312                                            struct intel_encoder_context *encoder_context)
2313 {
2314     struct i965_driver_data *i965 = i965_driver_data(ctx);
2315     struct intel_batchbuffer *batch;
2316     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2317     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2318     dri_bo *batch_bo;
2319     int i;
2320     int buffer_size;
2321     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2322     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2323
2324     buffer_size = width_in_mbs * height_in_mbs * 64;
2325     batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
2326     batch_bo = batch->buffer;
2327
2328     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2329         if (i == encode_state->num_slice_params_ext - 1)
2330             next_slice_group_param = NULL;
2331         else
2332             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2333
2334         gen75_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2335     }
2336
2337     intel_batchbuffer_align(batch, 8);
2338     
2339     BEGIN_BCS_BATCH(batch, 2);
2340     OUT_BCS_BATCH(batch, 0);
2341     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2342     ADVANCE_BCS_BATCH(batch);
2343
2344     dri_bo_reference(batch_bo);
2345     intel_batchbuffer_free(batch);
2346
2347     return batch_bo;
2348 }
2349
2350 static void
2351 gen75_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2352                                             struct encode_state *encode_state,
2353                                             struct intel_encoder_context *encoder_context)
2354 {
2355     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2356
2357     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2358     mfc_context->set_surface_state(ctx, encoder_context);
2359     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2360     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
2361     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2362     gen75_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2363     gen75_mfc_mpeg2_qm_state(ctx, encoder_context);
2364     gen75_mfc_mpeg2_fqm_state(ctx, encoder_context);
2365 }
2366
2367 static void
2368 gen75_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2369                                     struct encode_state *encode_state,
2370                                     struct intel_encoder_context *encoder_context)
2371 {
2372     struct intel_batchbuffer *batch = encoder_context->base.batch;
2373     dri_bo *slice_batch_bo;
2374
2375     slice_batch_bo = gen75_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2376
2377     // begin programing
2378     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
2379     intel_batchbuffer_emit_mi_flush(batch);
2380     
2381     // picture level programing
2382     gen75_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
2383
2384     BEGIN_BCS_BATCH(batch, 2);
2385     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
2386     OUT_BCS_RELOC(batch,
2387                   slice_batch_bo,
2388                   I915_GEM_DOMAIN_COMMAND, 0, 
2389                   0);
2390     ADVANCE_BCS_BATCH(batch);
2391
2392     // end programing
2393     intel_batchbuffer_end_atomic(batch);
2394
2395     dri_bo_unreference(slice_batch_bo);
2396 }
2397
2398 static VAStatus
2399 intel_mfc_mpeg2_prepare(VADriverContextP ctx, 
2400                         struct encode_state *encode_state,
2401                         struct intel_encoder_context *encoder_context)
2402 {
2403     struct i965_driver_data *i965 = i965_driver_data(ctx);
2404     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2405     struct object_surface *obj_surface; 
2406     struct object_buffer *obj_buffer;
2407     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
2408     struct i965_coded_buffer_segment *coded_buffer_segment;
2409     VAStatus vaStatus = VA_STATUS_SUCCESS;
2410     dri_bo *bo;
2411     int i;
2412
2413     /* reconstructed surface */
2414     obj_surface = encode_state->reconstructed_object;
2415     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2416     mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2417     dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2418     mfc_context->surface_state.width = obj_surface->orig_width;
2419     mfc_context->surface_state.height = obj_surface->orig_height;
2420     mfc_context->surface_state.w_pitch = obj_surface->width;
2421     mfc_context->surface_state.h_pitch = obj_surface->height;
2422
2423     /* forward reference */
2424     obj_surface = SURFACE(pic_param->forward_reference_picture);
2425
2426     if (obj_surface && obj_surface->bo) {
2427         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2428         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2429     } else
2430         mfc_context->reference_surfaces[0].bo = NULL;
2431
2432     /* backward reference */
2433     obj_surface = SURFACE(pic_param->backward_reference_picture);
2434
2435     if (obj_surface && obj_surface->bo) {
2436         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2437         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2438     } else {
2439         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2440
2441         if (mfc_context->reference_surfaces[1].bo)
2442             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2443     }
2444
2445     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2446         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2447
2448         if (mfc_context->reference_surfaces[i].bo)
2449             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2450     }
2451     
2452     /* input YUV surface */
2453     obj_surface = encode_state->input_yuv_object;
2454     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2455     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2456
2457     /* coded buffer */
2458     obj_buffer = encode_state->coded_buf_object;
2459     bo = obj_buffer->buffer_store->bo;
2460     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2461     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2462     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2463     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2464
2465     /* set the internal flag to 0 to indicate the coded size is unknown */
2466     dri_bo_map(bo, 1);
2467     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2468     coded_buffer_segment->mapped = 0;
2469     coded_buffer_segment->codec = CODED_MPEG2;
2470     dri_bo_unmap(bo);
2471
2472     return vaStatus;
2473 }
2474
2475 static VAStatus
2476 gen75_mfc_mpeg2_encode_picture(VADriverContextP ctx, 
2477                                struct encode_state *encode_state,
2478                                struct intel_encoder_context *encoder_context)
2479 {
2480     gen75_mfc_init(ctx, encode_state, encoder_context);
2481     intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2482     /*Programing bcs pipeline*/
2483     gen75_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2484     gen75_mfc_run(ctx, encode_state, encoder_context);
2485
2486     return VA_STATUS_SUCCESS;
2487 }
2488
2489 static void
2490 gen75_mfc_context_destroy(void *context)
2491 {
2492     struct gen6_mfc_context *mfc_context = context;
2493     int i;
2494
2495     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2496     mfc_context->post_deblocking_output.bo = NULL;
2497
2498     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2499     mfc_context->pre_deblocking_output.bo = NULL;
2500
2501     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2502     mfc_context->uncompressed_picture_source.bo = NULL;
2503
2504     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
2505     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2506
2507     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2508         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2509         mfc_context->direct_mv_buffers[i].bo = NULL;
2510     }
2511
2512     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2513     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
2514
2515     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2516     mfc_context->macroblock_status_buffer.bo = NULL;
2517
2518     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2519     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2520
2521     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2522     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2523
2524
2525     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2526         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2527         mfc_context->reference_surfaces[i].bo = NULL;  
2528     }
2529
2530     i965_gpe_context_destroy(&mfc_context->gpe_context);
2531
2532     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2533     mfc_context->mfc_batchbuffer_surface.bo = NULL;
2534
2535     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2536     mfc_context->aux_batchbuffer_surface.bo = NULL;
2537
2538     if (mfc_context->aux_batchbuffer)
2539         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2540
2541     mfc_context->aux_batchbuffer = NULL;
2542
2543     free(mfc_context);
2544 }
2545
2546 static VAStatus gen75_mfc_pipeline(VADriverContextP ctx,
2547                   VAProfile profile,
2548                   struct encode_state *encode_state,
2549                   struct intel_encoder_context *encoder_context)
2550 {
2551     VAStatus vaStatus;
2552
2553     switch (profile) {
2554     case VAProfileH264Baseline:
2555     case VAProfileH264Main:
2556     case VAProfileH264High:
2557         vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
2558         break;
2559
2560         /* FIXME: add for other profile */
2561     case VAProfileMPEG2Simple:
2562     case VAProfileMPEG2Main:
2563         vaStatus = gen75_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
2564         break;
2565
2566     default:
2567         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2568         break;
2569     }
2570
2571     return vaStatus;
2572 }
2573
2574 Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2575 {
2576     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
2577
2578     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2579
2580     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
2581     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
2582
2583     mfc_context->gpe_context.curbe.length = 32 * 4;
2584
2585     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2586     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
2587     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
2588     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
2589     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
2590
2591     i965_gpe_load_kernels(ctx,
2592                           &mfc_context->gpe_context,
2593                           gen75_mfc_kernels,
2594                           NUM_MFC_KERNEL);
2595
2596     mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
2597     mfc_context->set_surface_state = gen75_mfc_surface_state;
2598     mfc_context->ind_obj_base_addr_state = gen75_mfc_ind_obj_base_addr_state;
2599     mfc_context->avc_img_state = gen75_mfc_avc_img_state;
2600     mfc_context->avc_qm_state = gen75_mfc_avc_qm_state;
2601     mfc_context->avc_fqm_state = gen75_mfc_avc_fqm_state;
2602     mfc_context->insert_object = gen75_mfc_avc_insert_object;
2603     mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;
2604
2605     encoder_context->mfc_context = mfc_context;
2606     encoder_context->mfc_context_destroy = gen75_mfc_context_destroy;
2607     encoder_context->mfc_pipeline = gen75_mfc_pipeline;
2608     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
2609
2610     return True;
2611 }