apply new mv offset for mpeg2 encoding on HSW
[platform/upstream/libva-intel-driver.git] / src / gen75_mfc.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45
#define MFC_SOFTWARE_HASWELL    1

/* Lowest device revision value that IS_STEPPING_BPLUS() accepts. */
#define B0_STEP_REV             2

/*
 * Evaluates to non-zero when the device revision is at least B0_STEP_REV.
 * `i965` must be a pointer to an object that has an `intel.revision` member.
 * The argument is parenthesised so that any pointer expression (for example
 * `&data`) expands with the intended precedence.
 */
#define IS_STEPPING_BPLUS(i965) (((i965)->intel.revision) >= B0_STEP_REV)
50
51 static const uint32_t gen75_mfc_batchbuffer_avc_intra[][4] = {
52 #include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
53 };
54
55 static const uint32_t gen75_mfc_batchbuffer_avc_inter[][4] = {
56 #include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
57 };
58
59 static struct i965_kernel gen75_mfc_kernels[] = {
60     {
61         "MFC AVC INTRA BATCHBUFFER ",
62         MFC_BATCHBUFFER_AVC_INTRA,
63         gen75_mfc_batchbuffer_avc_intra,
64         sizeof(gen75_mfc_batchbuffer_avc_intra),
65         NULL
66     },
67
68     {
69         "MFC AVC INTER BATCHBUFFER ",
70         MFC_BATCHBUFFER_AVC_INTER,
71         gen75_mfc_batchbuffer_avc_inter,
72         sizeof(gen75_mfc_batchbuffer_avc_inter),
73         NULL
74     },
75 };
76
/*
 * Inter-mode constants.
 * NOTE(review): the meaning of each value is inferred from its name only;
 * confirm against the code that consumes them.
 */
#define INTER_16X8          0x01
#define INTER_8X16          0x02
#define INTER_8X8           0x03
#define INTER_MODE_MASK     0x03
#define SUBMB_SHAPE_MASK    0x00FF00

/* Values already shifted into place at bit 20. */
#define INTER_MV8           (4 << 20)
#define INTER_MV32          (6 << 20)
85
86
87 static void
88 gen75_mfc_pipe_mode_select(VADriverContextP ctx,
89                           int standard_select,
90                           struct intel_encoder_context *encoder_context)
91 {
92     struct intel_batchbuffer *batch = encoder_context->base.batch;
93     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
94
95     assert(standard_select == MFX_FORMAT_MPEG2 ||
96            standard_select == MFX_FORMAT_AVC);
97
98     BEGIN_BCS_BATCH(batch, 5);
99
100     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
101     OUT_BCS_BATCH(batch,
102                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
103                   (MFD_MODE_VLD << 15) | /* VLD mode */
104                   (0 << 10) | /* Stream-Out Enable */
105                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
106                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
107                   (0 << 5)  | /* not in stitch mode */
108                   (1 << 4)  | /* encoding mode */
109                   (standard_select << 0));  /* standard select: avc or mpeg2 */
110     OUT_BCS_BATCH(batch,
111                   (0 << 7)  | /* expand NOA bus flag */
112                   (0 << 6)  | /* disable slice-level clock gating */
113                   (0 << 5)  | /* disable clock gating for NOA */
114                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
115                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
116                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
117                   (0 << 1)  |
118                   (0 << 0));
119     OUT_BCS_BATCH(batch, 0);
120     OUT_BCS_BATCH(batch, 0);
121
122     ADVANCE_BCS_BATCH(batch);
123 }
124
125 static void
126 gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
127 {
128     struct intel_batchbuffer *batch = encoder_context->base.batch;
129     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
130
131     BEGIN_BCS_BATCH(batch, 6);
132
133     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
134     OUT_BCS_BATCH(batch, 0);
135     OUT_BCS_BATCH(batch,
136                   ((mfc_context->surface_state.height - 1) << 18) |
137                   ((mfc_context->surface_state.width - 1) << 4));
138     OUT_BCS_BATCH(batch,
139                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
140                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
141                   (0 << 22) | /* surface object control state, FIXME??? */
142                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
143                   (0 << 2)  | /* must be 0 for interleave U/V */
144                   (1 << 1)  | /* must be tiled */
145                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
146     OUT_BCS_BATCH(batch,
147                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
148                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
149     OUT_BCS_BATCH(batch, 0);
150
151     ADVANCE_BCS_BATCH(batch);
152 }
153
154 static void
155 gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
156                                 struct intel_encoder_context *encoder_context)
157 {
158     struct intel_batchbuffer *batch = encoder_context->base.batch;
159     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
160     struct gen6_vme_context *vme_context = encoder_context->vme_context;
161
162     BEGIN_BCS_BATCH(batch, 26);
163
164     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
165         /* the DW1-3 is for the MFX indirect bistream offset */
166     OUT_BCS_BATCH(batch, 0);
167     OUT_BCS_BATCH(batch, 0);
168     OUT_BCS_BATCH(batch, 0);
169         /* the DW4-5 is the MFX upper bound */
170     OUT_BCS_BATCH(batch, 0);
171     OUT_BCS_BATCH(batch, 0);
172
173     /* the DW6-10 is for MFX Indirect MV Object Base Address */
174     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
175     OUT_BCS_BATCH(batch, 0);
176     OUT_BCS_BATCH(batch, 0);
177     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
178     OUT_BCS_BATCH(batch, 0);
179
180      /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
181     OUT_BCS_BATCH(batch, 0);
182     OUT_BCS_BATCH(batch, 0);
183     OUT_BCS_BATCH(batch, 0);
184     OUT_BCS_BATCH(batch, 0);
185     OUT_BCS_BATCH(batch, 0);
186
187      /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */    
188     OUT_BCS_BATCH(batch, 0);
189     OUT_BCS_BATCH(batch, 0);
190     OUT_BCS_BATCH(batch, 0);
191     OUT_BCS_BATCH(batch, 0);
192     OUT_BCS_BATCH(batch, 0);
193
194     /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/        
195     OUT_BCS_RELOC(batch,
196                   mfc_context->mfc_indirect_pak_bse_object.bo,
197                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
198                   0);
199     OUT_BCS_BATCH(batch, 0);
200     OUT_BCS_BATCH(batch, 0);
201         
202     OUT_BCS_RELOC(batch,
203                   mfc_context->mfc_indirect_pak_bse_object.bo,
204                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
205                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
206     OUT_BCS_BATCH(batch, 0);
207
208     ADVANCE_BCS_BATCH(batch);
209 }
210
211 static void
212 gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
213 {
214     struct intel_batchbuffer *batch = encoder_context->base.batch;
215     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
216     struct gen6_vme_context *vme_context = encoder_context->vme_context;
217     struct i965_driver_data *i965 = i965_driver_data(ctx);
218
219     if (IS_STEPPING_BPLUS(i965)) {
220         gen75_mfc_ind_obj_base_addr_state_bplus(ctx, encoder_context);
221         return;
222     }
223     BEGIN_BCS_BATCH(batch, 11);
224
225     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
226     OUT_BCS_BATCH(batch, 0);
227     OUT_BCS_BATCH(batch, 0);
228     /* MFX Indirect MV Object Base Address */
229     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
230     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
231     OUT_BCS_BATCH(batch, 0);
232     OUT_BCS_BATCH(batch, 0);
233     OUT_BCS_BATCH(batch, 0);
234     OUT_BCS_BATCH(batch, 0);
235     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
236     OUT_BCS_RELOC(batch,
237                   mfc_context->mfc_indirect_pak_bse_object.bo,
238                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
239                   0);
240     OUT_BCS_RELOC(batch,
241                   mfc_context->mfc_indirect_pak_bse_object.bo,
242                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
243                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
244
245     ADVANCE_BCS_BATCH(batch);
246 }
247
248 static void
249 gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,  
250                        struct intel_encoder_context *encoder_context)
251 {
252     struct intel_batchbuffer *batch = encoder_context->base.batch;
253     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
254     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
255
256     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
257     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
258
259     BEGIN_BCS_BATCH(batch, 16);
260
261     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
262         /*DW1. MB setting of frame */
263     OUT_BCS_BATCH(batch,
264                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
265     OUT_BCS_BATCH(batch, 
266                   ((height_in_mbs - 1) << 16) | 
267                   ((width_in_mbs - 1) << 0));
268         /* DW3 QP setting */
269     OUT_BCS_BATCH(batch, 
270                   (0 << 24) |   /* Second Chroma QP Offset */
271                   (0 << 16) |   /* Chroma QP Offset */
272                   (0 << 14) |   /* Max-bit conformance Intra flag */
273                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
274                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
275                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
276                   (0 << 8)  |   /* FIXME: Image Structure */
277                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
278     OUT_BCS_BATCH(batch,
279                   (0 << 16) |   /* Mininum Frame size */
280                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
281                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
282                   (0 << 13) |   /* CABAC 0 word insertion test enable */
283                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
284                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
285                   (0 << 8)  |   /* FIXME: MbMvFormatFlag */
286                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
287                   (0 << 6)  |   /* Only valid for VLD decoding mode */
288                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
289                   (0 << 4)  |   /* Direct 8x8 inference flag */
290                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
291                   (1 << 2)  |   /* Frame MB only flag */
292                   (0 << 1)  |   /* MBAFF mode is in active */
293                   (0 << 0));    /* Field picture flag */
294         /* DW5 Trellis quantization */
295     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
296     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
297                   (0xBB8 << 16) |       /* InterMbMaxSz */
298                   (0xEE8) );            /* IntraMbMaxSz */
299     OUT_BCS_BATCH(batch, 0);            /* Reserved */
300         /* DW8. QP delta */
301     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
302     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
303         /* DW10. Bit setting for MB */  
304     OUT_BCS_BATCH(batch, 0x8C000000);
305     OUT_BCS_BATCH(batch, 0x00010000);
306         /* DW12. */
307     OUT_BCS_BATCH(batch, 0);
308     OUT_BCS_BATCH(batch, 0x02010100);
309         /* DW14. For short format */
310     OUT_BCS_BATCH(batch, 0);
311     OUT_BCS_BATCH(batch, 0);
312
313     ADVANCE_BCS_BATCH(batch);
314 }
315
316 static void
317 gen75_mfc_qm_state(VADriverContextP ctx,
318                   int qm_type,
319                   unsigned int *qm,
320                   int qm_length,
321                   struct intel_encoder_context *encoder_context)
322 {
323     struct intel_batchbuffer *batch = encoder_context->base.batch;
324     unsigned int qm_buffer[16];
325
326     assert(qm_length <= 16);
327     assert(sizeof(*qm) == 4);
328     memcpy(qm_buffer, qm, qm_length * 4);
329
330     BEGIN_BCS_BATCH(batch, 18);
331     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
332     OUT_BCS_BATCH(batch, qm_type << 0);
333     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
334     ADVANCE_BCS_BATCH(batch);
335 }
336
337 static void
338 gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
339 {
340     unsigned int qm[16] = {
341         0x10101010, 0x10101010, 0x10101010, 0x10101010,
342         0x10101010, 0x10101010, 0x10101010, 0x10101010,
343         0x10101010, 0x10101010, 0x10101010, 0x10101010,
344         0x10101010, 0x10101010, 0x10101010, 0x10101010
345     };
346
347     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
348     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
349     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
350     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
351 }
352
353 static void
354 gen75_mfc_fqm_state(VADriverContextP ctx,
355                    int fqm_type,
356                    unsigned int *fqm,
357                    int fqm_length,
358                    struct intel_encoder_context *encoder_context)
359 {
360     struct intel_batchbuffer *batch = encoder_context->base.batch;
361     unsigned int fqm_buffer[32];
362
363     assert(fqm_length <= 32);
364     assert(sizeof(*fqm) == 4);
365     memcpy(fqm_buffer, fqm, fqm_length * 4);
366
367     BEGIN_BCS_BATCH(batch, 34);
368     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
369     OUT_BCS_BATCH(batch, fqm_type << 0);
370     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
371     ADVANCE_BCS_BATCH(batch);
372 }
373
374 static void
375 gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
376 {
377     unsigned int qm[32] = {
378         0x10001000, 0x10001000, 0x10001000, 0x10001000,
379         0x10001000, 0x10001000, 0x10001000, 0x10001000,
380         0x10001000, 0x10001000, 0x10001000, 0x10001000,
381         0x10001000, 0x10001000, 0x10001000, 0x10001000,
382         0x10001000, 0x10001000, 0x10001000, 0x10001000,
383         0x10001000, 0x10001000, 0x10001000, 0x10001000,
384         0x10001000, 0x10001000, 0x10001000, 0x10001000,
385         0x10001000, 0x10001000, 0x10001000, 0x10001000
386     };
387
388     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
389     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
390     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
391     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
392 }
393
394 static void
395 gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
396                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
397                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
398                            struct intel_batchbuffer *batch)
399 {
400     if (batch == NULL)
401         batch = encoder_context->base.batch;
402
403     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
404
405     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
406     OUT_BCS_BATCH(batch,
407                   (0 << 16) |   /* always start at offset 0 */
408                   (data_bits_in_last_dw << 8) |
409                   (skip_emul_byte_count << 4) |
410                   (!!emulation_flag << 3) |
411                   ((!!is_last_header) << 2) |
412                   ((!!is_end_of_slice) << 1) |
413                   (0 << 0));    /* FIXME: ??? */
414     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
415
416     ADVANCE_BCS_BATCH(batch);
417 }
418
419
420 static void gen75_mfc_init(VADriverContextP ctx,
421                         struct encode_state *encode_state,
422                         struct intel_encoder_context *encoder_context)
423 {
424     struct i965_driver_data *i965 = i965_driver_data(ctx);
425     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
426     dri_bo *bo;
427     int i;
428     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
429     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
430     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
431
432     /*Encode common setup for MFC*/
433     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
434     mfc_context->post_deblocking_output.bo = NULL;
435
436     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
437     mfc_context->pre_deblocking_output.bo = NULL;
438
439     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
440     mfc_context->uncompressed_picture_source.bo = NULL;
441
442     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
443     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
444
445     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
446         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
447         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
448         mfc_context->direct_mv_buffers[i].bo = NULL;
449     }
450
451     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
452         if (mfc_context->reference_surfaces[i].bo != NULL)
453             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
454         mfc_context->reference_surfaces[i].bo = NULL;  
455     }
456
457     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
458     bo = dri_bo_alloc(i965->intel.bufmgr,
459                       "Buffer",
460                       width_in_mbs * 64,
461                       64);
462     assert(bo);
463     mfc_context->intra_row_store_scratch_buffer.bo = bo;
464
465     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
466     bo = dri_bo_alloc(i965->intel.bufmgr,
467                       "Buffer",
468                       width_in_mbs * height_in_mbs * 16,
469                       64);
470     assert(bo);
471     mfc_context->macroblock_status_buffer.bo = bo;
472
473     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
474     bo = dri_bo_alloc(i965->intel.bufmgr,
475                       "Buffer",
476                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
477                       64);
478     assert(bo);
479     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
480
481     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
482     bo = dri_bo_alloc(i965->intel.bufmgr,
483                       "Buffer",
484                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
485                       0x1000);
486     assert(bo);
487     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
488
489     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
490     mfc_context->mfc_batchbuffer_surface.bo = NULL;
491
492     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
493     mfc_context->aux_batchbuffer_surface.bo = NULL;
494
495     if (mfc_context->aux_batchbuffer)
496         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
497
498     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0);
499     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
500     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
501     mfc_context->aux_batchbuffer_surface.pitch = 16;
502     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
503     mfc_context->aux_batchbuffer_surface.size_block = 16;
504
505     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
506 }
507
508 static void
509 gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
510                                 struct intel_encoder_context *encoder_context)
511 {
512     struct intel_batchbuffer *batch = encoder_context->base.batch;
513     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
514     int i;
515
516     BEGIN_BCS_BATCH(batch, 61);
517
518     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
519
520     /* the DW1-3 is for pre_deblocking */
521     if (mfc_context->pre_deblocking_output.bo)
522         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
523                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
524                       0);
525     else
526         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
527
528         OUT_BCS_BATCH(batch, 0);
529         OUT_BCS_BATCH(batch, 0);
530      /* the DW4-6 is for the post_deblocking */
531
532     if (mfc_context->post_deblocking_output.bo)
533         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
534                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
535                       0);                                                                                       /* post output addr  */ 
536     else
537         OUT_BCS_BATCH(batch, 0);
538         OUT_BCS_BATCH(batch, 0);
539         OUT_BCS_BATCH(batch, 0);
540
541      /* the DW7-9 is for the uncompressed_picture */
542     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
543                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
544                   0); /* uncompressed data */
545
546         OUT_BCS_BATCH(batch, 0);
547         OUT_BCS_BATCH(batch, 0);
548
549      /* the DW10-12 is for the mb status */
550     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
551                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
552                   0); /* StreamOut data*/
553         OUT_BCS_BATCH(batch, 0);
554         OUT_BCS_BATCH(batch, 0);
555
556      /* the DW13-15 is for the intra_row_store_scratch */
557     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
558                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
559                   0);   
560         OUT_BCS_BATCH(batch, 0);
561         OUT_BCS_BATCH(batch, 0);
562
563      /* the DW16-18 is for the deblocking filter */
564     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
565                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
566                   0);
567         OUT_BCS_BATCH(batch, 0);
568         OUT_BCS_BATCH(batch, 0);
569
570     /* the DW 19-50 is for Reference pictures*/
571     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
572         if ( mfc_context->reference_surfaces[i].bo != NULL) {
573             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
574                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
575                           0);                   
576         } else {
577             OUT_BCS_BATCH(batch, 0);
578         }
579         OUT_BCS_BATCH(batch, 0);
580     }
581         OUT_BCS_BATCH(batch, 0);
582
583         /* The DW 52-54 is for the MB status buffer */
584     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
585                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
586                   0);                                                                                   /* Macroblock status buffer*/
587         
588         OUT_BCS_BATCH(batch, 0);
589         OUT_BCS_BATCH(batch, 0);
590
591         /* the DW 55-57 is the ILDB buffer */
592         OUT_BCS_BATCH(batch, 0);
593         OUT_BCS_BATCH(batch, 0);
594         OUT_BCS_BATCH(batch, 0);
595
596         /* the DW 58-60 is the second ILDB buffer */
597         OUT_BCS_BATCH(batch, 0);
598         OUT_BCS_BATCH(batch, 0);
599         OUT_BCS_BATCH(batch, 0);
600     ADVANCE_BCS_BATCH(batch);
601 }
602
603 static void
604 gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
605 {
606     struct intel_batchbuffer *batch = encoder_context->base.batch;
607     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
608     struct i965_driver_data *i965 = i965_driver_data(ctx);
609     int i;
610
611     if (IS_STEPPING_BPLUS(i965)) {
612         gen75_mfc_pipe_buf_addr_state_bplus(ctx, encoder_context);
613         return;
614     }
615
616     BEGIN_BCS_BATCH(batch, 25);
617
618     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
619
620     if (mfc_context->pre_deblocking_output.bo)
621         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
622                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
623                       0);
624     else
625         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
626
627     if (mfc_context->post_deblocking_output.bo)
628         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
629                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
630                       0);                                                                                       /* post output addr  */ 
631     else
632         OUT_BCS_BATCH(batch, 0);
633
634     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
635                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
636                   0);                                                                                   /* uncompressed data */
637     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
638                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
639                   0);                                                                                   /* StreamOut data*/
640     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
641                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
642                   0);   
643     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
644                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
645                   0);
646     /* 7..22 Reference pictures*/
647     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
648         if ( mfc_context->reference_surfaces[i].bo != NULL) {
649             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
650                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
651                           0);                   
652         } else {
653             OUT_BCS_BATCH(batch, 0);
654         }
655     }
656     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
657                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
658                   0);                                                                                   /* Macroblock status buffer*/
659
660         OUT_BCS_BATCH(batch, 0);
661
662     ADVANCE_BCS_BATCH(batch);
663 }
664
665 static void
666 gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
667                                 struct intel_encoder_context *encoder_context)
668 {
669     struct intel_batchbuffer *batch = encoder_context->base.batch;
670     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
671
672     int i;
673
674     BEGIN_BCS_BATCH(batch, 71);
675
676     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
677
678     /* Reference frames and Current frames */
679     /* the DW1-32 is for the direct MV for reference */
680     for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
681         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
682             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
683                           I915_GEM_DOMAIN_INSTRUCTION, 0,
684                           0);
685             OUT_BCS_BATCH(batch, 0);
686         } else {
687             OUT_BCS_BATCH(batch, 0);
688             OUT_BCS_BATCH(batch, 0);
689         }
690     }
691         OUT_BCS_BATCH(batch, 0);
692
693         /* the DW34-36 is the MV for the current reference */
694         OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
695                           I915_GEM_DOMAIN_INSTRUCTION, 0,
696                           0);
697
698         OUT_BCS_BATCH(batch, 0);
699         OUT_BCS_BATCH(batch, 0);
700
701     /* POL list */
702     for(i = 0; i < 32; i++) {
703         OUT_BCS_BATCH(batch, i/2);
704     }
705     OUT_BCS_BATCH(batch, 0);
706     OUT_BCS_BATCH(batch, 0);
707
708     ADVANCE_BCS_BATCH(batch);
709 }
710
711 static void
712 gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
713 {
714     struct intel_batchbuffer *batch = encoder_context->base.batch;
715     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
716     struct i965_driver_data *i965 = i965_driver_data(ctx);
717     int i;
718
719     if (IS_STEPPING_BPLUS(i965)) {
720         gen75_mfc_avc_directmode_state_bplus(ctx, encoder_context);
721         return;
722     }
723
724     BEGIN_BCS_BATCH(batch, 69);
725
726     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
727
728     /* Reference frames and Current frames */
729     for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
730         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
731             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
732                           I915_GEM_DOMAIN_INSTRUCTION, 0,
733                           0);
734         } else {
735             OUT_BCS_BATCH(batch, 0);
736         }
737     }
738
739     /* POL list */
740     for(i = 0; i < 32; i++) {
741         OUT_BCS_BATCH(batch, i/2);
742     }
743     OUT_BCS_BATCH(batch, 0);
744     OUT_BCS_BATCH(batch, 0);
745
746     ADVANCE_BCS_BATCH(batch);
747 }
748
749 static void
750 gen75_mfc_avc_ref_idx_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
751 {
752     struct intel_batchbuffer *batch = encoder_context->base.batch;
753     int i;
754
755     BEGIN_BCS_BATCH(batch, 10);
756     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
757     OUT_BCS_BATCH(batch, 0);                  //Select L0
758     OUT_BCS_BATCH(batch, 0x80808020);         //Only 1 reference
759     for(i = 0; i < 7; i++) {
760         OUT_BCS_BATCH(batch, 0x80808080);
761     }   
762     ADVANCE_BCS_BATCH(batch);
763
764     BEGIN_BCS_BATCH(batch, 10);
765     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
766     OUT_BCS_BATCH(batch, 1);                  //Select L1
767     OUT_BCS_BATCH(batch, 0x80808022);         //Only 1 reference
768     for(i = 0; i < 7; i++) {
769         OUT_BCS_BATCH(batch, 0x80808080);
770     }   
771     ADVANCE_BCS_BATCH(batch);
772 }
773
774
775 static void
776 gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
777                                 struct intel_encoder_context *encoder_context)
778 {
779     struct intel_batchbuffer *batch = encoder_context->base.batch;
780     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
781
782     BEGIN_BCS_BATCH(batch, 10);
783
784     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
785     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
786                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
787                   0);
788     OUT_BCS_BATCH(batch, 0);
789     OUT_BCS_BATCH(batch, 0);
790         
791         /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
792     OUT_BCS_BATCH(batch, 0);
793     OUT_BCS_BATCH(batch, 0);
794     OUT_BCS_BATCH(batch, 0);
795
796         /* the DW7-9 is for Bitplane Read Buffer Base Address */
797     OUT_BCS_BATCH(batch, 0);
798     OUT_BCS_BATCH(batch, 0);
799     OUT_BCS_BATCH(batch, 0);
800
801     ADVANCE_BCS_BATCH(batch);
802 }
803
804 static void
805 gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
806 {
807     struct intel_batchbuffer *batch = encoder_context->base.batch;
808     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
809     struct i965_driver_data *i965 = i965_driver_data(ctx);
810
811     if (IS_STEPPING_BPLUS(i965)) {
812         gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, encoder_context);
813         return;
814     }
815
816     BEGIN_BCS_BATCH(batch, 4);
817
818     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
819     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
820                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
821                   0);
822     OUT_BCS_BATCH(batch, 0);
823     OUT_BCS_BATCH(batch, 0);
824
825     ADVANCE_BCS_BATCH(batch);
826 }
827
828
829 static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
830                                       struct encode_state *encode_state,
831                                       struct intel_encoder_context *encoder_context)
832 {
833     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
834
835     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
836     mfc_context->set_surface_state(ctx, encoder_context);
837     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
838     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
839     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
840     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
841     mfc_context->avc_qm_state(ctx, encoder_context);
842     mfc_context->avc_fqm_state(ctx, encoder_context);
843     gen75_mfc_avc_directmode_state(ctx, encoder_context); 
844     gen75_mfc_avc_ref_idx_state(ctx, encoder_context);
845 }
846
847
848 static VAStatus gen75_mfc_run(VADriverContextP ctx, 
849                              struct encode_state *encode_state,
850                              struct intel_encoder_context *encoder_context)
851 {
852     struct intel_batchbuffer *batch = encoder_context->base.batch;
853
854     intel_batchbuffer_flush(batch);             //run the pipeline
855
856     return VA_STATUS_SUCCESS;
857 }
858
859
860 static VAStatus
861 gen75_mfc_stop(VADriverContextP ctx, 
862               struct encode_state *encode_state,
863               struct intel_encoder_context *encoder_context,
864               int *encoded_bits_size)
865 {
866     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
867     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
868     VACodedBufferSegment *coded_buffer_segment;
869     
870     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
871     assert(vaStatus == VA_STATUS_SUCCESS);
872     *encoded_bits_size = coded_buffer_segment->size * 8;
873     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
874
875     return VA_STATUS_SUCCESS;
876 }
877
878
879 static void
880 gen75_mfc_avc_slice_state(VADriverContextP ctx,
881                          VAEncPictureParameterBufferH264 *pic_param,
882                          VAEncSliceParameterBufferH264 *slice_param,
883                          struct encode_state *encode_state,
884                          struct intel_encoder_context *encoder_context,
885                          int rate_control_enable,
886                          int qp,
887                          struct intel_batchbuffer *batch)
888 {
889     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
890     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
891     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
892     int beginmb = slice_param->macroblock_address;
893     int endmb = beginmb + slice_param->num_macroblocks;
894     int beginx = beginmb % width_in_mbs;
895     int beginy = beginmb / width_in_mbs;
896     int nextx =  endmb % width_in_mbs;
897     int nexty = endmb / width_in_mbs;
898     int slice_type = slice_param->slice_type;
899     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
900     int bit_rate_control_target, maxQpN, maxQpP;
901     unsigned char correct[6], grow, shrink;
902     int i;
903     int bslice = 0;
904     int weighted_pred_idc = 0;
905     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
906     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
907
908     if (batch == NULL)
909         batch = encoder_context->base.batch;
910
911     bit_rate_control_target = slice_type;
912     if (slice_type == SLICE_TYPE_SP)
913         bit_rate_control_target = SLICE_TYPE_P;
914     else if (slice_type == SLICE_TYPE_SI)
915         bit_rate_control_target = SLICE_TYPE_I;
916
917     if (slice_type == SLICE_TYPE_P) {
918         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
919     } else if (slice_type == SLICE_TYPE_B) {
920         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
921         bslice = 1;
922
923         if (weighted_pred_idc == 2) {
924             /* 8.4.3 - Derivation process for prediction weights (8-279) */
925             luma_log2_weight_denom = 5;
926             chroma_log2_weight_denom = 5;
927         }
928     }
929
930     maxQpN = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpNegModifier;
931     maxQpP = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpPosModifier;
932
933     for (i = 0; i < 6; i++)
934         correct[i] = mfc_context->bit_rate_control_context[bit_rate_control_target].Correct[i];
935
936     grow = mfc_context->bit_rate_control_context[bit_rate_control_target].GrowInit + 
937         (mfc_context->bit_rate_control_context[bit_rate_control_target].GrowResistance << 4);
938     shrink = mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkInit + 
939         (mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkResistance << 4);
940
941     BEGIN_BCS_BATCH(batch, 11);;
942
943     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
944     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
945
946     if (slice_type == SLICE_TYPE_I) {
947         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
948     } else {
949         OUT_BCS_BATCH(batch,
950                       (1 << 16) | (bslice << 24) |                      /*1 reference frame*/
951                       (chroma_log2_weight_denom << 8) |
952                       (luma_log2_weight_denom << 0));
953     }
954
955     OUT_BCS_BATCH(batch, 
956                   (weighted_pred_idc << 30) |
957                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
958                   (slice_param->disable_deblocking_filter_idc << 27) |
959                   (slice_param->cabac_init_idc << 24) |
960                   (qp<<16) |                    /*Slice Quantization Parameter*/
961                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
962                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
963     OUT_BCS_BATCH(batch,
964                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
965                   (beginx << 16) |
966                   slice_param->macroblock_address );
967     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
968     OUT_BCS_BATCH(batch, 
969                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
970                   (1 << 30) |           /*ResetRateControlCounter*/
971                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
972                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
973                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
974                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
975                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
976                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
977                   (last_slice << 19) |     /*IsLastSlice*/
978                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
979                   (1 << 17) |       /*HeaderPresentFlag*/       
980                   (1 << 16) |       /*SliceData PresentFlag*/
981                   (1 << 15) |       /*TailPresentFlag*/
982                   (1 << 13) |       /*RBSP NAL TYPE*/   
983                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
984     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
985     OUT_BCS_BATCH(batch,
986                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
987                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
988                   (shrink << 8)  |
989                   (grow << 0));   
990     OUT_BCS_BATCH(batch,
991                   (correct[5] << 20) |
992                   (correct[4] << 16) |
993                   (correct[3] << 12) |
994                   (correct[2] << 8) |
995                   (correct[1] << 4) |
996                   (correct[0] << 0));
997     OUT_BCS_BATCH(batch, 0);
998
999     ADVANCE_BCS_BATCH(batch);
1000 }
1001
1002
1003 #ifdef MFC_SOFTWARE_HASWELL
1004
1005 static int
1006 gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
1007                                 int qp,unsigned int *msg,
1008                               struct intel_encoder_context *encoder_context,
1009                               unsigned char target_mb_size, unsigned char max_mb_size,
1010                               struct intel_batchbuffer *batch)
1011 {
1012     int len_in_dwords = 12;
1013     unsigned int intra_msg;
1014 #define         INTRA_MSG_FLAG          (1 << 13)
1015 #define         INTRA_MBTYPE_MASK       (0x1F0000)
1016     if (batch == NULL)
1017         batch = encoder_context->base.batch;
1018
1019     BEGIN_BCS_BATCH(batch, len_in_dwords);
1020
1021     intra_msg = msg[0] & 0xC0FF;
1022     intra_msg |= INTRA_MSG_FLAG;
1023     intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
1024     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1025     OUT_BCS_BATCH(batch, 0);
1026     OUT_BCS_BATCH(batch, 0);
1027     OUT_BCS_BATCH(batch, 
1028                   (0 << 24) |           /* PackedMvNum, Debug*/
1029                   (0 << 20) |           /* No motion vector */
1030                   (1 << 19) |           /* CbpDcY */
1031                   (1 << 18) |           /* CbpDcU */
1032                   (1 << 17) |           /* CbpDcV */
1033                   intra_msg);
1034
1035     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
1036     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
1037     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
1038
1039     /*Stuff for Intra MB*/
1040     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
1041     OUT_BCS_BATCH(batch, msg[2]);       
1042     OUT_BCS_BATCH(batch, msg[3]&0xFF);  
1043     
1044     /*MaxSizeInWord and TargetSzieInWord*/
1045     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1046                   (target_mb_size << 16) );
1047
1048     OUT_BCS_BATCH(batch, 0);
1049
1050     ADVANCE_BCS_BATCH(batch);
1051
1052     return len_in_dwords;
1053 }
1054
1055 static int
1056 gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1057                               unsigned int *msg, unsigned int offset,
1058                               struct intel_encoder_context *encoder_context,
1059                               unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1060                               struct intel_batchbuffer *batch)
1061 {
1062     int len_in_dwords = 12;
1063         unsigned int inter_msg = 0;
1064     if (batch == NULL)
1065         batch = encoder_context->base.batch;
1066     {
1067 #define MSG_MV_OFFSET   4
1068         unsigned int *mv_ptr;
1069         mv_ptr = msg + MSG_MV_OFFSET;
1070         /* MV of VME output is based on 16 sub-blocks. So it is necessary
1071          * to convert them to be compatible with the format of AVC_PAK
1072          * command.
1073          */
1074         if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
1075                 /* MV[0] and MV[2] are replicated */
1076                 mv_ptr[4] = mv_ptr[0];
1077                 mv_ptr[5] = mv_ptr[1];
1078                 mv_ptr[2] = mv_ptr[8];
1079                 mv_ptr[3] = mv_ptr[9];
1080                 mv_ptr[6] = mv_ptr[8]; 
1081                 mv_ptr[7] = mv_ptr[9]; 
1082         } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
1083                 /* MV[0] and MV[1] are replicated */
1084                 mv_ptr[2] = mv_ptr[0];  
1085                 mv_ptr[3] = mv_ptr[1];
1086                 mv_ptr[4] = mv_ptr[16]; 
1087                 mv_ptr[5] = mv_ptr[17]; 
1088                 mv_ptr[6] = mv_ptr[24];
1089                 mv_ptr[7] = mv_ptr[25];
1090         } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1091                         !(msg[1] & SUBMB_SHAPE_MASK)) {
1092                 /* Don't touch MV[0] or MV[1] */
1093                 mv_ptr[2] = mv_ptr[8];
1094                 mv_ptr[3] = mv_ptr[9];
1095                 mv_ptr[4] = mv_ptr[16];
1096                 mv_ptr[5] = mv_ptr[17];
1097                 mv_ptr[6] = mv_ptr[24];
1098                 mv_ptr[7] = mv_ptr[25];
1099         }
1100     }
1101
1102     BEGIN_BCS_BATCH(batch, len_in_dwords);
1103
1104     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1105
1106         inter_msg = 32;
1107         /* MV quantity */
1108         if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
1109                 if (msg[1] & SUBMB_SHAPE_MASK)
1110                         inter_msg = 128;
1111         }
1112     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
1113     OUT_BCS_BATCH(batch, offset);
1114         inter_msg = msg[0] & (0x1F00FFFF);
1115         inter_msg |= INTER_MV8;
1116         inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
1117         if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1118                         (msg[1] & SUBMB_SHAPE_MASK)) {
1119                 inter_msg |= INTER_MV32;
1120         }
1121
1122     OUT_BCS_BATCH(batch, inter_msg);
1123
1124     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
1125     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
1126 #if 0 
1127     if ( slice_type == SLICE_TYPE_B) {
1128         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
1129     } else {
1130         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
1131     }
1132 #else
1133     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
1134 #endif
1135
1136         inter_msg = msg[1] >> 8;
1137     /*Stuff for Inter MB*/
1138     OUT_BCS_BATCH(batch, inter_msg);        
1139     OUT_BCS_BATCH(batch, 0x0);    
1140     OUT_BCS_BATCH(batch, 0x0);        
1141
1142     /*MaxSizeInWord and TargetSzieInWord*/
1143     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1144                   (target_mb_size << 16) );
1145
1146     OUT_BCS_BATCH(batch, 0x0);    
1147
1148     ADVANCE_BCS_BATCH(batch);
1149
1150     return len_in_dwords;
1151 }
1152
/*
 * Layout constants for one per-macroblock record of the VME output buffer,
 * as consumed by gen75_mfc_avc_pipeline_slice_programing().
 */
/* dword index of the intra value that is compared against the inter one
 * (presumably the rate-distortion cost -- confirm against the VME kernel) */
#define         AVC_INTRA_RDO_OFFSET    4
/* dword index of the corresponding inter value */
#define         AVC_INTER_RDO_OFFSET    10
/* number of dwords to skip to reach the inter part of the record */
#define         AVC_INTER_MSG_OFFSET    8       
/* byte offset added to the record's start to form the PAK object offset */
#define         AVC_INTER_MV_OFFSET             48
/* only the low 16 bits of the two RDO dwords are compared */
#define         AVC_RDO_MASK            0xFFFF
1158
1159 static void 
1160 gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1161                                        struct encode_state *encode_state,
1162                                        struct intel_encoder_context *encoder_context,
1163                                        int slice_index,
1164                                        struct intel_batchbuffer *slice_batch)
1165 {
1166     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1167     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1168     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1169     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1170     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1171     unsigned int *msg = NULL, offset = 0;
1172     unsigned char *msg_ptr = NULL;
1173     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
1174     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1175     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1176     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1177     int i,x,y;
1178     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1179     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1180     unsigned char *slice_header = NULL;
1181     int slice_header_length_in_bits = 0;
1182     unsigned int tail_data[] = { 0x0, 0x0 };
1183     int slice_type = pSliceParameter->slice_type;
1184
1185
1186     if (rate_control_mode == VA_RC_CBR) {
1187         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1188         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1189     }
1190
1191     /* only support for 8-bit pixel bit-depth */
1192     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1193     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1194     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1195     assert(qp >= 0 && qp < 52);
1196
1197     gen75_mfc_avc_slice_state(ctx, 
1198                              pPicParameter,
1199                              pSliceParameter,
1200                              encode_state, encoder_context,
1201                              (rate_control_mode == VA_RC_CBR), qp, slice_batch);
1202
1203     if ( slice_index == 0) 
1204         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1205
1206     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1207
1208     // slice hander
1209     mfc_context->insert_object(ctx, encoder_context,
1210                                (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1211                                5,  /* first 5 bytes are start code + nal unit type */
1212                                1, 0, 1, slice_batch);
1213
1214     dri_bo_map(vme_context->vme_output.bo , 1);
1215     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1216
1217     if (is_intra) {
1218         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1219     } else {
1220         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1221     }
1222    
1223     for (i = pSliceParameter->macroblock_address; 
1224          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1225         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1226         x = i % width_in_mbs;
1227         y = i / width_in_mbs;
1228         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1229
1230         if (is_intra) {
1231             assert(msg);
1232             gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1233         } else {
1234             int inter_rdo, intra_rdo;
1235             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1236             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1237             offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1238             if (intra_rdo < inter_rdo) { 
1239                 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1240             } else {
1241                 msg += AVC_INTER_MSG_OFFSET;
1242                 gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1243             }
1244         }
1245     }
1246    
1247     dri_bo_unmap(vme_context->vme_output.bo);
1248
1249     if ( last_slice ) {    
1250         mfc_context->insert_object(ctx, encoder_context,
1251                                    tail_data, 2, 8,
1252                                    2, 1, 1, 0, slice_batch);
1253     } else {
1254         mfc_context->insert_object(ctx, encoder_context,
1255                                    tail_data, 1, 8,
1256                                    1, 1, 1, 0, slice_batch);
1257     }
1258
1259     free(slice_header);
1260
1261 }
1262
1263 static dri_bo *
1264 gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1265                                   struct encode_state *encode_state,
1266                                   struct intel_encoder_context *encoder_context)
1267 {
1268     struct i965_driver_data *i965 = i965_driver_data(ctx);
1269     struct intel_batchbuffer *batch;
1270     dri_bo *batch_bo;
1271     int i;
1272     int buffer_size;
1273     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1274     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1275     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1276
1277     buffer_size = width_in_mbs * height_in_mbs * 64;
1278     batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
1279     batch_bo = batch->buffer;
1280     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1281         gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1282     }
1283
1284     intel_batchbuffer_align(batch, 8);
1285     
1286     BEGIN_BCS_BATCH(batch, 2);
1287     OUT_BCS_BATCH(batch, 0);
1288     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1289     ADVANCE_BCS_BATCH(batch);
1290
1291     dri_bo_reference(batch_bo);
1292     intel_batchbuffer_free(batch);
1293
1294     return batch_bo;
1295 }
1296
1297 #else
1298
1299 static void
1300 gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1301                                     struct encode_state *encode_state,
1302                                     struct intel_encoder_context *encoder_context)
1303
1304 {
1305     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1306     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1307
1308     assert(vme_context->vme_output.bo);
1309     mfc_context->buffer_suface_setup(ctx,
1310                                      &mfc_context->gpe_context,
1311                                      &vme_context->vme_output,
1312                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1313                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1314     assert(mfc_context->aux_batchbuffer_surface.bo);
1315     mfc_context->buffer_suface_setup(ctx,
1316                                      &mfc_context->gpe_context,
1317                                      &mfc_context->aux_batchbuffer_surface,
1318                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1319                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
1320 }
1321
1322 static void
1323 gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1324                                      struct encode_state *encode_state,
1325                                      struct intel_encoder_context *encoder_context)
1326
1327 {
1328     struct i965_driver_data *i965 = i965_driver_data(ctx);
1329     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1330     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1331     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1332     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1333     mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1334     mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1335     mfc_context->mfc_batchbuffer_surface.pitch = 16;
1336     mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, 
1337                                                            "MFC batchbuffer",
1338                                                            mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1339                                                            0x1000);
1340     mfc_context->buffer_suface_setup(ctx,
1341                                      &mfc_context->gpe_context,
1342                                      &mfc_context->mfc_batchbuffer_surface,
1343                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1344                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1345 }
1346
1347 static void
1348 gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
1349                                     struct encode_state *encode_state,
1350                                     struct intel_encoder_context *encoder_context)
1351 {
1352     gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1353     gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1354 }
1355
1356 static void
1357 gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
1358                                 struct encode_state *encode_state,
1359                                 struct intel_encoder_context *encoder_context)
1360 {
1361     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1362     struct gen6_interface_descriptor_data *desc;   
1363     int i;
1364     dri_bo *bo;
1365
1366     bo = mfc_context->gpe_context.idrt.bo;
1367     dri_bo_map(bo, 1);
1368     assert(bo->virtual);
1369     desc = bo->virtual;
1370
1371     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1372         struct i965_kernel *kernel;
1373
1374         kernel = &mfc_context->gpe_context.kernels[i];
1375         assert(sizeof(*desc) == 32);
1376
1377         /*Setup the descritor table*/
1378         memset(desc, 0, sizeof(*desc));
1379         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1380         desc->desc2.sampler_count = 0;
1381         desc->desc2.sampler_state_pointer = 0;
1382         desc->desc3.binding_table_entry_count = 2;
1383         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1384         desc->desc4.constant_urb_entry_read_offset = 0;
1385         desc->desc4.constant_urb_entry_read_length = 4;
1386                 
1387         /*kernel start*/
1388         dri_bo_emit_reloc(bo,   
1389                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1390                           0,
1391                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1392                           kernel->bo);
1393         desc++;
1394     }
1395
1396     dri_bo_unmap(bo);
1397 }
1398
1399 static void
1400 gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1401                                     struct encode_state *encode_state,
1402                                     struct intel_encoder_context *encoder_context)
1403 {
1404     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1405     
1406     (void)mfc_context;
1407 }
1408
1409 static void
1410 gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1411                                          int index,
1412                                          int head_offset,
1413                                          int batchbuffer_offset,
1414                                          int head_size,
1415                                          int tail_size,
1416                                          int number_mb_cmds,
1417                                          int first_object,
1418                                          int last_object,
1419                                          int last_slice,
1420                                          int mb_x,
1421                                          int mb_y,
1422                                          int width_in_mbs,
1423                                          int qp)
1424 {
1425     BEGIN_BATCH(batch, 12);
1426     
1427     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1428     OUT_BATCH(batch, index);
1429     OUT_BATCH(batch, 0);
1430     OUT_BATCH(batch, 0);
1431     OUT_BATCH(batch, 0);
1432     OUT_BATCH(batch, 0);
1433    
1434     /*inline data */
1435     OUT_BATCH(batch, head_offset);
1436     OUT_BATCH(batch, batchbuffer_offset);
1437     OUT_BATCH(batch, 
1438               head_size << 16 |
1439               tail_size);
1440     OUT_BATCH(batch,
1441               number_mb_cmds << 16 |
1442               first_object << 2 |
1443               last_object << 1 |
1444               last_slice);
1445     OUT_BATCH(batch,
1446               mb_y << 8 |
1447               mb_x);
1448     OUT_BATCH(batch,
1449               qp << 16 |
1450               width_in_mbs);
1451
1452     ADVANCE_BATCH(batch);
1453 }
1454
1455 static void
1456 gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1457                                        struct intel_encoder_context *encoder_context,
1458                                        VAEncSliceParameterBufferH264 *slice_param,
1459                                        int head_offset,
1460                                        unsigned short head_size,
1461                                        unsigned short tail_size,
1462                                        int batchbuffer_offset,
1463                                        int qp,
1464                                        int last_slice)
1465 {
1466     struct intel_batchbuffer *batch = encoder_context->base.batch;
1467     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1468     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1469     int total_mbs = slice_param->num_macroblocks;
1470     int number_mb_cmds = 128;
1471     int starting_mb = 0;
1472     int last_object = 0;
1473     int first_object = 1;
1474     int i;
1475     int mb_x, mb_y;
1476     int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1477
1478     for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1479         last_object = (total_mbs - starting_mb) == number_mb_cmds;
1480         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1481         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1482         assert(mb_x <= 255 && mb_y <= 255);
1483
1484         starting_mb += number_mb_cmds;
1485
1486         gen75_mfc_batchbuffer_emit_object_command(batch,
1487                                                  index,
1488                                                  head_offset,
1489                                                  batchbuffer_offset,
1490                                                  head_size,
1491                                                  tail_size,
1492                                                  number_mb_cmds,
1493                                                  first_object,
1494                                                  last_object,
1495                                                  last_slice,
1496                                                  mb_x,
1497                                                  mb_y,
1498                                                  width_in_mbs,
1499                                                  qp);
1500
1501         if (first_object) {
1502             head_offset += head_size;
1503             batchbuffer_offset += head_size;
1504         }
1505
1506         if (last_object) {
1507             head_offset += tail_size;
1508             batchbuffer_offset += tail_size;
1509         }
1510
1511         batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
1512
1513         first_object = 0;
1514     }
1515
1516     if (!last_object) {
1517         last_object = 1;
1518         number_mb_cmds = total_mbs % number_mb_cmds;
1519         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1520         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1521         assert(mb_x <= 255 && mb_y <= 255);
1522         starting_mb += number_mb_cmds;
1523
1524         gen75_mfc_batchbuffer_emit_object_command(batch,
1525                                                  index,
1526                                                  head_offset,
1527                                                  batchbuffer_offset,
1528                                                  head_size,
1529                                                  tail_size,
1530                                                  number_mb_cmds,
1531                                                  first_object,
1532                                                  last_object,
1533                                                  last_slice,
1534                                                  mb_x,
1535                                                  mb_y,
1536                                                  width_in_mbs,
1537                                                  qp);
1538     }
1539 }
1540                           
1541 /*
1542  * return size in Owords (16bytes)
1543  */         
1544 static int
1545 gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1546                                struct encode_state *encode_state,
1547                                struct intel_encoder_context *encoder_context,
1548                                int slice_index,
1549                                int batchbuffer_offset)
1550 {
1551     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1552     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1553     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1554     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1555     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1556     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1557     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1558     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1559     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1560     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1561     unsigned char *slice_header = NULL;
1562     int slice_header_length_in_bits = 0;
1563     unsigned int tail_data[] = { 0x0, 0x0 };
1564     long head_offset;
1565     int old_used = intel_batchbuffer_used_size(slice_batch), used;
1566     unsigned short head_size, tail_size;
1567     int slice_type = pSliceParameter->slice_type;
1568
1569     if (rate_control_mode == VA_RC_CBR) {
1570         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1571         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1572     }
1573
1574     /* only support for 8-bit pixel bit-depth */
1575     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1576     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1577     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1578     assert(qp >= 0 && qp < 52);
1579
1580     head_offset = old_used / 16;
1581     gen75_mfc_avc_slice_state(ctx,
1582                              pPicParameter,
1583                              pSliceParameter,
1584                              encode_state,
1585                              encoder_context,
1586                              (rate_control_mode == VA_RC_CBR),
1587                              qp,
1588                              slice_batch);
1589
1590     if (slice_index == 0)
1591         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1592
1593     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1594
1595     // slice hander
1596     mfc_context->insert_object(ctx,
1597                                encoder_context,
1598                                (unsigned int *)slice_header,
1599                                ALIGN(slice_header_length_in_bits, 32) >> 5,
1600                                slice_header_length_in_bits & 0x1f,
1601                                5,  /* first 5 bytes are start code + nal unit type */
1602                                1,
1603                                0,
1604                                1,
1605                                slice_batch);
1606     free(slice_header);
1607
1608     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1609     used = intel_batchbuffer_used_size(slice_batch);
1610     head_size = (used - old_used) / 16;
1611     old_used = used;
1612
1613     /* tail */
1614     if (last_slice) {    
1615         mfc_context->insert_object(ctx,
1616                                    encoder_context,
1617                                    tail_data,
1618                                    2,
1619                                    8,
1620                                    2,
1621                                    1,
1622                                    1,
1623                                    0,
1624                                    slice_batch);
1625     } else {
1626         mfc_context->insert_object(ctx,
1627                                    encoder_context,
1628                                    tail_data,
1629                                    1,
1630                                    8,
1631                                    1,
1632                                    1,
1633                                    1,
1634                                    0,
1635                                    slice_batch);
1636     }
1637
1638     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1639     used = intel_batchbuffer_used_size(slice_batch);
1640     tail_size = (used - old_used) / 16;
1641
1642    
1643     gen75_mfc_avc_batchbuffer_slice_command(ctx,
1644                                            encoder_context,
1645                                            pSliceParameter,
1646                                            head_offset,
1647                                            head_size,
1648                                            tail_size,
1649                                            batchbuffer_offset,
1650                                            qp,
1651                                            last_slice);
1652
1653     return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
1654 }
1655
1656 static void
1657 gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1658                                   struct encode_state *encode_state,
1659                                   struct intel_encoder_context *encoder_context)
1660 {
1661     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1662     struct intel_batchbuffer *batch = encoder_context->base.batch;
1663     int i, size, offset = 0;
1664     intel_batchbuffer_start_atomic(batch, 0x4000); 
1665     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1666
1667     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1668         size = gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1669         offset += size;
1670     }
1671
1672     intel_batchbuffer_end_atomic(batch);
1673     intel_batchbuffer_flush(batch);
1674 }
1675
1676 static void
1677 gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
1678                                struct encode_state *encode_state,
1679                                struct intel_encoder_context *encoder_context)
1680 {
1681     gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1682     gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1683     gen75_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1684     gen75_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1685 }
1686
1687 static dri_bo *
1688 gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1689                                   struct encode_state *encode_state,
1690                                   struct intel_encoder_context *encoder_context)
1691 {
1692     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1693
1694     gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1695     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1696
1697     return mfc_context->mfc_batchbuffer_surface.bo;
1698 }
1699
1700 #endif
1701
1702 static void
1703 gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
1704                                  struct encode_state *encode_state,
1705                                  struct intel_encoder_context *encoder_context)
1706 {
1707     struct intel_batchbuffer *batch = encoder_context->base.batch;
1708     dri_bo *slice_batch_bo;
1709
1710     if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1711         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1712         assert(0);
1713         return; 
1714     }
1715
1716 #ifdef MFC_SOFTWARE_HASWELL
1717     slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1718 #else
1719     slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1720 #endif
1721
1722     // begin programing
1723     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1724     intel_batchbuffer_emit_mi_flush(batch);
1725     
1726     // picture level programing
1727     gen75_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1728
1729     BEGIN_BCS_BATCH(batch, 2);
1730     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1731     OUT_BCS_RELOC(batch,
1732                   slice_batch_bo,
1733                   I915_GEM_DOMAIN_COMMAND, 0, 
1734                   0);
1735     ADVANCE_BCS_BATCH(batch);
1736
1737     // end programing
1738     intel_batchbuffer_end_atomic(batch);
1739
1740     dri_bo_unreference(slice_batch_bo);
1741 }
1742
1743
1744 static VAStatus
1745 gen75_mfc_avc_encode_picture(VADriverContextP ctx, 
1746                             struct encode_state *encode_state,
1747                             struct intel_encoder_context *encoder_context)
1748 {
1749     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1750     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1751     int current_frame_bits_size;
1752     int sts;
1753  
1754     for (;;) {
1755         gen75_mfc_init(ctx, encode_state, encoder_context);
1756         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1757         /*Programing bcs pipeline*/
1758         gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);  //filling the pipeline
1759         gen75_mfc_run(ctx, encode_state, encoder_context);
1760         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1761             gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1762             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1763             if (sts == BRC_NO_HRD_VIOLATION) {
1764                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1765                 break;
1766             }
1767             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1768                 if (!mfc_context->hrd.violation_noted) {
1769                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1770                     mfc_context->hrd.violation_noted = 1;
1771                 }
1772                 return VA_STATUS_SUCCESS;
1773             }
1774         } else {
1775             break;
1776         }
1777     }
1778
1779     return VA_STATUS_SUCCESS;
1780 }
1781
/*
 * MPEG-2
 */

/*
 * Picture-type code written into MFX_MPEG2_PIC_STATE, indexed by
 * pic_param->picture_type (see gen75_mfc_mpeg2_pic_state):
 * index 0 is I, 1 is P, 2 is B.
 */
static const int
va_to_gen75_mpeg2_picture_type[3] = {
    1,  /* [0] I */
    2,  /* [1] P */
    3,  /* [2] B */
};
1792
1793 static void
1794 gen75_mfc_mpeg2_pic_state(VADriverContextP ctx,
1795                           struct intel_encoder_context *encoder_context,
1796                           struct encode_state *encode_state)
1797 {
1798     struct intel_batchbuffer *batch = encoder_context->base.batch;
1799     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1800     VAEncPictureParameterBufferMPEG2 *pic_param;
1801     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1802     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1803
1804     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1805     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1806
1807     BEGIN_BCS_BATCH(batch, 13);
1808     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1809     OUT_BCS_BATCH(batch,
1810                   (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1811                   (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1812                   (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1813                   (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1814                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1815                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1816                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1817                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1818                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1819                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1820                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1821                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1822     OUT_BCS_BATCH(batch,
1823                   0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1824                   va_to_gen75_mpeg2_picture_type[pic_param->picture_type] << 9 |
1825                   0);
1826     OUT_BCS_BATCH(batch,
1827                   1 << 31 |     /* slice concealment */
1828                   (height_in_mbs - 1) << 16 |
1829                   (width_in_mbs - 1));
1830     OUT_BCS_BATCH(batch, 0);
1831     OUT_BCS_BATCH(batch, 0);
1832     OUT_BCS_BATCH(batch,
1833                   0xFFF << 16 | /* InterMBMaxSize */
1834                   0xFFF << 0 |  /* IntraMBMaxSize */
1835                   0);
1836     OUT_BCS_BATCH(batch, 0);
1837     OUT_BCS_BATCH(batch, 0);
1838     OUT_BCS_BATCH(batch, 0);
1839     OUT_BCS_BATCH(batch, 0);
1840     OUT_BCS_BATCH(batch, 0);
1841     OUT_BCS_BATCH(batch, 0);
1842     ADVANCE_BCS_BATCH(batch);
1843 }
1844
1845 static void
1846 gen75_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1847 {
1848     unsigned char intra_qm[64] = {
1849          8, 16, 19, 22, 26, 27, 29, 34,
1850         16, 16, 22, 24, 27, 29, 34, 37,
1851         19, 22, 26, 27, 29, 34, 34, 38,
1852         22, 22, 26, 27, 29, 34, 37, 40,
1853         22, 26, 27, 29, 32, 35, 40, 48,
1854         26, 27, 29, 32, 35, 40, 48, 58,
1855         26, 27, 29, 34, 38, 46, 56, 69,
1856         27, 29, 35, 38, 46, 56, 69, 83
1857     };
1858
1859     unsigned char non_intra_qm[64] = {
1860         16, 16, 16, 16, 16, 16, 16, 16,
1861         16, 16, 16, 16, 16, 16, 16, 16,
1862         16, 16, 16, 16, 16, 16, 16, 16,
1863         16, 16, 16, 16, 16, 16, 16, 16,
1864         16, 16, 16, 16, 16, 16, 16, 16,
1865         16, 16, 16, 16, 16, 16, 16, 16,
1866         16, 16, 16, 16, 16, 16, 16, 16,
1867         16, 16, 16, 16, 16, 16, 16, 16
1868     };
1869
1870     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1871     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
1872 }
1873
1874 static void
1875 gen75_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1876 {
1877     unsigned short intra_fqm[64] = {
1878          65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1879          65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1880          65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1881          65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1882          65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1883          65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1884          65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1885          65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1886     };
1887
1888     unsigned short non_intra_fqm[64] = {
1889         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1890         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1891         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1892         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1893         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1894         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1895         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1896         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1897     };
1898
1899     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1900     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
1901 }
1902
1903 static void
1904 gen75_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1905                                  struct intel_encoder_context *encoder_context,
1906                                  int x, int y,
1907                                  int next_x, int next_y,
1908                                  int is_fisrt_slice_group,
1909                                  int is_last_slice_group,
1910                                  int intra_slice,
1911                                  int qp,
1912                                  struct intel_batchbuffer *batch)
1913 {
1914     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1915
1916     if (batch == NULL)
1917         batch = encoder_context->base.batch;
1918
1919     BEGIN_BCS_BATCH(batch, 8);
1920
1921     OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1922     OUT_BCS_BATCH(batch,
1923                   0 << 31 |                             /* MbRateCtrlFlag */
1924                   !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
1925                   1 << 17 |                             /* Insert Header before the first slice group data */
1926                   1 << 16 |                             /* SliceData PresentFlag: always 1 */
1927                   1 << 15 |                             /* TailPresentFlag: always 1 */
1928                   0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
1929                   !!intra_slice << 13 |                 /* IntraSlice */
1930                   !!intra_slice << 12 |                 /* IntraSliceFlag */
1931                   0);
1932     OUT_BCS_BATCH(batch,
1933                   next_y << 24 |
1934                   next_x << 16 |
1935                   y << 8 |
1936                   x << 0 |
1937                   0);
1938     OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
1939     /* bitstream pointer is only loaded once for the first slice of a frame when 
1940      * LoadSlicePointerFlag is 0
1941      */
1942     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1943     OUT_BCS_BATCH(batch, 0);    /* FIXME: */
1944     OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
1945     OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
1946
1947     ADVANCE_BCS_BATCH(batch);
1948 }
1949
1950 static int
1951 gen75_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1952                                  struct intel_encoder_context *encoder_context,
1953                                  int x, int y,
1954                                  int first_mb_in_slice,
1955                                  int last_mb_in_slice,
1956                                  int first_mb_in_slice_group,
1957                                  int last_mb_in_slice_group,
1958                                  int mb_type,
1959                                  int qp_scale_code,
1960                                  int coded_block_pattern,
1961                                  unsigned char target_size_in_word,
1962                                  unsigned char max_size_in_word,
1963                                  struct intel_batchbuffer *batch)
1964 {
1965     int len_in_dwords = 9;
1966
1967     if (batch == NULL)
1968         batch = encoder_context->base.batch;
1969
1970     BEGIN_BCS_BATCH(batch, len_in_dwords);
1971
1972     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1973     OUT_BCS_BATCH(batch,
1974                   0 << 24 |     /* PackedMvNum */
1975                   0 << 20 |     /* MvFormat */
1976                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1977                   0 << 15 |     /* TransformFlag: frame DCT */
1978                   0 << 14 |     /* FieldMbFlag */
1979                   1 << 13 |     /* IntraMbFlag */
1980                   mb_type << 8 |   /* MbType: Intra */
1981                   0 << 2 |      /* SkipMbFlag */
1982                   0 << 0 |      /* InterMbMode */
1983                   0);
1984     OUT_BCS_BATCH(batch, y << 16 | x);
1985     OUT_BCS_BATCH(batch,
1986                   max_size_in_word << 24 |
1987                   target_size_in_word << 16 |
1988                   coded_block_pattern << 6 |      /* CBP */
1989                   0);
1990     OUT_BCS_BATCH(batch,
1991                   last_mb_in_slice << 31 |
1992                   first_mb_in_slice << 30 |
1993                   0 << 27 |     /* EnableCoeffClamp */
1994                   last_mb_in_slice_group << 26 |
1995                   0 << 25 |     /* MbSkipConvDisable */
1996                   first_mb_in_slice_group << 24 |
1997                   0 << 16 |     /* MvFieldSelect */
1998                   qp_scale_code << 0 |
1999                   0);
2000     OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
2001     OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
2002     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
2003     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
2004
2005     ADVANCE_BCS_BATCH(batch);
2006
2007     return len_in_dwords;
2008 }
2009
/*
 * Index, counted in 16-bit units, of the first motion-vector component
 * inside the per-macroblock message read by
 * gen75_mfc_mpeg2_pak_object_inter().
 */
#define MPEG2_INTER_MV_OFFSET   12

/* Allowed motion-vector range for each f_code; entry 0 is a placeholder. */
static struct _mv_ranges
{
    int low;    /* lower bound, in units of 1/2 pixel */
    int high;   /* upper bound, in units of 1/2 pixel */
} mv_ranges[] = {
    {     0,    0 },    /* f_code 0: never used for clamping */
    {   -16,   15 },
    {   -32,   31 },
    {   -64,   63 },
    {  -128,  127 },
    {  -256,  255 },
    {  -512,  511 },
    { -1024, 1023 },
    { -2048, 2047 },
    { -4096, 4095 }
};

/*
 * Sanitise one motion-vector component.
 *
 * mv          component in half-pixel units
 * pos         macroblock index along the same axis
 * display_max picture size along that axis, in pixels
 * f_code      selects the allowed range from mv_ranges
 *
 * The vector is reset to 0 when it would move the 16-pixel macroblock
 * outside [0, display_max]. The result is then clamped to the f_code
 * range; an f_code without a usable table entry (<= 0 or past the end of
 * the table) skips the clamp.
 */
static int
mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
{
    const int range_count = (int)(sizeof(mv_ranges) / sizeof(mv_ranges[0]));

    /* positions are scaled by 2 to be in the same half-pixel units as mv */
    if (mv + pos * 16 * 2 < 0 ||
        mv + (pos + 1) * 16 * 2 > display_max * 2)
        mv = 0;

    if (f_code <= 0 || f_code >= range_count)
        return mv;

    if (mv < mv_ranges[f_code].low)
        return mv_ranges[f_code].low;

    if (mv > mv_ranges[f_code].high)
        return mv_ranges[f_code].high;

    return mv;
}
2046
2047 static int
2048 gen75_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
2049                                  struct encode_state *encode_state,
2050                                  struct intel_encoder_context *encoder_context,
2051                                  unsigned int *msg,
2052                                  int width_in_mbs, int height_in_mbs,
2053                                  int x, int y,
2054                                  int first_mb_in_slice,
2055                                  int last_mb_in_slice,
2056                                  int first_mb_in_slice_group,
2057                                  int last_mb_in_slice_group,
2058                                  int qp_scale_code,
2059                                  unsigned char target_size_in_word,
2060                                  unsigned char max_size_in_word,
2061                                  struct intel_batchbuffer *batch)
2062 {
2063     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
2064     int len_in_dwords = 9;
2065     short *mvptr, mvx0, mvy0, mvx1, mvy1;
2066     
2067     if (batch == NULL)
2068         batch = encoder_context->base.batch;
2069
2070     mvptr = (short *)msg;
2071     mvx0 = mpeg2_motion_vector(mvptr[MPEG2_INTER_MV_OFFSET + 0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
2072     mvy0 = mpeg2_motion_vector(mvptr[MPEG2_INTER_MV_OFFSET + 1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
2073     mvx1 = mpeg2_motion_vector(mvptr[MPEG2_INTER_MV_OFFSET + 2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
2074     mvy1 = mpeg2_motion_vector(mvptr[MPEG2_INTER_MV_OFFSET + 3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
2075
2076     BEGIN_BCS_BATCH(batch, len_in_dwords);
2077
2078     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
2079     OUT_BCS_BATCH(batch,
2080                   2 << 24 |     /* PackedMvNum */
2081                   7 << 20 |     /* MvFormat */
2082                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
2083                   0 << 15 |     /* TransformFlag: frame DCT */
2084                   0 << 14 |     /* FieldMbFlag */
2085                   0 << 13 |     /* IntraMbFlag */
2086                   1 << 8 |      /* MbType: Frame-based */
2087                   0 << 2 |      /* SkipMbFlag */
2088                   0 << 0 |      /* InterMbMode */
2089                   0);
2090     OUT_BCS_BATCH(batch, y << 16 | x);
2091     OUT_BCS_BATCH(batch,
2092                   max_size_in_word << 24 |
2093                   target_size_in_word << 16 |
2094                   0x3f << 6 |   /* CBP */
2095                   0);
2096     OUT_BCS_BATCH(batch,
2097                   last_mb_in_slice << 31 |
2098                   first_mb_in_slice << 30 |
2099                   0 << 27 |     /* EnableCoeffClamp */
2100                   last_mb_in_slice_group << 26 |
2101                   0 << 25 |     /* MbSkipConvDisable */
2102                   first_mb_in_slice_group << 24 |
2103                   0 << 16 |     /* MvFieldSelect */
2104                   qp_scale_code << 0 |
2105                   0);
2106
2107     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
2108     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
2109     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
2110     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
2111
2112     ADVANCE_BCS_BATCH(batch);
2113
2114     return len_in_dwords;
2115 }
2116
2117 static void
2118 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
2119                                            struct encode_state *encode_state,
2120                                            struct intel_encoder_context *encoder_context,
2121                                            struct intel_batchbuffer *slice_batch)
2122 {
2123     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2124     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
2125
2126     if (encode_state->packed_header_data[idx]) {
2127         VAEncPackedHeaderParameterBuffer *param = NULL;
2128         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2129         unsigned int length_in_bits;
2130
2131         assert(encode_state->packed_header_param[idx]);
2132         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2133         length_in_bits = param->bit_length;
2134
2135         mfc_context->insert_object(ctx,
2136                                    encoder_context,
2137                                    header_data,
2138                                    ALIGN(length_in_bits, 32) >> 5,
2139                                    length_in_bits & 0x1f,
2140                                    5,   /* FIXME: check it */
2141                                    0,
2142                                    0,
2143                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2144                                    slice_batch);
2145     }
2146
2147     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2148
2149     if (encode_state->packed_header_data[idx]) {
2150         VAEncPackedHeaderParameterBuffer *param = NULL;
2151         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2152         unsigned int length_in_bits;
2153
2154         assert(encode_state->packed_header_param[idx]);
2155         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2156         length_in_bits = param->bit_length;
2157
2158         mfc_context->insert_object(ctx,
2159                                    encoder_context,
2160                                    header_data,
2161                                    ALIGN(length_in_bits, 32) >> 5,
2162                                    length_in_bits & 0x1f,
2163                                    5,   /* FIXME: check it */
2164                                    0,
2165                                    0,
2166                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2167                                    slice_batch);
2168     }
2169 }
2170
2171 static void 
2172 gen75_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2173                                      struct encode_state *encode_state,
2174                                      struct intel_encoder_context *encoder_context,
2175                                      int slice_index,
2176                                      VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2177                                      struct intel_batchbuffer *slice_batch)
2178 {
2179     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2180     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2181     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2182     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2183     unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2184     unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2185     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2186     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2187     int i, j;
2188     int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2189     unsigned int *msg = NULL, offset = 0;
2190     unsigned char *msg_ptr = NULL;
2191
2192     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
2193     h_start_pos = slice_param->macroblock_address % width_in_mbs;
2194     v_start_pos = slice_param->macroblock_address / width_in_mbs;
2195     assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
2196
2197     dri_bo_map(vme_context->vme_output.bo , 0);
2198     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
2199
2200     if (next_slice_group_param) {
2201         h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2202         v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
2203     } else {
2204         h_next_start_pos = 0;
2205         v_next_start_pos = height_in_mbs;
2206     }
2207
2208     gen75_mfc_mpeg2_slicegroup_state(ctx,
2209                                      encoder_context,
2210                                      h_start_pos,
2211                                      v_start_pos,
2212                                      h_next_start_pos,
2213                                      v_next_start_pos,
2214                                      slice_index == 0,
2215                                      next_slice_group_param == NULL,
2216                                      slice_param->is_intra_slice,
2217                                      slice_param->quantiser_scale_code,
2218                                      slice_batch);
2219
2220     if (slice_index == 0) 
2221         intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2222
2223     /* Insert '00' to make sure the header is valid */
2224     mfc_context->insert_object(ctx,
2225                                encoder_context,
2226                                (unsigned int*)section_delimiter,
2227                                1,
2228                                8,   /* 8bits in the last DWORD */
2229                                1,   /* 1 byte */
2230                                1,
2231                                0,
2232                                0,
2233                                slice_batch);
2234
2235     for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2236         /* PAK for each macroblocks */
2237         for (j = 0; j < slice_param->num_macroblocks; j++) {
2238             int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2239             int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2240             int first_mb_in_slice = (j == 0);
2241             int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2242             int first_mb_in_slice_group = (i == 0 && j == 0);
2243             int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2244                                           j == slice_param->num_macroblocks - 1);
2245
2246             msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
2247
2248             if (slice_param->is_intra_slice) {
2249                 gen75_mfc_mpeg2_pak_object_intra(ctx,
2250                                                  encoder_context,
2251                                                  h_pos, v_pos,
2252                                                  first_mb_in_slice,
2253                                                  last_mb_in_slice,
2254                                                  first_mb_in_slice_group,
2255                                                  last_mb_in_slice_group,
2256                                                  0x1a,
2257                                                  slice_param->quantiser_scale_code,
2258                                                  0x3f,
2259                                                  0,
2260                                                  0xff,
2261                                                  slice_batch);
2262             } else {
2263                 gen75_mfc_mpeg2_pak_object_inter(ctx,
2264                                                  encode_state,
2265                                                  encoder_context,
2266                                                  msg,
2267                                                  width_in_mbs, height_in_mbs,
2268                                                  h_pos, v_pos,
2269                                                  first_mb_in_slice,
2270                                                  last_mb_in_slice,
2271                                                  first_mb_in_slice_group,
2272                                                  last_mb_in_slice_group,
2273                                                  slice_param->quantiser_scale_code,
2274                                                  0,
2275                                                  0xff,
2276                                                  slice_batch);
2277             }
2278         }
2279
2280         slice_param++;
2281     }
2282
2283     dri_bo_unmap(vme_context->vme_output.bo);
2284
2285     /* tail data */
2286     if (next_slice_group_param == NULL) { /* end of a picture */
2287         mfc_context->insert_object(ctx,
2288                                    encoder_context,
2289                                    (unsigned int *)tail_delimiter,
2290                                    2,
2291                                    8,   /* 8bits in the last DWORD */
2292                                    5,   /* 5 bytes */
2293                                    1,
2294                                    1,
2295                                    0,
2296                                    slice_batch);
2297     } else {        /* end of a lsice group */
2298         mfc_context->insert_object(ctx,
2299                                    encoder_context,
2300                                    (unsigned int *)section_delimiter,
2301                                    1,
2302                                    8,   /* 8bits in the last DWORD */
2303                                    1,   /* 1 byte */
2304                                    1,
2305                                    1,
2306                                    0,
2307                                    slice_batch);
2308     }
2309 }
2310
2311 /* 
2312  * A batch buffer for all slices, including slice state, 
2313  * slice insert object and slice pak object commands
2314  *
2315  */
2316 static dri_bo *
2317 gen75_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2318                                            struct encode_state *encode_state,
2319                                            struct intel_encoder_context *encoder_context)
2320 {
2321     struct i965_driver_data *i965 = i965_driver_data(ctx);
2322     struct intel_batchbuffer *batch;
2323     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2324     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2325     dri_bo *batch_bo;
2326     int i;
2327     int buffer_size;
2328     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2329     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2330
2331     buffer_size = width_in_mbs * height_in_mbs * 64;
2332     batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
2333     batch_bo = batch->buffer;
2334
2335     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2336         if (i == encode_state->num_slice_params_ext - 1)
2337             next_slice_group_param = NULL;
2338         else
2339             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2340
2341         gen75_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2342     }
2343
2344     intel_batchbuffer_align(batch, 8);
2345     
2346     BEGIN_BCS_BATCH(batch, 2);
2347     OUT_BCS_BATCH(batch, 0);
2348     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2349     ADVANCE_BCS_BATCH(batch);
2350
2351     dri_bo_reference(batch_bo);
2352     intel_batchbuffer_free(batch);
2353
2354     return batch_bo;
2355 }
2356
2357 static void
2358 gen75_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2359                                             struct encode_state *encode_state,
2360                                             struct intel_encoder_context *encoder_context)
2361 {
2362     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2363
2364     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2365     mfc_context->set_surface_state(ctx, encoder_context);
2366     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2367     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
2368     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2369     gen75_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2370     gen75_mfc_mpeg2_qm_state(ctx, encoder_context);
2371     gen75_mfc_mpeg2_fqm_state(ctx, encoder_context);
2372 }
2373
2374 static void
2375 gen75_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2376                                     struct encode_state *encode_state,
2377                                     struct intel_encoder_context *encoder_context)
2378 {
2379     struct intel_batchbuffer *batch = encoder_context->base.batch;
2380     dri_bo *slice_batch_bo;
2381
2382     slice_batch_bo = gen75_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2383
2384     // begin programing
2385     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
2386     intel_batchbuffer_emit_mi_flush(batch);
2387     
2388     // picture level programing
2389     gen75_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
2390
2391     BEGIN_BCS_BATCH(batch, 2);
2392     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
2393     OUT_BCS_RELOC(batch,
2394                   slice_batch_bo,
2395                   I915_GEM_DOMAIN_COMMAND, 0, 
2396                   0);
2397     ADVANCE_BCS_BATCH(batch);
2398
2399     // end programing
2400     intel_batchbuffer_end_atomic(batch);
2401
2402     dri_bo_unreference(slice_batch_bo);
2403 }
2404
2405 static VAStatus
2406 intel_mfc_mpeg2_prepare(VADriverContextP ctx, 
2407                         struct encode_state *encode_state,
2408                         struct intel_encoder_context *encoder_context)
2409 {
2410     struct i965_driver_data *i965 = i965_driver_data(ctx);
2411     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2412     struct object_surface *obj_surface; 
2413     struct object_buffer *obj_buffer;
2414     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
2415     struct i965_coded_buffer_segment *coded_buffer_segment;
2416     VAStatus vaStatus = VA_STATUS_SUCCESS;
2417     dri_bo *bo;
2418     int i;
2419
2420     /* reconstructed surface */
2421     obj_surface = SURFACE(pic_param->reconstructed_picture);
2422     assert(obj_surface);
2423     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2424     mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2425     dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2426     mfc_context->surface_state.width = obj_surface->orig_width;
2427     mfc_context->surface_state.height = obj_surface->orig_height;
2428     mfc_context->surface_state.w_pitch = obj_surface->width;
2429     mfc_context->surface_state.h_pitch = obj_surface->height;
2430
2431     /* forward reference */
2432     obj_surface = SURFACE(pic_param->forward_reference_picture);
2433
2434     if (obj_surface && obj_surface->bo) {
2435         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2436         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2437     } else
2438         mfc_context->reference_surfaces[0].bo = NULL;
2439
2440     /* backward reference */
2441     obj_surface = SURFACE(pic_param->backward_reference_picture);
2442
2443     if (obj_surface && obj_surface->bo) {
2444         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2445         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2446     } else {
2447         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2448
2449         if (mfc_context->reference_surfaces[1].bo)
2450             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2451     }
2452
2453     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2454         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2455
2456         if (mfc_context->reference_surfaces[i].bo)
2457             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2458     }
2459     
2460     /* input YUV surface */
2461     obj_surface = SURFACE(encoder_context->input_yuv_surface);
2462     assert(obj_surface && obj_surface->bo);
2463     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2464     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2465
2466     /* coded buffer */
2467     obj_buffer = BUFFER(pic_param->coded_buf);
2468     bo = obj_buffer->buffer_store->bo;
2469     assert(bo);
2470     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2471     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2472     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2473     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2474
2475     /* set the internal flag to 0 to indicate the coded size is unknown */
2476     dri_bo_map(bo, 1);
2477     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2478     coded_buffer_segment->mapped = 0;
2479     coded_buffer_segment->codec = CODED_MPEG2;
2480     dri_bo_unmap(bo);
2481
2482     return vaStatus;
2483 }
2484
2485 static VAStatus
2486 gen75_mfc_mpeg2_encode_picture(VADriverContextP ctx, 
2487                                struct encode_state *encode_state,
2488                                struct intel_encoder_context *encoder_context)
2489 {
2490     gen75_mfc_init(ctx, encode_state, encoder_context);
2491     intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2492     /*Programing bcs pipeline*/
2493     gen75_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2494     gen75_mfc_run(ctx, encode_state, encoder_context);
2495
2496     return VA_STATUS_SUCCESS;
2497 }
2498
2499 static void
2500 gen75_mfc_context_destroy(void *context)
2501 {
2502     struct gen6_mfc_context *mfc_context = context;
2503     int i;
2504
2505     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2506     mfc_context->post_deblocking_output.bo = NULL;
2507
2508     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2509     mfc_context->pre_deblocking_output.bo = NULL;
2510
2511     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2512     mfc_context->uncompressed_picture_source.bo = NULL;
2513
2514     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
2515     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2516
2517     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2518         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2519         mfc_context->direct_mv_buffers[i].bo = NULL;
2520     }
2521
2522     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2523     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
2524
2525     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2526     mfc_context->macroblock_status_buffer.bo = NULL;
2527
2528     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2529     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2530
2531     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2532     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2533
2534
2535     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2536         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2537         mfc_context->reference_surfaces[i].bo = NULL;  
2538     }
2539
2540     i965_gpe_context_destroy(&mfc_context->gpe_context);
2541
2542     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2543     mfc_context->mfc_batchbuffer_surface.bo = NULL;
2544
2545     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2546     mfc_context->aux_batchbuffer_surface.bo = NULL;
2547
2548     if (mfc_context->aux_batchbuffer)
2549         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2550
2551     mfc_context->aux_batchbuffer = NULL;
2552
2553     free(mfc_context);
2554 }
2555
2556 static VAStatus gen75_mfc_pipeline(VADriverContextP ctx,
2557                   VAProfile profile,
2558                   struct encode_state *encode_state,
2559                   struct intel_encoder_context *encoder_context)
2560 {
2561     VAStatus vaStatus;
2562
2563     switch (profile) {
2564     case VAProfileH264Baseline:
2565     case VAProfileH264Main:
2566     case VAProfileH264High:
2567         vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
2568         break;
2569
2570         /* FIXME: add for other profile */
2571     case VAProfileMPEG2Simple:
2572     case VAProfileMPEG2Main:
2573         vaStatus = gen75_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
2574         break;
2575
2576     default:
2577         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2578         break;
2579     }
2580
2581     return vaStatus;
2582 }
2583
2584 Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2585 {
2586     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
2587
2588     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2589
2590     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
2591     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
2592
2593     mfc_context->gpe_context.curbe.length = 32 * 4;
2594
2595     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2596     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
2597     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
2598     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
2599     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
2600
2601     i965_gpe_load_kernels(ctx,
2602                           &mfc_context->gpe_context,
2603                           gen75_mfc_kernels,
2604                           NUM_MFC_KERNEL);
2605
2606     mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
2607     mfc_context->set_surface_state = gen75_mfc_surface_state;
2608     mfc_context->ind_obj_base_addr_state = gen75_mfc_ind_obj_base_addr_state;
2609     mfc_context->avc_img_state = gen75_mfc_avc_img_state;
2610     mfc_context->avc_qm_state = gen75_mfc_avc_qm_state;
2611     mfc_context->avc_fqm_state = gen75_mfc_avc_fqm_state;
2612     mfc_context->insert_object = gen75_mfc_avc_insert_object;
2613     mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;
2614
2615     encoder_context->mfc_context = mfc_context;
2616     encoder_context->mfc_context_destroy = gen75_mfc_context_destroy;
2617     encoder_context->mfc_pipeline = gen75_mfc_pipeline;
2618     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
2619
2620     return True;
2621 }