Avoid the duplicated macro-definition of surface size
[platform/upstream/libva-intel-driver.git] / src / gen75_mfc.c
1 /*
2  * Copyright © 2010-2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45
/*
 * AVC RDO / inter-message constants.
 * NOTE(review): none of these five are referenced in the visible part of
 * this file; their meaning is only implied by the names - confirm at the
 * point of use.
 */
#define AVC_INTRA_RDO_OFFSET    4
#define AVC_INTER_RDO_OFFSET    10
#define AVC_INTER_MSG_OFFSET    8
#define AVC_INTER_MV_OFFSET     48
#define AVC_RDO_MASK            0xFFFF

/* NOTE(review): not referenced in the visible part of this file. */
#define MFC_SOFTWARE_HASWELL    0

/*
 * Surface-state / binding-table layout helpers.  Each surface state slot is
 * padded to the larger of the GEN6 and GEN7 sizes; the binding table starts
 * right after MAX_MEDIA_SURFACES_GEN6 slots and holds one unsigned int per
 * entry.  The macro argument is parenthesized so that an expression such as
 * `i + 1` is multiplied as a whole.
 */
#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

/*
 * IS_STEPPING_BPLUS(i965) is true when i965->intel.revision is at least
 * B0_STEP_REV.  The functions below use it to pick the longer "_bplus"
 * command layouts.  The argument is parenthesized so any pointer expression
 * can be passed.
 */
#define B0_STEP_REV             2
#define IS_STEPPING_BPLUS(i965) (((i965)->intel.revision) >= B0_STEP_REV)
60
61 static const uint32_t gen75_mfc_batchbuffer_avc[][4] = {
62 #include "shaders/utils/mfc_batchbuffer_hsw.g75b"
63 };
64
65 static struct i965_kernel gen75_mfc_kernels[] = {
66     {
67         "MFC AVC INTRA BATCHBUFFER ",
68         MFC_BATCHBUFFER_AVC_INTRA,
69         gen75_mfc_batchbuffer_avc,
70         sizeof(gen75_mfc_batchbuffer_avc),
71         NULL
72     },
73 };
74
/*
 * Inter-prediction mode and motion-vector constants.
 * NOTE(review): not referenced in the visible part of this file; presumably
 * they decode fields of an inter macroblock message - confirm at the point
 * of use.
 */
#define INTER_MODE_MASK     0x03
#define INTER_8X8           0x03
#define INTER_16X8          0x01
#define INTER_8X16          0x02
#define SUBMB_SHAPE_MASK    0x00FF00

#define INTER_MV8           (4 << 20)
#define INTER_MV32          (6 << 20)
83
84
85 static void
86 gen75_mfc_pipe_mode_select(VADriverContextP ctx,
87                            int standard_select,
88                            struct intel_encoder_context *encoder_context)
89 {
90     struct intel_batchbuffer *batch = encoder_context->base.batch;
91     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
92     assert(standard_select == MFX_FORMAT_MPEG2 ||
93            standard_select == MFX_FORMAT_AVC);
94
95     BEGIN_BCS_BATCH(batch, 5);
96
97     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
98     OUT_BCS_BATCH(batch,
99                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
100                   (MFD_MODE_VLD << 15) | /* VLD mode */
101                   (0 << 10) | /* Stream-Out Enable */
102                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
103                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
104                   (0 << 5)  | /* not in stitch mode */
105                   (1 << 4)  | /* encoding mode */
106                   (standard_select << 0));  /* standard select: avc or mpeg2 */
107     OUT_BCS_BATCH(batch,
108                   (0 << 7)  | /* expand NOA bus flag */
109                   (0 << 6)  | /* disable slice-level clock gating */
110                   (0 << 5)  | /* disable clock gating for NOA */
111                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
112                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
113                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
114                   (0 << 1)  |
115                   (0 << 0));
116     OUT_BCS_BATCH(batch, 0);
117     OUT_BCS_BATCH(batch, 0);
118
119     ADVANCE_BCS_BATCH(batch);
120 }
121
122 static void
123 gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
124 {
125     struct intel_batchbuffer *batch = encoder_context->base.batch;
126     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
127
128     BEGIN_BCS_BATCH(batch, 6);
129
130     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
131     OUT_BCS_BATCH(batch, 0);
132     OUT_BCS_BATCH(batch,
133                   ((mfc_context->surface_state.height - 1) << 18) |
134                   ((mfc_context->surface_state.width - 1) << 4));
135     OUT_BCS_BATCH(batch,
136                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
137                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
138                   (0 << 22) | /* surface object control state, FIXME??? */
139                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
140                   (0 << 2)  | /* must be 0 for interleave U/V */
141                   (1 << 1)  | /* must be tiled */
142                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
143     OUT_BCS_BATCH(batch,
144                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
145                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
146     OUT_BCS_BATCH(batch, 0);
147
148     ADVANCE_BCS_BATCH(batch);
149 }
150
151 static void
152 gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
153                                         struct intel_encoder_context *encoder_context)
154 {
155     struct intel_batchbuffer *batch = encoder_context->base.batch;
156     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
157     struct gen6_vme_context *vme_context = encoder_context->vme_context;
158
159     BEGIN_BCS_BATCH(batch, 26);
160
161     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
162     /* the DW1-3 is for the MFX indirect bistream offset */
163     OUT_BCS_BATCH(batch, 0);
164     OUT_BCS_BATCH(batch, 0);
165     OUT_BCS_BATCH(batch, 0);
166     /* the DW4-5 is the MFX upper bound */
167     OUT_BCS_BATCH(batch, 0);
168     OUT_BCS_BATCH(batch, 0);
169
170     /* the DW6-10 is for MFX Indirect MV Object Base Address */
171     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
172     OUT_BCS_BATCH(batch, 0);
173     OUT_BCS_BATCH(batch, 0);
174     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
175     OUT_BCS_BATCH(batch, 0);
176
177     /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
178     OUT_BCS_BATCH(batch, 0);
179     OUT_BCS_BATCH(batch, 0);
180     OUT_BCS_BATCH(batch, 0);
181     OUT_BCS_BATCH(batch, 0);
182     OUT_BCS_BATCH(batch, 0);
183
184     /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
185     OUT_BCS_BATCH(batch, 0);
186     OUT_BCS_BATCH(batch, 0);
187     OUT_BCS_BATCH(batch, 0);
188     OUT_BCS_BATCH(batch, 0);
189     OUT_BCS_BATCH(batch, 0);
190
191     /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/        
192     OUT_BCS_RELOC(batch,
193                   mfc_context->mfc_indirect_pak_bse_object.bo,
194                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
195                   0);
196     OUT_BCS_BATCH(batch, 0);
197     OUT_BCS_BATCH(batch, 0);
198         
199     OUT_BCS_RELOC(batch,
200                   mfc_context->mfc_indirect_pak_bse_object.bo,
201                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
202                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
203     OUT_BCS_BATCH(batch, 0);
204
205     ADVANCE_BCS_BATCH(batch);
206 }
207
208 static void
209 gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
210 {
211     struct intel_batchbuffer *batch = encoder_context->base.batch;
212     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
213     struct gen6_vme_context *vme_context = encoder_context->vme_context;
214     struct i965_driver_data *i965 = i965_driver_data(ctx);
215
216     if (IS_STEPPING_BPLUS(i965)) {
217         gen75_mfc_ind_obj_base_addr_state_bplus(ctx, encoder_context);
218         return;
219     }
220
221     BEGIN_BCS_BATCH(batch, 11);
222
223     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
224     OUT_BCS_BATCH(batch, 0);
225     OUT_BCS_BATCH(batch, 0);
226     /* MFX Indirect MV Object Base Address */
227     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
228     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
229     OUT_BCS_BATCH(batch, 0);
230     OUT_BCS_BATCH(batch, 0);
231     OUT_BCS_BATCH(batch, 0);
232     OUT_BCS_BATCH(batch, 0);
233     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
234     OUT_BCS_RELOC(batch,
235                   mfc_context->mfc_indirect_pak_bse_object.bo,
236                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
237                   0);
238     OUT_BCS_RELOC(batch,
239                   mfc_context->mfc_indirect_pak_bse_object.bo,
240                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
241                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
242
243     ADVANCE_BCS_BATCH(batch);
244 }
245
246 static void
247 gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,  
248                         struct intel_encoder_context *encoder_context)
249 {
250     struct intel_batchbuffer *batch = encoder_context->base.batch;
251     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
252     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
253
254     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
255     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
256
257     BEGIN_BCS_BATCH(batch, 16);
258
259     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
260     /*DW1. MB setting of frame */
261     OUT_BCS_BATCH(batch,
262                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
263     OUT_BCS_BATCH(batch, 
264                   ((height_in_mbs - 1) << 16) | 
265                   ((width_in_mbs - 1) << 0));
266     /* DW3 QP setting */
267     OUT_BCS_BATCH(batch, 
268                   (0 << 24) |   /* Second Chroma QP Offset */
269                   (0 << 16) |   /* Chroma QP Offset */
270                   (0 << 14) |   /* Max-bit conformance Intra flag */
271                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
272                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
273                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
274                   (0 << 8)  |   /* FIXME: Image Structure */
275                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
276     OUT_BCS_BATCH(batch,
277                   (0 << 16) |   /* Mininum Frame size */
278                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
279                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
280                   (0 << 13) |   /* CABAC 0 word insertion test enable */
281                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
282                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
283                   (0 << 8)  |   /* FIXME: MbMvFormatFlag */
284                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
285                   (0 << 6)  |   /* Only valid for VLD decoding mode */
286                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
287                   (0 << 4)  |   /* Direct 8x8 inference flag */
288                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
289                   (1 << 2)  |   /* Frame MB only flag */
290                   (0 << 1)  |   /* MBAFF mode is in active */
291                   (0 << 0));    /* Field picture flag */
292     /* DW5 Trellis quantization */
293     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
294     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
295                   (0xBB8 << 16) |       /* InterMbMaxSz */
296                   (0xEE8) );            /* IntraMbMaxSz */
297     OUT_BCS_BATCH(batch, 0);            /* Reserved */
298     /* DW8. QP delta */
299     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
300     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
301     /* DW10. Bit setting for MB */
302     OUT_BCS_BATCH(batch, 0x8C000000);
303     OUT_BCS_BATCH(batch, 0x00010000);
304     /* DW12. */
305     OUT_BCS_BATCH(batch, 0);
306     OUT_BCS_BATCH(batch, 0x02010100);
307     /* DW14. For short format */
308     OUT_BCS_BATCH(batch, 0);
309     OUT_BCS_BATCH(batch, 0);
310
311     ADVANCE_BCS_BATCH(batch);
312 }
313
314 static void
315 gen75_mfc_qm_state(VADriverContextP ctx,
316                    int qm_type,
317                    unsigned int *qm,
318                    int qm_length,
319                    struct intel_encoder_context *encoder_context)
320 {
321     struct intel_batchbuffer *batch = encoder_context->base.batch;
322     unsigned int qm_buffer[16];
323
324     assert(qm_length <= 16);
325     assert(sizeof(*qm) == 4);
326     memcpy(qm_buffer, qm, qm_length * 4);
327
328     BEGIN_BCS_BATCH(batch, 18);
329     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
330     OUT_BCS_BATCH(batch, qm_type << 0);
331     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
332     ADVANCE_BCS_BATCH(batch);
333 }
334
335 static void
336 gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
337 {
338     unsigned int qm[16] = {
339         0x10101010, 0x10101010, 0x10101010, 0x10101010,
340         0x10101010, 0x10101010, 0x10101010, 0x10101010,
341         0x10101010, 0x10101010, 0x10101010, 0x10101010,
342         0x10101010, 0x10101010, 0x10101010, 0x10101010
343     };
344
345     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
346     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
347     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
348     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
349 }
350
351 static void
352 gen75_mfc_fqm_state(VADriverContextP ctx,
353                     int fqm_type,
354                     unsigned int *fqm,
355                     int fqm_length,
356                     struct intel_encoder_context *encoder_context)
357 {
358     struct intel_batchbuffer *batch = encoder_context->base.batch;
359     unsigned int fqm_buffer[32];
360
361     assert(fqm_length <= 32);
362     assert(sizeof(*fqm) == 4);
363     memcpy(fqm_buffer, fqm, fqm_length * 4);
364
365     BEGIN_BCS_BATCH(batch, 34);
366     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
367     OUT_BCS_BATCH(batch, fqm_type << 0);
368     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
369     ADVANCE_BCS_BATCH(batch);
370 }
371
372 static void
373 gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
374 {
375     unsigned int qm[32] = {
376         0x10001000, 0x10001000, 0x10001000, 0x10001000,
377         0x10001000, 0x10001000, 0x10001000, 0x10001000,
378         0x10001000, 0x10001000, 0x10001000, 0x10001000,
379         0x10001000, 0x10001000, 0x10001000, 0x10001000,
380         0x10001000, 0x10001000, 0x10001000, 0x10001000,
381         0x10001000, 0x10001000, 0x10001000, 0x10001000,
382         0x10001000, 0x10001000, 0x10001000, 0x10001000,
383         0x10001000, 0x10001000, 0x10001000, 0x10001000
384     };
385
386     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
387     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
388     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
389     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
390 }
391
392 static void
393 gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
394                             unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
395                             int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
396                             struct intel_batchbuffer *batch)
397 {
398     if (batch == NULL)
399         batch = encoder_context->base.batch;
400
401     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
402
403     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
404     OUT_BCS_BATCH(batch,
405                   (0 << 16) |   /* always start at offset 0 */
406                   (data_bits_in_last_dw << 8) |
407                   (skip_emul_byte_count << 4) |
408                   (!!emulation_flag << 3) |
409                   ((!!is_last_header) << 2) |
410                   ((!!is_end_of_slice) << 1) |
411                   (0 << 0));    /* FIXME: ??? */
412     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
413
414     ADVANCE_BCS_BATCH(batch);
415 }
416
417
418 static void gen75_mfc_init(VADriverContextP ctx,
419                            struct encode_state *encode_state,
420                            struct intel_encoder_context *encoder_context)
421 {
422     struct i965_driver_data *i965 = i965_driver_data(ctx);
423     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
424     dri_bo *bo;
425     int i;
426     int width_in_mbs = 0;
427     int height_in_mbs = 0;
428     int slice_batchbuffer_size;
429
430     if (encoder_context->codec == CODEC_H264) {
431         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
432         width_in_mbs = pSequenceParameter->picture_width_in_mbs;
433         height_in_mbs = pSequenceParameter->picture_height_in_mbs;
434     } else {
435         VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
436
437         assert(encoder_context->codec == CODEC_MPEG2);
438
439         width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
440         height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
441     }
442
443     slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
444                 (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
445
446     /*Encode common setup for MFC*/
447     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
448     mfc_context->post_deblocking_output.bo = NULL;
449
450     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
451     mfc_context->pre_deblocking_output.bo = NULL;
452
453     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
454     mfc_context->uncompressed_picture_source.bo = NULL;
455
456     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
457     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
458
459     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
460         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
461         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
462         mfc_context->direct_mv_buffers[i].bo = NULL;
463     }
464
465     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
466         if (mfc_context->reference_surfaces[i].bo != NULL)
467             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
468         mfc_context->reference_surfaces[i].bo = NULL;  
469     }
470
471     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
472     bo = dri_bo_alloc(i965->intel.bufmgr,
473                       "Buffer",
474                       width_in_mbs * 64,
475                       64);
476     assert(bo);
477     mfc_context->intra_row_store_scratch_buffer.bo = bo;
478
479     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
480     bo = dri_bo_alloc(i965->intel.bufmgr,
481                       "Buffer",
482                       width_in_mbs * height_in_mbs * 16,
483                       64);
484     assert(bo);
485     mfc_context->macroblock_status_buffer.bo = bo;
486
487     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
488     bo = dri_bo_alloc(i965->intel.bufmgr,
489                       "Buffer",
490                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
491                       64);
492     assert(bo);
493     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
494
495     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
496     bo = dri_bo_alloc(i965->intel.bufmgr,
497                       "Buffer",
498                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
499                       0x1000);
500     assert(bo);
501     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
502
503     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
504     mfc_context->mfc_batchbuffer_surface.bo = NULL;
505
506     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
507     mfc_context->aux_batchbuffer_surface.bo = NULL;
508
509     if (mfc_context->aux_batchbuffer)
510         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
511
512     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD,
513                                                         slice_batchbuffer_size);
514     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
515     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
516     mfc_context->aux_batchbuffer_surface.pitch = 16;
517     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
518     mfc_context->aux_batchbuffer_surface.size_block = 16;
519
520     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
521 }
522
523 static void
524 gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
525                                     struct intel_encoder_context *encoder_context)
526 {
527     struct intel_batchbuffer *batch = encoder_context->base.batch;
528     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
529     int i;
530
531     BEGIN_BCS_BATCH(batch, 61);
532
533     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
534
535     /* the DW1-3 is for pre_deblocking */
536     if (mfc_context->pre_deblocking_output.bo)
537         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
538                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
539                       0);
540     else
541         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
542
543     OUT_BCS_BATCH(batch, 0);
544     OUT_BCS_BATCH(batch, 0);
545     /* the DW4-6 is for the post_deblocking */
546
547     if (mfc_context->post_deblocking_output.bo)
548         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
549                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
550                       0);                                                                                       /* post output addr  */ 
551     else
552         OUT_BCS_BATCH(batch, 0);
553     OUT_BCS_BATCH(batch, 0);
554     OUT_BCS_BATCH(batch, 0);
555
556     /* the DW7-9 is for the uncompressed_picture */
557     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
558                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
559                   0); /* uncompressed data */
560
561     OUT_BCS_BATCH(batch, 0);
562     OUT_BCS_BATCH(batch, 0);
563
564     /* the DW10-12 is for the mb status */
565     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
566                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
567                   0); /* StreamOut data*/
568     OUT_BCS_BATCH(batch, 0);
569     OUT_BCS_BATCH(batch, 0);
570
571     /* the DW13-15 is for the intra_row_store_scratch */
572     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
573                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
574                   0);   
575     OUT_BCS_BATCH(batch, 0);
576     OUT_BCS_BATCH(batch, 0);
577
578     /* the DW16-18 is for the deblocking filter */
579     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
580                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
581                   0);
582     OUT_BCS_BATCH(batch, 0);
583     OUT_BCS_BATCH(batch, 0);
584
585     /* the DW 19-50 is for Reference pictures*/
586     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
587         if ( mfc_context->reference_surfaces[i].bo != NULL) {
588             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
589                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
590                           0);                   
591         } else {
592             OUT_BCS_BATCH(batch, 0);
593         }
594         OUT_BCS_BATCH(batch, 0);
595     }
596     OUT_BCS_BATCH(batch, 0);
597
598     /* The DW 52-54 is for the MB status buffer */
599     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
600                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
601                   0);                                                                                   /* Macroblock status buffer*/
602         
603     OUT_BCS_BATCH(batch, 0);
604     OUT_BCS_BATCH(batch, 0);
605
606     /* the DW 55-57 is the ILDB buffer */
607     OUT_BCS_BATCH(batch, 0);
608     OUT_BCS_BATCH(batch, 0);
609     OUT_BCS_BATCH(batch, 0);
610
611     /* the DW 58-60 is the second ILDB buffer */
612     OUT_BCS_BATCH(batch, 0);
613     OUT_BCS_BATCH(batch, 0);
614     OUT_BCS_BATCH(batch, 0);
615     ADVANCE_BCS_BATCH(batch);
616 }
617
618 static void
619 gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
620 {
621     struct intel_batchbuffer *batch = encoder_context->base.batch;
622     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
623     struct i965_driver_data *i965 = i965_driver_data(ctx);
624     int i;
625
626     if (IS_STEPPING_BPLUS(i965)) {
627         gen75_mfc_pipe_buf_addr_state_bplus(ctx, encoder_context);
628         return;
629     }
630
631     BEGIN_BCS_BATCH(batch, 25);
632
633     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
634
635     if (mfc_context->pre_deblocking_output.bo)
636         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
637                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
638                       0);
639     else
640         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
641
642     if (mfc_context->post_deblocking_output.bo)
643         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
644                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
645                       0);                                                                                       /* post output addr  */ 
646     else
647         OUT_BCS_BATCH(batch, 0);
648
649     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
650                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
651                   0);                                                                                   /* uncompressed data */
652     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
653                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
654                   0);                                                                                   /* StreamOut data*/
655     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
656                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
657                   0);   
658     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
659                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
660                   0);
661     /* 7..22 Reference pictures*/
662     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
663         if ( mfc_context->reference_surfaces[i].bo != NULL) {
664             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
665                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
666                           0);                   
667         } else {
668             OUT_BCS_BATCH(batch, 0);
669         }
670     }
671     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
672                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
673                   0);                                                                                   /* Macroblock status buffer*/
674
675     OUT_BCS_BATCH(batch, 0);
676
677     ADVANCE_BCS_BATCH(batch);
678 }
679
680 static void
681 gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
682                                      struct intel_encoder_context *encoder_context)
683 {
684     struct intel_batchbuffer *batch = encoder_context->base.batch;
685     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
686
687     int i;
688
689     BEGIN_BCS_BATCH(batch, 71);
690
691     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
692
693     /* Reference frames and Current frames */
694     /* the DW1-32 is for the direct MV for reference */
695     for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
696         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
697             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
698                           I915_GEM_DOMAIN_INSTRUCTION, 0,
699                           0);
700             OUT_BCS_BATCH(batch, 0);
701         } else {
702             OUT_BCS_BATCH(batch, 0);
703             OUT_BCS_BATCH(batch, 0);
704         }
705     }
706     OUT_BCS_BATCH(batch, 0);
707
708     /* the DW34-36 is the MV for the current reference */
709     OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
710                   I915_GEM_DOMAIN_INSTRUCTION, 0,
711                   0);
712
713     OUT_BCS_BATCH(batch, 0);
714     OUT_BCS_BATCH(batch, 0);
715
716     /* POL list */
717     for(i = 0; i < 32; i++) {
718         OUT_BCS_BATCH(batch, i/2);
719     }
720     OUT_BCS_BATCH(batch, 0);
721     OUT_BCS_BATCH(batch, 0);
722
723     ADVANCE_BCS_BATCH(batch);
724 }
725
726 static void
727 gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
728 {
729     struct intel_batchbuffer *batch = encoder_context->base.batch;
730     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
731     struct i965_driver_data *i965 = i965_driver_data(ctx);
732     int i;
733
734     if (IS_STEPPING_BPLUS(i965)) {
735         gen75_mfc_avc_directmode_state_bplus(ctx, encoder_context);
736         return;
737     }
738
739     BEGIN_BCS_BATCH(batch, 69);
740
741     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
742
743     /* Reference frames and Current frames */
744     for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
745         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
746             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
747                           I915_GEM_DOMAIN_INSTRUCTION, 0,
748                           0);
749         } else {
750             OUT_BCS_BATCH(batch, 0);
751         }
752     }
753
754     /* POL list */
755     for(i = 0; i < 32; i++) {
756         OUT_BCS_BATCH(batch, i/2);
757     }
758     OUT_BCS_BATCH(batch, 0);
759     OUT_BCS_BATCH(batch, 0);
760
761     ADVANCE_BCS_BATCH(batch);
762 }
763
764
765 static void
766 gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
767                                         struct intel_encoder_context *encoder_context)
768 {
769     struct intel_batchbuffer *batch = encoder_context->base.batch;
770     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
771
772     BEGIN_BCS_BATCH(batch, 10);
773
774     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
775     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
776                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
777                   0);
778     OUT_BCS_BATCH(batch, 0);
779     OUT_BCS_BATCH(batch, 0);
780         
781     /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
782     OUT_BCS_BATCH(batch, 0);
783     OUT_BCS_BATCH(batch, 0);
784     OUT_BCS_BATCH(batch, 0);
785
786     /* the DW7-9 is for Bitplane Read Buffer Base Address */
787     OUT_BCS_BATCH(batch, 0);
788     OUT_BCS_BATCH(batch, 0);
789     OUT_BCS_BATCH(batch, 0);
790
791     ADVANCE_BCS_BATCH(batch);
792 }
793
794 static void
795 gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
796 {
797     struct intel_batchbuffer *batch = encoder_context->base.batch;
798     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
799     struct i965_driver_data *i965 = i965_driver_data(ctx);
800
801     if (IS_STEPPING_BPLUS(i965)) {
802         gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, encoder_context);
803         return;
804     }
805
806     BEGIN_BCS_BATCH(batch, 4);
807
808     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
809     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
810                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
811                   0);
812     OUT_BCS_BATCH(batch, 0);
813     OUT_BCS_BATCH(batch, 0);
814
815     ADVANCE_BCS_BATCH(batch);
816 }
817
818
819 static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
820                                                        struct encode_state *encode_state,
821                                                        struct intel_encoder_context *encoder_context)
822 {
823     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
824
825     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
826     mfc_context->set_surface_state(ctx, encoder_context);
827     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
828     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
829     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
830     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
831     mfc_context->avc_qm_state(ctx, encoder_context);
832     mfc_context->avc_fqm_state(ctx, encoder_context);
833     gen75_mfc_avc_directmode_state(ctx, encoder_context); 
834     intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
835 }
836
837
838 static VAStatus gen75_mfc_run(VADriverContextP ctx, 
839                               struct encode_state *encode_state,
840                               struct intel_encoder_context *encoder_context)
841 {
842     struct intel_batchbuffer *batch = encoder_context->base.batch;
843
844     intel_batchbuffer_flush(batch);             //run the pipeline
845
846     return VA_STATUS_SUCCESS;
847 }
848
849
850 static VAStatus
851 gen75_mfc_stop(VADriverContextP ctx, 
852                struct encode_state *encode_state,
853                struct intel_encoder_context *encoder_context,
854                int *encoded_bits_size)
855 {
856     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
857     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
858     VACodedBufferSegment *coded_buffer_segment;
859     
860     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
861     assert(vaStatus == VA_STATUS_SUCCESS);
862     *encoded_bits_size = coded_buffer_segment->size * 8;
863     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
864
865     return VA_STATUS_SUCCESS;
866 }
867
868
869 static void
870 gen75_mfc_avc_slice_state(VADriverContextP ctx,
871                           VAEncPictureParameterBufferH264 *pic_param,
872                           VAEncSliceParameterBufferH264 *slice_param,
873                           struct encode_state *encode_state,
874                           struct intel_encoder_context *encoder_context,
875                           int rate_control_enable,
876                           int qp,
877                           struct intel_batchbuffer *batch)
878 {
879     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
880     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
881     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
882     int beginmb = slice_param->macroblock_address;
883     int endmb = beginmb + slice_param->num_macroblocks;
884     int beginx = beginmb % width_in_mbs;
885     int beginy = beginmb / width_in_mbs;
886     int nextx =  endmb % width_in_mbs;
887     int nexty = endmb / width_in_mbs;
888     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
889     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
890     int maxQpN, maxQpP;
891     unsigned char correct[6], grow, shrink;
892     int i;
893     int weighted_pred_idc = 0;
894     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
895     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
896     int num_ref_l0 = 0, num_ref_l1 = 0;
897
898     if (batch == NULL)
899         batch = encoder_context->base.batch;
900
901     if (slice_type == SLICE_TYPE_I) {
902         luma_log2_weight_denom = 0;
903         chroma_log2_weight_denom = 0;
904     } else if (slice_type == SLICE_TYPE_P) {
905         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
906         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
907
908         if (slice_param->num_ref_idx_active_override_flag)
909             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
910     } else if (slice_type == SLICE_TYPE_B) {
911         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
912         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
913         num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
914
915         if (slice_param->num_ref_idx_active_override_flag) {
916             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
917             num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
918         }
919
920         if (weighted_pred_idc == 2) {
921             /* 8.4.3 - Derivation process for prediction weights (8-279) */
922             luma_log2_weight_denom = 5;
923             chroma_log2_weight_denom = 5;
924         }
925     }
926
927     maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
928     maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
929
930     for (i = 0; i < 6; i++)
931         correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
932
933     grow = mfc_context->bit_rate_control_context[slice_type].GrowInit + 
934         (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
935     shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit + 
936         (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
937
938     BEGIN_BCS_BATCH(batch, 11);;
939
940     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
941     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
942
943     OUT_BCS_BATCH(batch,
944                   (num_ref_l0 << 16) |
945                   (num_ref_l1 << 24) |
946                   (chroma_log2_weight_denom << 8) |
947                   (luma_log2_weight_denom << 0));
948
949     OUT_BCS_BATCH(batch, 
950                   (weighted_pred_idc << 30) |
951                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
952                   (slice_param->disable_deblocking_filter_idc << 27) |
953                   (slice_param->cabac_init_idc << 24) |
954                   (qp<<16) |                    /*Slice Quantization Parameter*/
955                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
956                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
957     OUT_BCS_BATCH(batch,
958                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
959                   (beginx << 16) |
960                   slice_param->macroblock_address );
961     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
962     OUT_BCS_BATCH(batch, 
963                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
964                   (1 << 30) |           /*ResetRateControlCounter*/
965                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
966                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
967                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
968                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
969                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
970                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
971                   (last_slice << 19) |     /*IsLastSlice*/
972                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
973                   (1 << 17) |       /*HeaderPresentFlag*/       
974                   (1 << 16) |       /*SliceData PresentFlag*/
975                   (1 << 15) |       /*TailPresentFlag*/
976                   (1 << 13) |       /*RBSP NAL TYPE*/   
977                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
978     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
979     OUT_BCS_BATCH(batch,
980                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
981                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
982                   (shrink << 8)  |
983                   (grow << 0));   
984     OUT_BCS_BATCH(batch,
985                   (correct[5] << 20) |
986                   (correct[4] << 16) |
987                   (correct[3] << 12) |
988                   (correct[2] << 8) |
989                   (correct[1] << 4) |
990                   (correct[0] << 0));
991     OUT_BCS_BATCH(batch, 0);
992
993     ADVANCE_BCS_BATCH(batch);
994 }
995
996
997 #if MFC_SOFTWARE_HASWELL
998
999 static int
1000 gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
1001                                int qp,unsigned int *msg,
1002                                struct intel_encoder_context *encoder_context,
1003                                unsigned char target_mb_size, unsigned char max_mb_size,
1004                                struct intel_batchbuffer *batch)
1005 {
1006     int len_in_dwords = 12;
1007     unsigned int intra_msg;
1008 #define         INTRA_MSG_FLAG          (1 << 13)
1009 #define         INTRA_MBTYPE_MASK       (0x1F0000)
1010     if (batch == NULL)
1011         batch = encoder_context->base.batch;
1012
1013     BEGIN_BCS_BATCH(batch, len_in_dwords);
1014
1015     intra_msg = msg[0] & 0xC0FF;
1016     intra_msg |= INTRA_MSG_FLAG;
1017     intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
1018     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1019     OUT_BCS_BATCH(batch, 0);
1020     OUT_BCS_BATCH(batch, 0);
1021     OUT_BCS_BATCH(batch, 
1022                   (0 << 24) |           /* PackedMvNum, Debug*/
1023                   (0 << 20) |           /* No motion vector */
1024                   (1 << 19) |           /* CbpDcY */
1025                   (1 << 18) |           /* CbpDcU */
1026                   (1 << 17) |           /* CbpDcV */
1027                   intra_msg);
1028
1029     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
1030     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
1031     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
1032
1033     /*Stuff for Intra MB*/
1034     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
1035     OUT_BCS_BATCH(batch, msg[2]);       
1036     OUT_BCS_BATCH(batch, msg[3]&0xFF);  
1037     
1038     /*MaxSizeInWord and TargetSzieInWord*/
1039     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1040                   (target_mb_size << 16) );
1041
1042     OUT_BCS_BATCH(batch, 0);
1043
1044     ADVANCE_BCS_BATCH(batch);
1045
1046     return len_in_dwords;
1047 }
1048
1049 static int
1050 gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1051                                unsigned int *msg, unsigned int offset,
1052                                struct intel_encoder_context *encoder_context,
1053                                unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1054                                struct intel_batchbuffer *batch)
1055 {
1056     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1057     int len_in_dwords = 12;
1058     unsigned int inter_msg = 0;
1059     if (batch == NULL)
1060         batch = encoder_context->base.batch;
1061     {
1062 #define MSG_MV_OFFSET   4
1063         unsigned int *mv_ptr;
1064         mv_ptr = msg + MSG_MV_OFFSET;
1065         /* MV of VME output is based on 16 sub-blocks. So it is necessary
1066          * to convert them to be compatible with the format of AVC_PAK
1067          * command.
1068          */
1069         if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
1070             /* MV[0] and MV[2] are replicated */
1071             mv_ptr[4] = mv_ptr[0];
1072             mv_ptr[5] = mv_ptr[1];
1073             mv_ptr[2] = mv_ptr[8];
1074             mv_ptr[3] = mv_ptr[9];
1075             mv_ptr[6] = mv_ptr[8];
1076             mv_ptr[7] = mv_ptr[9];
1077         } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
1078             /* MV[0] and MV[1] are replicated */
1079             mv_ptr[2] = mv_ptr[0];
1080             mv_ptr[3] = mv_ptr[1];
1081             mv_ptr[4] = mv_ptr[16];
1082             mv_ptr[5] = mv_ptr[17];
1083             mv_ptr[6] = mv_ptr[24];
1084             mv_ptr[7] = mv_ptr[25];
1085         } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1086                    !(msg[1] & SUBMB_SHAPE_MASK)) {
1087             /* Don't touch MV[0] or MV[1] */
1088             mv_ptr[2] = mv_ptr[8];
1089             mv_ptr[3] = mv_ptr[9];
1090             mv_ptr[4] = mv_ptr[16];
1091             mv_ptr[5] = mv_ptr[17];
1092             mv_ptr[6] = mv_ptr[24];
1093             mv_ptr[7] = mv_ptr[25];
1094         }
1095     }
1096
1097     BEGIN_BCS_BATCH(batch, len_in_dwords);
1098
1099     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1100
1101     inter_msg = 32;
1102     /* MV quantity */
1103     if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
1104         if (msg[1] & SUBMB_SHAPE_MASK)
1105             inter_msg = 128;
1106     }
1107     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
1108     OUT_BCS_BATCH(batch, offset);
1109     inter_msg = msg[0] & (0x1F00FFFF);
1110     inter_msg |= INTER_MV8;
1111     inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
1112     if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1113         (msg[1] & SUBMB_SHAPE_MASK)) {
1114         inter_msg |= INTER_MV32;
1115     }
1116
1117     OUT_BCS_BATCH(batch, inter_msg);
1118
1119     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
1120     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
1121 #if 0 
1122     if ( slice_type == SLICE_TYPE_B) {
1123         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
1124     } else {
1125         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
1126     }
1127 #else
1128     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
1129 #endif
1130
1131     inter_msg = msg[1] >> 8;
1132     /*Stuff for Inter MB*/
1133     OUT_BCS_BATCH(batch, inter_msg);        
1134     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
1135     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);
1136
1137     /*MaxSizeInWord and TargetSzieInWord*/
1138     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1139                   (target_mb_size << 16) );
1140
1141     OUT_BCS_BATCH(batch, 0x0);    
1142
1143     ADVANCE_BCS_BATCH(batch);
1144
1145     return len_in_dwords;
1146 }
1147
1148 static void 
1149 gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1150                                         struct encode_state *encode_state,
1151                                         struct intel_encoder_context *encoder_context,
1152                                         int slice_index,
1153                                         struct intel_batchbuffer *slice_batch)
1154 {
1155     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1156     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1157     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1158     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1159     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1160     unsigned int *msg = NULL, offset = 0;
1161     unsigned char *msg_ptr = NULL;
1162     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1163     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1164     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1165     int i,x,y;
1166     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1167     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1168     unsigned char *slice_header = NULL;
1169     int slice_header_length_in_bits = 0;
1170     unsigned int tail_data[] = { 0x0, 0x0 };
1171     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1172     int is_intra = slice_type == SLICE_TYPE_I;
1173
1174     if (rate_control_mode == VA_RC_CBR) {
1175         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1176         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1177     }
1178
1179     /* only support for 8-bit pixel bit-depth */
1180     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1181     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1182     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1183     assert(qp >= 0 && qp < 52);
1184
1185     gen75_mfc_avc_slice_state(ctx, 
1186                               pPicParameter,
1187                               pSliceParameter,
1188                               encode_state, encoder_context,
1189                               (rate_control_mode == VA_RC_CBR), qp, slice_batch);
1190
1191     if ( slice_index == 0) 
1192         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1193
1194     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1195
1196     // slice hander
1197     mfc_context->insert_object(ctx, encoder_context,
1198                                (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1199                                5,  /* first 5 bytes are start code + nal unit type */
1200                                1, 0, 1, slice_batch);
1201
1202     dri_bo_map(vme_context->vme_output.bo , 1);
1203     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1204
1205     if (is_intra) {
1206         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1207     } else {
1208         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1209     }
1210    
1211     for (i = pSliceParameter->macroblock_address; 
1212          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1213         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1214         x = i % width_in_mbs;
1215         y = i / width_in_mbs;
1216         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1217
1218         if (is_intra) {
1219             assert(msg);
1220             gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1221         } else {
1222             int inter_rdo, intra_rdo;
1223             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1224             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1225             offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1226             if (intra_rdo < inter_rdo) { 
1227                 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1228             } else {
1229                 msg += AVC_INTER_MSG_OFFSET;
1230                 gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, slice_type, slice_batch);
1231             }
1232         }
1233     }
1234    
1235     dri_bo_unmap(vme_context->vme_output.bo);
1236
1237     if ( last_slice ) {    
1238         mfc_context->insert_object(ctx, encoder_context,
1239                                    tail_data, 2, 8,
1240                                    2, 1, 1, 0, slice_batch);
1241     } else {
1242         mfc_context->insert_object(ctx, encoder_context,
1243                                    tail_data, 1, 8,
1244                                    1, 1, 1, 0, slice_batch);
1245     }
1246
1247     free(slice_header);
1248
1249 }
1250
1251 static dri_bo *
1252 gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1253                                    struct encode_state *encode_state,
1254                                    struct intel_encoder_context *encoder_context)
1255 {
1256     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1257     struct i965_driver_data *i965 = i965_driver_data(ctx);
1258     struct intel_batchbuffer *batch;
1259     dri_bo *batch_bo;
1260     int i;
1261     int buffer_size;
1262
1263     batch = mfc_context->aux_batchbuffer;
1264     batch_bo = batch->buffer;
1265     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1266         gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1267     }
1268
1269     intel_batchbuffer_align(batch, 8);
1270     
1271     BEGIN_BCS_BATCH(batch, 2);
1272     OUT_BCS_BATCH(batch, 0);
1273     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1274     ADVANCE_BCS_BATCH(batch);
1275
1276     dri_bo_reference(batch_bo);
1277
1278     intel_batchbuffer_free(batch);
1279     mfc_context->aux_batchbuffer = NULL;
1280
1281     return batch_bo;
1282 }
1283
1284 #else
1285
1286 static void
1287 gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1288                                      struct encode_state *encode_state,
1289                                      struct intel_encoder_context *encoder_context)
1290
1291 {
1292     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1293     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1294
1295     assert(vme_context->vme_output.bo);
1296     mfc_context->buffer_suface_setup(ctx,
1297                                      &mfc_context->gpe_context,
1298                                      &vme_context->vme_output,
1299                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1300                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1301 }
1302
1303 static void
1304 gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1305                                       struct encode_state *encode_state,
1306                                       struct intel_encoder_context *encoder_context)
1307
1308 {
1309     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1310     assert(mfc_context->aux_batchbuffer_surface.bo);
1311     mfc_context->buffer_suface_setup(ctx,
1312                                      &mfc_context->gpe_context,
1313                                      &mfc_context->aux_batchbuffer_surface,
1314                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1315                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1316 }
1317
1318 static void
1319 gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
1320                                      struct encode_state *encode_state,
1321                                      struct intel_encoder_context *encoder_context)
1322 {
1323     gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1324     gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1325 }
1326
1327 static void
1328 gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
1329                                  struct encode_state *encode_state,
1330                                  struct intel_encoder_context *encoder_context)
1331 {
1332     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1333     struct gen6_interface_descriptor_data *desc;   
1334     int i;
1335     dri_bo *bo;
1336
1337     bo = mfc_context->gpe_context.idrt.bo;
1338     dri_bo_map(bo, 1);
1339     assert(bo->virtual);
1340     desc = bo->virtual;
1341
1342     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1343         struct i965_kernel *kernel;
1344
1345         kernel = &mfc_context->gpe_context.kernels[i];
1346         assert(sizeof(*desc) == 32);
1347
1348         /*Setup the descritor table*/
1349         memset(desc, 0, sizeof(*desc));
1350         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1351         desc->desc2.sampler_count = 0;
1352         desc->desc2.sampler_state_pointer = 0;
1353         desc->desc3.binding_table_entry_count = 2;
1354         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1355         desc->desc4.constant_urb_entry_read_offset = 0;
1356         desc->desc4.constant_urb_entry_read_length = 4;
1357                 
1358         /*kernel start*/
1359         dri_bo_emit_reloc(bo,   
1360                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1361                           0,
1362                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1363                           kernel->bo);
1364         desc++;
1365     }
1366
1367     dri_bo_unmap(bo);
1368 }
1369
1370 static void
1371 gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1372                                      struct encode_state *encode_state,
1373                                      struct intel_encoder_context *encoder_context)
1374 {
1375     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1376     
1377     (void)mfc_context;
1378 }
1379
/*
 * Length of one AVC PAK command, expressed in owords and in bytes
 * (3 owords of 16 bytes each = 48 bytes).
 */
#define AVC_PAK_LEN_IN_OWORD    3
#define AVC_PAK_LEN_IN_BYTE     48
1382
1383 static void
1384 gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1385                                           uint32_t intra_flag,
1386                                           int head_offset,
1387                                           int number_mb_cmds,
1388                                           int slice_end_x,
1389                                           int slice_end_y,
1390                                           int mb_x,
1391                                           int mb_y,
1392                                           int width_in_mbs,
1393                                           int qp,
1394                                           uint32_t fwd_ref,
1395                                           uint32_t bwd_ref)
1396 {
1397     uint32_t temp_value;
1398     BEGIN_BATCH(batch, 14);
1399     
1400     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (14 - 2));
1401     OUT_BATCH(batch, 0);
1402     OUT_BATCH(batch, 0);
1403     OUT_BATCH(batch, 0);
1404     OUT_BATCH(batch, 0);
1405     OUT_BATCH(batch, 0);
1406    
1407     /*inline data */
1408     OUT_BATCH(batch, head_offset / 16);
1409     OUT_BATCH(batch, (intra_flag) | (qp << 16));
1410     temp_value = (mb_x | (mb_y << 8) | (width_in_mbs << 16));
1411     OUT_BATCH(batch, temp_value);
1412
1413     OUT_BATCH(batch, number_mb_cmds);
1414
1415     OUT_BATCH(batch,
1416               ((slice_end_y << 8) | (slice_end_x)));
1417     OUT_BATCH(batch, fwd_ref);
1418     OUT_BATCH(batch, bwd_ref);
1419
1420     OUT_BATCH(batch, MI_NOOP);
1421
1422     ADVANCE_BATCH(batch);
1423 }
1424
1425 static void
1426 gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1427                                         struct intel_encoder_context *encoder_context,
1428                                         VAEncSliceParameterBufferH264 *slice_param,
1429                                         int head_offset,
1430                                         int qp,
1431                                         int last_slice)
1432 {
1433     struct intel_batchbuffer *batch = encoder_context->base.batch;
1434     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1435     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1436     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1437     int total_mbs = slice_param->num_macroblocks;
1438     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1439     int number_mb_cmds = 128;
1440     int starting_offset = 0;
1441     int mb_x, mb_y;
1442     int last_mb, slice_end_x, slice_end_y;
1443     int remaining_mb = total_mbs;
1444     uint32_t fwd_ref , bwd_ref, mb_flag;
1445
1446     last_mb = slice_param->macroblock_address + total_mbs - 1;
1447     slice_end_x = last_mb % width_in_mbs;
1448     slice_end_y = last_mb / width_in_mbs;
1449
1450     if (slice_type == SLICE_TYPE_I) {
1451         fwd_ref = 0;
1452         bwd_ref = 0;
1453         mb_flag = 1;
1454     } else {
1455         fwd_ref = vme_context->ref_index_in_mb[0];
1456         bwd_ref = vme_context->ref_index_in_mb[1];
1457         mb_flag = 0;
1458     }
1459
1460     if (width_in_mbs >= 100) {
1461         number_mb_cmds = width_in_mbs / 5;
1462     } else if (width_in_mbs >= 80) {
1463         number_mb_cmds = width_in_mbs / 4;
1464     } else if (width_in_mbs >= 60) {
1465         number_mb_cmds = width_in_mbs / 3;
1466     } else if (width_in_mbs >= 40) {
1467         number_mb_cmds = width_in_mbs / 2;
1468     } else {
1469         number_mb_cmds = width_in_mbs;
1470     }
1471
1472     do {
1473         if (number_mb_cmds >= remaining_mb) {
1474                 number_mb_cmds = remaining_mb;
1475         }
1476         mb_x = (slice_param->macroblock_address + starting_offset) % width_in_mbs;
1477         mb_y = (slice_param->macroblock_address + starting_offset) / width_in_mbs;
1478
1479         gen75_mfc_batchbuffer_emit_object_command(batch,
1480                                                   mb_flag,
1481                                                   head_offset,
1482                                                   number_mb_cmds,
1483                                                   slice_end_x,
1484                                                   slice_end_y,
1485                                                   mb_x,
1486                                                   mb_y,
1487                                                   width_in_mbs,
1488                                                   qp,
1489                                                   fwd_ref,
1490                                                   bwd_ref);
1491
1492         head_offset += (number_mb_cmds * AVC_PAK_LEN_IN_BYTE);
1493         remaining_mb -= number_mb_cmds;
1494         starting_offset += number_mb_cmds;
1495     } while (remaining_mb > 0);
1496 }
1497                           
1498 /*
1499  * return size in Owords (16bytes)
1500  */         
1501 static void
1502 gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1503                                 struct encode_state *encode_state,
1504                                 struct intel_encoder_context *encoder_context,
1505                                 int slice_index)
1506 {
1507     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1508     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1509     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1510     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1511     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1512     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1513     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1514     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1515     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1516     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1517     unsigned char *slice_header = NULL;
1518     int slice_header_length_in_bits = 0;
1519     unsigned int tail_data[] = { 0x0, 0x0 };
1520     long head_offset;
1521     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1522
1523     if (rate_control_mode == VA_RC_CBR) {
1524         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1525         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1526     }
1527
1528     /* only support for 8-bit pixel bit-depth */
1529     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1530     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1531     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1532     assert(qp >= 0 && qp < 52);
1533
1534     gen75_mfc_avc_slice_state(ctx,
1535                               pPicParameter,
1536                               pSliceParameter,
1537                               encode_state,
1538                               encoder_context,
1539                               (rate_control_mode == VA_RC_CBR),
1540                               qp,
1541                               slice_batch);
1542
1543     if (slice_index == 0)
1544         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1545
1546     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1547
1548     // slice hander
1549     mfc_context->insert_object(ctx,
1550                                encoder_context,
1551                                (unsigned int *)slice_header,
1552                                ALIGN(slice_header_length_in_bits, 32) >> 5,
1553                                slice_header_length_in_bits & 0x1f,
1554                                5,  /* first 5 bytes are start code + nal unit type */
1555                                1,
1556                                0,
1557                                1,
1558                                slice_batch);
1559     free(slice_header);
1560
1561     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1562     head_offset = intel_batchbuffer_used_size(slice_batch);
1563
1564     slice_batch->ptr += pSliceParameter->num_macroblocks * AVC_PAK_LEN_IN_BYTE;
1565
1566     gen75_mfc_avc_batchbuffer_slice_command(ctx,
1567                                             encoder_context,
1568                                             pSliceParameter,
1569                                             head_offset,
1570                                             qp,
1571                                             last_slice);
1572
1573
1574     /* Aligned for tail */
1575     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1576     if (last_slice) {    
1577         mfc_context->insert_object(ctx,
1578                                    encoder_context,
1579                                    tail_data,
1580                                    2,
1581                                    8,
1582                                    2,
1583                                    1,
1584                                    1,
1585                                    0,
1586                                    slice_batch);
1587     } else {
1588         mfc_context->insert_object(ctx,
1589                                    encoder_context,
1590                                    tail_data,
1591                                    1,
1592                                    8,
1593                                    1,
1594                                    1,
1595                                    1,
1596                                    0,
1597                                    slice_batch);
1598     }
1599
1600     return;
1601 }
1602
1603 static void
1604 gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1605                                    struct encode_state *encode_state,
1606                                    struct intel_encoder_context *encoder_context)
1607 {
1608     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1609     struct intel_batchbuffer *batch = encoder_context->base.batch;
1610     int i;
1611     intel_batchbuffer_start_atomic(batch, 0x4000); 
1612     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1613
1614     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1615         gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i);
1616     }
1617     {
1618         struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1619         intel_batchbuffer_align(slice_batch, 8);
1620         BEGIN_BCS_BATCH(slice_batch, 2);
1621         OUT_BCS_BATCH(slice_batch, 0);
1622         OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
1623         ADVANCE_BCS_BATCH(slice_batch);
1624     }
1625     intel_batchbuffer_end_atomic(batch);
1626     intel_batchbuffer_flush(batch);
1627 }
1628
1629 static void
1630 gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
1631                                 struct encode_state *encode_state,
1632                                 struct intel_encoder_context *encoder_context)
1633 {
1634     gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1635     gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1636     gen75_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1637     gen75_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1638 }
1639
1640 static dri_bo *
1641 gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1642                                    struct encode_state *encode_state,
1643                                    struct intel_encoder_context *encoder_context)
1644 {
1645     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1646
1647     gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1648     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
1649
1650     return mfc_context->aux_batchbuffer_surface.bo;
1651 }
1652
1653 #endif
1654
1655 static void
1656 gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
1657                                   struct encode_state *encode_state,
1658                                   struct intel_encoder_context *encoder_context)
1659 {
1660     struct intel_batchbuffer *batch = encoder_context->base.batch;
1661     dri_bo *slice_batch_bo;
1662
1663     if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1664         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1665         assert(0);
1666         return; 
1667     }
1668
1669 #if MFC_SOFTWARE_HASWELL
1670     slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1671 #else
1672     slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1673 #endif
1674
1675     // begin programing
1676     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1677     intel_batchbuffer_emit_mi_flush(batch);
1678     
1679     // picture level programing
1680     gen75_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1681
1682     BEGIN_BCS_BATCH(batch, 2);
1683     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1684     OUT_BCS_RELOC(batch,
1685                   slice_batch_bo,
1686                   I915_GEM_DOMAIN_COMMAND, 0, 
1687                   0);
1688     ADVANCE_BCS_BATCH(batch);
1689
1690     // end programing
1691     intel_batchbuffer_end_atomic(batch);
1692
1693     dri_bo_unreference(slice_batch_bo);
1694 }
1695
1696
1697 static VAStatus
1698 gen75_mfc_avc_encode_picture(VADriverContextP ctx, 
1699                              struct encode_state *encode_state,
1700                              struct intel_encoder_context *encoder_context)
1701 {
1702     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1703     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1704     int current_frame_bits_size;
1705     int sts;
1706  
1707     for (;;) {
1708         gen75_mfc_init(ctx, encode_state, encoder_context);
1709         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1710         /*Programing bcs pipeline*/
1711         gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);  //filling the pipeline
1712         gen75_mfc_run(ctx, encode_state, encoder_context);
1713         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1714             gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1715             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1716             if (sts == BRC_NO_HRD_VIOLATION) {
1717                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1718                 break;
1719             }
1720             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1721                 if (!mfc_context->hrd.violation_noted) {
1722                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1723                     mfc_context->hrd.violation_noted = 1;
1724                 }
1725                 return VA_STATUS_SUCCESS;
1726             }
1727         } else {
1728             break;
1729         }
1730     }
1731
1732     return VA_STATUS_SUCCESS;
1733 }
1734
1735 /*
1736  * MPEG-2
1737  */
1738
/*
 * Lookup table indexed by the picture_type field of the MPEG-2 picture
 * parameter buffer; the selected value is written into
 * MFX_MPEG2_PIC_STATE.
 */
static const int
va_to_gen75_mpeg2_picture_type[3] = {
    [0] = 1,    /* I */
    [1] = 2,    /* P */
    [2] = 3,    /* B */
};
1745
1746 static void
1747 gen75_mfc_mpeg2_pic_state(VADriverContextP ctx,
1748                           struct intel_encoder_context *encoder_context,
1749                           struct encode_state *encode_state)
1750 {
1751     struct intel_batchbuffer *batch = encoder_context->base.batch;
1752     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1753     VAEncPictureParameterBufferMPEG2 *pic_param;
1754     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1755     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1756     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1757
1758     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1759     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1760     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1761
1762     BEGIN_BCS_BATCH(batch, 13);
1763     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1764     OUT_BCS_BATCH(batch,
1765                   (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1766                   (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1767                   (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1768                   (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1769                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1770                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1771                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1772                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1773                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1774                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1775                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1776                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1777     OUT_BCS_BATCH(batch,
1778                   0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1779                   va_to_gen75_mpeg2_picture_type[pic_param->picture_type] << 9 |
1780                   0);
1781     OUT_BCS_BATCH(batch,
1782                   1 << 31 |     /* slice concealment */
1783                   (height_in_mbs - 1) << 16 |
1784                   (width_in_mbs - 1));
1785     if (slice_param && slice_param->quantiser_scale_code >= 14) 
1786         OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
1787     else
1788         OUT_BCS_BATCH(batch, 0);
1789
1790     OUT_BCS_BATCH(batch, 0);
1791     OUT_BCS_BATCH(batch,
1792                   0xFFF << 16 | /* InterMBMaxSize */
1793                   0xFFF << 0 |  /* IntraMBMaxSize */
1794                   0);
1795     OUT_BCS_BATCH(batch, 0);
1796     OUT_BCS_BATCH(batch, 0);
1797     OUT_BCS_BATCH(batch, 0);
1798     OUT_BCS_BATCH(batch, 0);
1799     OUT_BCS_BATCH(batch, 0);
1800     OUT_BCS_BATCH(batch, 0);
1801     ADVANCE_BCS_BATCH(batch);
1802 }
1803
1804 static void
1805 gen75_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1806 {
1807     unsigned char intra_qm[64] = {
1808         8, 16, 19, 22, 26, 27, 29, 34,
1809         16, 16, 22, 24, 27, 29, 34, 37,
1810         19, 22, 26, 27, 29, 34, 34, 38,
1811         22, 22, 26, 27, 29, 34, 37, 40,
1812         22, 26, 27, 29, 32, 35, 40, 48,
1813         26, 27, 29, 32, 35, 40, 48, 58,
1814         26, 27, 29, 34, 38, 46, 56, 69,
1815         27, 29, 35, 38, 46, 56, 69, 83
1816     };
1817
1818     unsigned char non_intra_qm[64] = {
1819         16, 16, 16, 16, 16, 16, 16, 16,
1820         16, 16, 16, 16, 16, 16, 16, 16,
1821         16, 16, 16, 16, 16, 16, 16, 16,
1822         16, 16, 16, 16, 16, 16, 16, 16,
1823         16, 16, 16, 16, 16, 16, 16, 16,
1824         16, 16, 16, 16, 16, 16, 16, 16,
1825         16, 16, 16, 16, 16, 16, 16, 16,
1826         16, 16, 16, 16, 16, 16, 16, 16
1827     };
1828
1829     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1830     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
1831 }
1832
1833 static void
1834 gen75_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1835 {
1836     unsigned short intra_fqm[64] = {
1837         65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1838         65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1839         65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1840         65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1841         65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1842         65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1843         65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1844         65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1845     };
1846
1847     unsigned short non_intra_fqm[64] = {
1848         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1849         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1850         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1851         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1852         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1853         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1854         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1855         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1856     };
1857
1858     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1859     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
1860 }
1861
1862 static void
1863 gen75_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1864                                  struct intel_encoder_context *encoder_context,
1865                                  int x, int y,
1866                                  int next_x, int next_y,
1867                                  int is_fisrt_slice_group,
1868                                  int is_last_slice_group,
1869                                  int intra_slice,
1870                                  int qp,
1871                                  struct intel_batchbuffer *batch)
1872 {
1873     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1874
1875     if (batch == NULL)
1876         batch = encoder_context->base.batch;
1877
1878     BEGIN_BCS_BATCH(batch, 8);
1879
1880     OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1881     OUT_BCS_BATCH(batch,
1882                   0 << 31 |                             /* MbRateCtrlFlag */
1883                   !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
1884                   1 << 17 |                             /* Insert Header before the first slice group data */
1885                   1 << 16 |                             /* SliceData PresentFlag: always 1 */
1886                   1 << 15 |                             /* TailPresentFlag: always 1 */
1887                   0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
1888                   !!intra_slice << 13 |                 /* IntraSlice */
1889                   !!intra_slice << 12 |                 /* IntraSliceFlag */
1890                   0);
1891     OUT_BCS_BATCH(batch,
1892                   next_y << 24 |
1893                   next_x << 16 |
1894                   y << 8 |
1895                   x << 0 |
1896                   0);
1897     OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
1898     /* bitstream pointer is only loaded once for the first slice of a frame when 
1899      * LoadSlicePointerFlag is 0
1900      */
1901     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1902     OUT_BCS_BATCH(batch, 0);    /* FIXME: */
1903     OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
1904     OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
1905
1906     ADVANCE_BCS_BATCH(batch);
1907 }
1908
1909 static int
1910 gen75_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1911                                  struct intel_encoder_context *encoder_context,
1912                                  int x, int y,
1913                                  int first_mb_in_slice,
1914                                  int last_mb_in_slice,
1915                                  int first_mb_in_slice_group,
1916                                  int last_mb_in_slice_group,
1917                                  int mb_type,
1918                                  int qp_scale_code,
1919                                  int coded_block_pattern,
1920                                  unsigned char target_size_in_word,
1921                                  unsigned char max_size_in_word,
1922                                  struct intel_batchbuffer *batch)
1923 {
1924     int len_in_dwords = 9;
1925
1926     if (batch == NULL)
1927         batch = encoder_context->base.batch;
1928
1929     BEGIN_BCS_BATCH(batch, len_in_dwords);
1930
1931     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1932     OUT_BCS_BATCH(batch,
1933                   0 << 24 |     /* PackedMvNum */
1934                   0 << 20 |     /* MvFormat */
1935                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1936                   0 << 15 |     /* TransformFlag: frame DCT */
1937                   0 << 14 |     /* FieldMbFlag */
1938                   1 << 13 |     /* IntraMbFlag */
1939                   mb_type << 8 |   /* MbType: Intra */
1940                   0 << 2 |      /* SkipMbFlag */
1941                   0 << 0 |      /* InterMbMode */
1942                   0);
1943     OUT_BCS_BATCH(batch, y << 16 | x);
1944     OUT_BCS_BATCH(batch,
1945                   max_size_in_word << 24 |
1946                   target_size_in_word << 16 |
1947                   coded_block_pattern << 6 |      /* CBP */
1948                   0);
1949     OUT_BCS_BATCH(batch,
1950                   last_mb_in_slice << 31 |
1951                   first_mb_in_slice << 30 |
1952                   0 << 27 |     /* EnableCoeffClamp */
1953                   last_mb_in_slice_group << 26 |
1954                   0 << 25 |     /* MbSkipConvDisable */
1955                   first_mb_in_slice_group << 24 |
1956                   0 << 16 |     /* MvFieldSelect */
1957                   qp_scale_code << 0 |
1958                   0);
1959     OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
1960     OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
1961     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1962     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1963
1964     ADVANCE_BCS_BATCH(batch);
1965
1966     return len_in_dwords;
1967 }
1968
/*
 * Offset of the motion vectors inside an inter macroblock message.  It is
 * added to an unsigned int pointer, so the unit is 32-bit words.
 */
#define MPEG2_INTER_MV_OFFSET   12 
1970
/*
 * Motion vector range allowed for each f_code value (table index).
 * Index 0 is a placeholder; mpeg2_motion_vector() only consults
 * indices 1 to 9.
 */
static struct _mv_ranges
{
    int low;    /* in the unit of 1/2 pixel */
    int high;   /* in the unit of 1/2 pixel */
} mv_ranges[] = {
    {     0,    0 },
    {   -16,   15 },
    {   -32,   31 },
    {   -64,   63 },
    {  -128,  127 },
    {  -256,  255 },
    {  -512,  511 },
    { -1024, 1023 },
    { -2048, 2047 },
    { -4096, 4095 }
};

/*
 * Sanitise one motion vector component.
 *
 * @mv is in half-pixel units, @pos is the macroblock index along the same
 * axis and @display_max the picture extent along that axis in pixels.
 * The vector is reset to 0 when the displaced 16-pixel macroblock would
 * start before 0 or end beyond @display_max.  For @f_code in 1..9 the
 * result is then clamped to mv_ranges[@f_code]; other f_code values skip
 * the clamp.
 */
static int
mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
{
    int out_of_picture = (mv + pos * 16 * 2 < 0 ||
                          mv + (pos + 1) * 16 * 2 > display_max * 2);

    if (out_of_picture)
        mv = 0;

    /* f_code values without a table entry leave the vector unclamped. */
    if (f_code <= 0 || f_code >= 10)
        return mv;

    if (mv < mv_ranges[f_code].low)
        return mv_ranges[f_code].low;

    if (mv > mv_ranges[f_code].high)
        return mv_ranges[f_code].high;

    return mv;
}
2005
2006 static int
2007 gen75_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
2008                                  struct encode_state *encode_state,
2009                                  struct intel_encoder_context *encoder_context,
2010                                  unsigned int *msg,
2011                                  int width_in_mbs, int height_in_mbs,
2012                                  int x, int y,
2013                                  int first_mb_in_slice,
2014                                  int last_mb_in_slice,
2015                                  int first_mb_in_slice_group,
2016                                  int last_mb_in_slice_group,
2017                                  int qp_scale_code,
2018                                  unsigned char target_size_in_word,
2019                                  unsigned char max_size_in_word,
2020                                  struct intel_batchbuffer *batch)
2021 {
2022     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
2023     int len_in_dwords = 9;
2024     short *mvptr, mvx0, mvy0, mvx1, mvy1;
2025     
2026     if (batch == NULL)
2027         batch = encoder_context->base.batch;
2028
2029     mvptr = (short *)(msg + MPEG2_INTER_MV_OFFSET);
2030     mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
2031     mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
2032     mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
2033     mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
2034
2035     BEGIN_BCS_BATCH(batch, len_in_dwords);
2036
2037     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
2038     OUT_BCS_BATCH(batch,
2039                   2 << 24 |     /* PackedMvNum */
2040                   7 << 20 |     /* MvFormat */
2041                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
2042                   0 << 15 |     /* TransformFlag: frame DCT */
2043                   0 << 14 |     /* FieldMbFlag */
2044                   0 << 13 |     /* IntraMbFlag */
2045                   1 << 8 |      /* MbType: Frame-based */
2046                   0 << 2 |      /* SkipMbFlag */
2047                   0 << 0 |      /* InterMbMode */
2048                   0);
2049     OUT_BCS_BATCH(batch, y << 16 | x);
2050     OUT_BCS_BATCH(batch,
2051                   max_size_in_word << 24 |
2052                   target_size_in_word << 16 |
2053                   0x3f << 6 |   /* CBP */
2054                   0);
2055     OUT_BCS_BATCH(batch,
2056                   last_mb_in_slice << 31 |
2057                   first_mb_in_slice << 30 |
2058                   0 << 27 |     /* EnableCoeffClamp */
2059                   last_mb_in_slice_group << 26 |
2060                   0 << 25 |     /* MbSkipConvDisable */
2061                   first_mb_in_slice_group << 24 |
2062                   0 << 16 |     /* MvFieldSelect */
2063                   qp_scale_code << 0 |
2064                   0);
2065
2066     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
2067     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
2068     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
2069     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
2070
2071     ADVANCE_BCS_BATCH(batch);
2072
2073     return len_in_dwords;
2074 }
2075
2076 static void
2077 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
2078                                            struct encode_state *encode_state,
2079                                            struct intel_encoder_context *encoder_context,
2080                                            struct intel_batchbuffer *slice_batch)
2081 {
2082     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2083     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
2084
2085     if (encode_state->packed_header_data[idx]) {
2086         VAEncPackedHeaderParameterBuffer *param = NULL;
2087         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2088         unsigned int length_in_bits;
2089
2090         assert(encode_state->packed_header_param[idx]);
2091         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2092         length_in_bits = param->bit_length;
2093
2094         mfc_context->insert_object(ctx,
2095                                    encoder_context,
2096                                    header_data,
2097                                    ALIGN(length_in_bits, 32) >> 5,
2098                                    length_in_bits & 0x1f,
2099                                    5,   /* FIXME: check it */
2100                                    0,
2101                                    0,
2102                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2103                                    slice_batch);
2104     }
2105
2106     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2107
2108     if (encode_state->packed_header_data[idx]) {
2109         VAEncPackedHeaderParameterBuffer *param = NULL;
2110         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2111         unsigned int length_in_bits;
2112
2113         assert(encode_state->packed_header_param[idx]);
2114         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2115         length_in_bits = param->bit_length;
2116
2117         mfc_context->insert_object(ctx,
2118                                    encoder_context,
2119                                    header_data,
2120                                    ALIGN(length_in_bits, 32) >> 5,
2121                                    length_in_bits & 0x1f,
2122                                    5,   /* FIXME: check it */
2123                                    0,
2124                                    0,
2125                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2126                                    slice_batch);
2127     }
2128 }
2129
2130 static void 
2131 gen75_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2132                                      struct encode_state *encode_state,
2133                                      struct intel_encoder_context *encoder_context,
2134                                      int slice_index,
2135                                      VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2136                                      struct intel_batchbuffer *slice_batch)
2137 {
2138     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2139     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2140     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2141     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2142     unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2143     unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2144     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2145     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2146     int i, j;
2147     int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2148     unsigned int *msg = NULL;
2149     unsigned char *msg_ptr = NULL;
2150
2151     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
2152     h_start_pos = slice_param->macroblock_address % width_in_mbs;
2153     v_start_pos = slice_param->macroblock_address / width_in_mbs;
2154     assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
2155
2156     dri_bo_map(vme_context->vme_output.bo , 0);
2157     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
2158
2159     if (next_slice_group_param) {
2160         h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2161         v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
2162     } else {
2163         h_next_start_pos = 0;
2164         v_next_start_pos = height_in_mbs;
2165     }
2166
2167     gen75_mfc_mpeg2_slicegroup_state(ctx,
2168                                      encoder_context,
2169                                      h_start_pos,
2170                                      v_start_pos,
2171                                      h_next_start_pos,
2172                                      v_next_start_pos,
2173                                      slice_index == 0,
2174                                      next_slice_group_param == NULL,
2175                                      slice_param->is_intra_slice,
2176                                      slice_param->quantiser_scale_code,
2177                                      slice_batch);
2178
2179     if (slice_index == 0) 
2180         intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2181
2182     /* Insert '00' to make sure the header is valid */
2183     mfc_context->insert_object(ctx,
2184                                encoder_context,
2185                                (unsigned int*)section_delimiter,
2186                                1,
2187                                8,   /* 8bits in the last DWORD */
2188                                1,   /* 1 byte */
2189                                1,
2190                                0,
2191                                0,
2192                                slice_batch);
2193
2194     for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2195         /* PAK for each macroblocks */
2196         for (j = 0; j < slice_param->num_macroblocks; j++) {
2197             int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2198             int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2199             int first_mb_in_slice = (j == 0);
2200             int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2201             int first_mb_in_slice_group = (i == 0 && j == 0);
2202             int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2203                                           j == slice_param->num_macroblocks - 1);
2204
2205             msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
2206
2207             if (slice_param->is_intra_slice) {
2208                 gen75_mfc_mpeg2_pak_object_intra(ctx,
2209                                                  encoder_context,
2210                                                  h_pos, v_pos,
2211                                                  first_mb_in_slice,
2212                                                  last_mb_in_slice,
2213                                                  first_mb_in_slice_group,
2214                                                  last_mb_in_slice_group,
2215                                                  0x1a,
2216                                                  slice_param->quantiser_scale_code,
2217                                                  0x3f,
2218                                                  0,
2219                                                  0xff,
2220                                                  slice_batch);
2221             } else {
2222                 int inter_rdo, intra_rdo;
2223                 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
2224                 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
2225
2226                 if (intra_rdo < inter_rdo) 
2227                     gen75_mfc_mpeg2_pak_object_intra(ctx,
2228                                                      encoder_context,
2229                                                      h_pos, v_pos,
2230                                                      first_mb_in_slice,
2231                                                      last_mb_in_slice,
2232                                                      first_mb_in_slice_group,
2233                                                      last_mb_in_slice_group,
2234                                                      0x1a,
2235                                                      slice_param->quantiser_scale_code,
2236                                                      0x3f,
2237                                                      0,
2238                                                      0xff,
2239                                                      slice_batch);
2240                 else
2241                     gen75_mfc_mpeg2_pak_object_inter(ctx,
2242                                                      encode_state,
2243                                                      encoder_context,
2244                                                      msg,
2245                                                      width_in_mbs, height_in_mbs,
2246                                                      h_pos, v_pos,
2247                                                      first_mb_in_slice,
2248                                                      last_mb_in_slice,
2249                                                      first_mb_in_slice_group,
2250                                                      last_mb_in_slice_group,
2251                                                      slice_param->quantiser_scale_code,
2252                                                      0,
2253                                                      0xff,
2254                                                      slice_batch);
2255             }
2256         }
2257
2258         slice_param++;
2259     }
2260
2261     dri_bo_unmap(vme_context->vme_output.bo);
2262
2263     /* tail data */
2264     if (next_slice_group_param == NULL) { /* end of a picture */
2265         mfc_context->insert_object(ctx,
2266                                    encoder_context,
2267                                    (unsigned int *)tail_delimiter,
2268                                    2,
2269                                    8,   /* 8bits in the last DWORD */
2270                                    5,   /* 5 bytes */
2271                                    1,
2272                                    1,
2273                                    0,
2274                                    slice_batch);
2275     } else {        /* end of a lsice group */
2276         mfc_context->insert_object(ctx,
2277                                    encoder_context,
2278                                    (unsigned int *)section_delimiter,
2279                                    1,
2280                                    8,   /* 8bits in the last DWORD */
2281                                    1,   /* 1 byte */
2282                                    1,
2283                                    1,
2284                                    0,
2285                                    slice_batch);
2286     }
2287 }
2288
2289 /* 
2290  * A batch buffer for all slices, including slice state, 
2291  * slice insert object and slice pak object commands
2292  *
2293  */
2294 static dri_bo *
2295 gen75_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2296                                            struct encode_state *encode_state,
2297                                            struct intel_encoder_context *encoder_context)
2298 {
2299     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2300     struct intel_batchbuffer *batch;
2301     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2302     dri_bo *batch_bo;
2303     int i;
2304
2305     batch = mfc_context->aux_batchbuffer;
2306     batch_bo = batch->buffer;
2307
2308     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2309         if (i == encode_state->num_slice_params_ext - 1)
2310             next_slice_group_param = NULL;
2311         else
2312             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2313
2314         gen75_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2315     }
2316
2317     intel_batchbuffer_align(batch, 8);
2318     
2319     BEGIN_BCS_BATCH(batch, 2);
2320     OUT_BCS_BATCH(batch, 0);
2321     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2322     ADVANCE_BCS_BATCH(batch);
2323
2324     dri_bo_reference(batch_bo);
2325     intel_batchbuffer_free(batch);
2326     mfc_context->aux_batchbuffer = NULL;
2327
2328     return batch_bo;
2329 }
2330
2331 static void
2332 gen75_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2333                                             struct encode_state *encode_state,
2334                                             struct intel_encoder_context *encoder_context)
2335 {
2336     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2337
2338     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2339     mfc_context->set_surface_state(ctx, encoder_context);
2340     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2341     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
2342     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2343     gen75_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2344     gen75_mfc_mpeg2_qm_state(ctx, encoder_context);
2345     gen75_mfc_mpeg2_fqm_state(ctx, encoder_context);
2346 }
2347
2348 static void
2349 gen75_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2350                                     struct encode_state *encode_state,
2351                                     struct intel_encoder_context *encoder_context)
2352 {
2353     struct intel_batchbuffer *batch = encoder_context->base.batch;
2354     dri_bo *slice_batch_bo;
2355
2356     slice_batch_bo = gen75_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2357
2358     // begin programing
2359     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
2360     intel_batchbuffer_emit_mi_flush(batch);
2361     
2362     // picture level programing
2363     gen75_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
2364
2365     BEGIN_BCS_BATCH(batch, 2);
2366     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
2367     OUT_BCS_RELOC(batch,
2368                   slice_batch_bo,
2369                   I915_GEM_DOMAIN_COMMAND, 0, 
2370                   0);
2371     ADVANCE_BCS_BATCH(batch);
2372
2373     // end programing
2374     intel_batchbuffer_end_atomic(batch);
2375
2376     dri_bo_unreference(slice_batch_bo);
2377 }
2378
2379 static VAStatus
2380 intel_mfc_mpeg2_prepare(VADriverContextP ctx, 
2381                         struct encode_state *encode_state,
2382                         struct intel_encoder_context *encoder_context)
2383 {
2384     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2385     struct object_surface *obj_surface; 
2386     struct object_buffer *obj_buffer;
2387     struct i965_coded_buffer_segment *coded_buffer_segment;
2388     VAStatus vaStatus = VA_STATUS_SUCCESS;
2389     dri_bo *bo;
2390     int i;
2391
2392     /* reconstructed surface */
2393     obj_surface = encode_state->reconstructed_object;
2394     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2395     mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2396     dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2397     mfc_context->surface_state.width = obj_surface->orig_width;
2398     mfc_context->surface_state.height = obj_surface->orig_height;
2399     mfc_context->surface_state.w_pitch = obj_surface->width;
2400     mfc_context->surface_state.h_pitch = obj_surface->height;
2401
2402     /* forward reference */
2403     obj_surface = encode_state->reference_objects[0];
2404
2405     if (obj_surface && obj_surface->bo) {
2406         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2407         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2408     } else
2409         mfc_context->reference_surfaces[0].bo = NULL;
2410
2411     /* backward reference */
2412     obj_surface = encode_state->reference_objects[1];
2413
2414     if (obj_surface && obj_surface->bo) {
2415         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2416         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2417     } else {
2418         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2419
2420         if (mfc_context->reference_surfaces[1].bo)
2421             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2422     }
2423
2424     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2425         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2426
2427         if (mfc_context->reference_surfaces[i].bo)
2428             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2429     }
2430     
2431     /* input YUV surface */
2432     obj_surface = encode_state->input_yuv_object;
2433     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2434     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2435
2436     /* coded buffer */
2437     obj_buffer = encode_state->coded_buf_object;
2438     bo = obj_buffer->buffer_store->bo;
2439     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2440     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2441     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2442     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2443
2444     /* set the internal flag to 0 to indicate the coded size is unknown */
2445     dri_bo_map(bo, 1);
2446     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2447     coded_buffer_segment->mapped = 0;
2448     coded_buffer_segment->codec = encoder_context->codec;
2449     dri_bo_unmap(bo);
2450
2451     return vaStatus;
2452 }
2453
2454 static VAStatus
2455 gen75_mfc_mpeg2_encode_picture(VADriverContextP ctx, 
2456                                struct encode_state *encode_state,
2457                                struct intel_encoder_context *encoder_context)
2458 {
2459     gen75_mfc_init(ctx, encode_state, encoder_context);
2460     intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2461     /*Programing bcs pipeline*/
2462     gen75_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2463     gen75_mfc_run(ctx, encode_state, encoder_context);
2464
2465     return VA_STATUS_SUCCESS;
2466 }
2467
2468 static void
2469 gen75_mfc_context_destroy(void *context)
2470 {
2471     struct gen6_mfc_context *mfc_context = context;
2472     int i;
2473
2474     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2475     mfc_context->post_deblocking_output.bo = NULL;
2476
2477     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2478     mfc_context->pre_deblocking_output.bo = NULL;
2479
2480     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2481     mfc_context->uncompressed_picture_source.bo = NULL;
2482
2483     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
2484     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2485
2486     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2487         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2488         mfc_context->direct_mv_buffers[i].bo = NULL;
2489     }
2490
2491     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2492     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
2493
2494     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2495     mfc_context->macroblock_status_buffer.bo = NULL;
2496
2497     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2498     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2499
2500     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2501     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2502
2503     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2504         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2505         mfc_context->reference_surfaces[i].bo = NULL;  
2506     }
2507
2508     i965_gpe_context_destroy(&mfc_context->gpe_context);
2509
2510     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2511     mfc_context->mfc_batchbuffer_surface.bo = NULL;
2512
2513     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2514     mfc_context->aux_batchbuffer_surface.bo = NULL;
2515
2516     if (mfc_context->aux_batchbuffer)
2517         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2518
2519     mfc_context->aux_batchbuffer = NULL;
2520
2521     free(mfc_context);
2522 }
2523
2524 static VAStatus gen75_mfc_pipeline(VADriverContextP ctx,
2525                                    VAProfile profile,
2526                                    struct encode_state *encode_state,
2527                                    struct intel_encoder_context *encoder_context)
2528 {
2529     VAStatus vaStatus;
2530
2531     switch (profile) {
2532     case VAProfileH264ConstrainedBaseline:
2533     case VAProfileH264Main:
2534     case VAProfileH264High:
2535         vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
2536         break;
2537
2538         /* FIXME: add for other profile */
2539     case VAProfileMPEG2Simple:
2540     case VAProfileMPEG2Main:
2541         vaStatus = gen75_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
2542         break;
2543
2544     default:
2545         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2546         break;
2547     }
2548
2549     return vaStatus;
2550 }
2551
2552 Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2553 {
2554     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
2555
2556     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2557
2558     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
2559     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
2560
2561     mfc_context->gpe_context.curbe.length = 32 * 4;
2562
2563     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2564     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
2565     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
2566     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
2567     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
2568
2569     i965_gpe_load_kernels(ctx,
2570                           &mfc_context->gpe_context,
2571                           gen75_mfc_kernels,
2572                           1);
2573
2574     mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
2575     mfc_context->set_surface_state = gen75_mfc_surface_state;
2576     mfc_context->ind_obj_base_addr_state = gen75_mfc_ind_obj_base_addr_state;
2577     mfc_context->avc_img_state = gen75_mfc_avc_img_state;
2578     mfc_context->avc_qm_state = gen75_mfc_avc_qm_state;
2579     mfc_context->avc_fqm_state = gen75_mfc_avc_fqm_state;
2580     mfc_context->insert_object = gen75_mfc_avc_insert_object;
2581     mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;
2582
2583     encoder_context->mfc_context = mfc_context;
2584     encoder_context->mfc_context_destroy = gen75_mfc_context_destroy;
2585     encoder_context->mfc_pipeline = gen75_mfc_pipeline;
2586     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
2587
2588     return True;
2589 }