decoder: h264: fix frame store logic for MVC.
[platform/upstream/libva-intel-driver.git] / src / gen7_mfc.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhou Chang <chang.zhou@intel.com>
26  *    Xiang, Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40 #include "i965_encoder_utils.h"
41 #include "gen6_mfc.h"
42 #include "gen6_vme.h"
43
/*
 * Offsets inside the combined surface-state / binding-table area.
 *
 * SURFACE_STATE_PADDED_SIZE  - the larger of the GEN6 and GEN7 padded sizes.
 * SURFACE_STATE_OFFSET(i)    - byte offset of surface state slot i.
 * BINDING_TABLE_OFFSET(i)    - byte offset of binding table entry i; the
 *                              table starts after MAX_MEDIA_SURFACES_GEN6
 *                              surface state slots and each entry is one
 *                              unsigned int.
 *
 * The macro argument is parenthesized so that expressions such as
 * SURFACE_STATE_OFFSET(a + b) expand correctly.
 */
#define SURFACE_STATE_PADDED_SIZE               (MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7))
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))
47
48 extern void
49 gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx, 
50                              struct intel_encoder_context *encoder_context);
51 extern void
52 gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, 
53                                  struct intel_encoder_context *encoder_context);
54 extern void 
55 gen6_mfc_init(VADriverContextP ctx, 
56               struct encode_state *encode_state,
57               struct intel_encoder_context *encoder_context);
58
59 extern VAStatus
60 gen6_mfc_run(VADriverContextP ctx, 
61              struct encode_state *encode_state,
62              struct intel_encoder_context *encoder_context);
63
64 extern VAStatus
65 gen6_mfc_stop(VADriverContextP ctx, 
66               struct encode_state *encode_state,
67               struct intel_encoder_context *encoder_context,
68               int *encoded_bits_size);
69
70 extern VAStatus
71 gen6_mfc_avc_encode_picture(VADriverContextP ctx, 
72                             struct encode_state *encode_state,
73                             struct intel_encoder_context *encoder_context);
74
75 static const uint32_t gen7_mfc_batchbuffer_avc_intra[][4] = {
76 #include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
77 };
78
79 static const uint32_t gen7_mfc_batchbuffer_avc_inter[][4] = {
80 #include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
81 };
82
83 static struct i965_kernel gen7_mfc_kernels[] = {
84     {
85         "MFC AVC INTRA BATCHBUFFER ",
86         MFC_BATCHBUFFER_AVC_INTRA,
87         gen7_mfc_batchbuffer_avc_intra,
88         sizeof(gen7_mfc_batchbuffer_avc_intra),
89         NULL
90     },
91
92     {
93         "MFC AVC INTER BATCHBUFFER ",
94         MFC_BATCHBUFFER_AVC_INTER,
95         gen7_mfc_batchbuffer_avc_inter,
96         sizeof(gen7_mfc_batchbuffer_avc_inter),
97         NULL
98     },
99 };
100
101 static void
102 gen7_mfc_pipe_mode_select(VADriverContextP ctx,
103                           int standard_select,
104                           struct intel_encoder_context *encoder_context)
105 {
106     struct intel_batchbuffer *batch = encoder_context->base.batch;
107     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
108
109     assert(standard_select == MFX_FORMAT_MPEG2 ||
110            standard_select == MFX_FORMAT_AVC);
111
112     BEGIN_BCS_BATCH(batch, 5);
113
114     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
115     OUT_BCS_BATCH(batch,
116                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
117                   (MFD_MODE_VLD << 15) | /* VLD mode */
118                   (1 << 10) | /* Stream-Out Enable */
119                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
120                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
121                   (0 << 8)  | /* Pre Deblocking Output */
122                   (0 << 5)  | /* not in stitch mode */
123                   (1 << 4)  | /* encoding mode */
124                   (standard_select << 0));  /* standard select: avc or mpeg2 */
125     OUT_BCS_BATCH(batch,
126                   (0 << 7)  | /* expand NOA bus flag */
127                   (0 << 6)  | /* disable slice-level clock gating */
128                   (0 << 5)  | /* disable clock gating for NOA */
129                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
130                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
131                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
132                   (0 << 1)  |
133                   (0 << 0));
134     OUT_BCS_BATCH(batch, 0);
135     OUT_BCS_BATCH(batch, 0);
136
137     ADVANCE_BCS_BATCH(batch);
138 }
139
140 static void
141 gen7_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
142 {
143     struct intel_batchbuffer *batch = encoder_context->base.batch;
144     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
145
146     BEGIN_BCS_BATCH(batch, 6);
147
148     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
149     OUT_BCS_BATCH(batch, 0);
150     OUT_BCS_BATCH(batch,
151                   ((mfc_context->surface_state.height - 1) << 18) |
152                   ((mfc_context->surface_state.width - 1) << 4));
153     OUT_BCS_BATCH(batch,
154                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
155                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
156                   (0 << 22) | /* surface object control state, FIXME??? */
157                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
158                   (0 << 2)  | /* must be 0 for interleave U/V */
159                   (1 << 1)  | /* must be tiled */
160                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
161     OUT_BCS_BATCH(batch,
162                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
163                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
164     OUT_BCS_BATCH(batch, 0);
165
166     ADVANCE_BCS_BATCH(batch);
167 }
168
169 static void
170 gen7_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
171 {
172     struct intel_batchbuffer *batch = encoder_context->base.batch;
173     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
174     struct gen6_vme_context *vme_context = encoder_context->vme_context;
175
176     BEGIN_BCS_BATCH(batch, 11);
177
178     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
179     OUT_BCS_BATCH(batch, 0);
180     OUT_BCS_BATCH(batch, 0);
181     /* MFX Indirect MV Object Base Address */
182     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
183     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
184     OUT_BCS_BATCH(batch, 0);
185     OUT_BCS_BATCH(batch, 0);
186     OUT_BCS_BATCH(batch, 0);
187     OUT_BCS_BATCH(batch, 0);
188     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
189     OUT_BCS_RELOC(batch,
190                   mfc_context->mfc_indirect_pak_bse_object.bo,
191                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
192                   0);
193     OUT_BCS_RELOC(batch,
194                   mfc_context->mfc_indirect_pak_bse_object.bo,
195                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
196                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
197
198     ADVANCE_BCS_BATCH(batch);
199 }
200
201 static void
202 gen7_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,  
203                        struct intel_encoder_context *encoder_context)
204 {
205     struct intel_batchbuffer *batch = encoder_context->base.batch;
206     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
207     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
208
209     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
210     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
211
212     BEGIN_BCS_BATCH(batch, 16);
213
214     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
215     /*DW1 frame size */
216     OUT_BCS_BATCH(batch,
217                   ((width_in_mbs * height_in_mbs - 1) & 0xFFFF));
218     OUT_BCS_BATCH(batch, 
219                   ((height_in_mbs - 1) << 16) | 
220                   ((width_in_mbs - 1) << 0));
221     /*DW3 Qp setting */
222     OUT_BCS_BATCH(batch, 
223                   (0 << 24) |   /* Second Chroma QP Offset */
224                   (0 << 16) |   /* Chroma QP Offset */
225                   (0 << 14) |   /* Max-bit conformance Intra flag */
226                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
227                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
228                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
229                   (0 << 8)  |   /* FIXME: Image Structure */
230                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
231     OUT_BCS_BATCH(batch,
232                   (0 << 16) |   /* Mininum Frame size */
233                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
234                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
235                   (0 << 13) |   /* CABAC 0 word insertion test enable */
236                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
237                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
238                   (0 << 9)  |   /* FIXME: MbMvFormatFlag */
239                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
240                   (0 << 6)  |   /* Only valid for VLD decoding mode */
241                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
242                   (0 << 4)  |   /* Direct 8x8 inference flag */
243                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
244                   (1 << 2)  |   /* Frame MB only flag */
245                   (0 << 1)  |   /* MBAFF mode is in active */
246                   (0 << 0));    /* Field picture flag */
247     /*DW5 trequllis quantization */
248     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
249     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
250                   (0xBB8 << 16) |       /* InterMbMaxSz */
251                   (0xEE8) );            /* IntraMbMaxSz */
252     /* DW7 */
253     OUT_BCS_BATCH(batch, 0);            /* Reserved */
254     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
255     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
256     /* DW10 frame bit setting */
257     OUT_BCS_BATCH(batch, 0x8C000000);
258     OUT_BCS_BATCH(batch, 0x00010000);
259     OUT_BCS_BATCH(batch, 0);
260     /* DW13 Ref setting */
261     OUT_BCS_BATCH(batch, 0x02010100);
262     OUT_BCS_BATCH(batch, 0);
263     OUT_BCS_BATCH(batch, 0);
264
265     ADVANCE_BCS_BATCH(batch);
266 }
267
268 static void
269 gen7_mfc_qm_state(VADriverContextP ctx,
270                   int qm_type,
271                   unsigned int *qm,
272                   int qm_length,
273                   struct intel_encoder_context *encoder_context)
274 {
275     struct intel_batchbuffer *batch = encoder_context->base.batch;
276     unsigned int qm_buffer[16];
277
278     assert(qm_length <= 16);
279     assert(sizeof(*qm) == 4);
280     memcpy(qm_buffer, qm, qm_length * 4);
281
282     BEGIN_BCS_BATCH(batch, 18);
283     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
284     OUT_BCS_BATCH(batch, qm_type << 0);
285     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
286     ADVANCE_BCS_BATCH(batch);
287 }
288
289 static void
290 gen7_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
291 {
292     unsigned int qm[16] = {
293         0x10101010, 0x10101010, 0x10101010, 0x10101010,
294         0x10101010, 0x10101010, 0x10101010, 0x10101010,
295         0x10101010, 0x10101010, 0x10101010, 0x10101010,
296         0x10101010, 0x10101010, 0x10101010, 0x10101010
297     };
298
299     gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
300     gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
301     gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
302     gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
303 }
304
305 static void
306 gen7_mfc_fqm_state(VADriverContextP ctx,
307                    int fqm_type,
308                    unsigned int *fqm,
309                    int fqm_length,
310                    struct intel_encoder_context *encoder_context)
311 {
312     struct intel_batchbuffer *batch = encoder_context->base.batch;
313     unsigned int fqm_buffer[32];
314
315     assert(fqm_length <= 32);
316     assert(sizeof(*fqm) == 4);
317     memcpy(fqm_buffer, fqm, fqm_length * 4);
318
319     BEGIN_BCS_BATCH(batch, 34);
320     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
321     OUT_BCS_BATCH(batch, fqm_type << 0);
322     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
323     ADVANCE_BCS_BATCH(batch);
324 }
325
326 static void
327 gen7_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
328 {
329     unsigned int qm[32] = {
330         0x10001000, 0x10001000, 0x10001000, 0x10001000,
331         0x10001000, 0x10001000, 0x10001000, 0x10001000,
332         0x10001000, 0x10001000, 0x10001000, 0x10001000,
333         0x10001000, 0x10001000, 0x10001000, 0x10001000,
334         0x10001000, 0x10001000, 0x10001000, 0x10001000,
335         0x10001000, 0x10001000, 0x10001000, 0x10001000,
336         0x10001000, 0x10001000, 0x10001000, 0x10001000,
337         0x10001000, 0x10001000, 0x10001000, 0x10001000
338     };
339
340     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
341     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
342     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
343     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
344 }
345
346 static void
347 gen7_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
348                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
349                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
350                            struct intel_batchbuffer *batch)
351 {
352     if (batch == NULL)
353         batch = encoder_context->base.batch;
354
355     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
356
357     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
358     OUT_BCS_BATCH(batch,
359                   (0 << 16) |   /* always start at offset 0 */
360                   (data_bits_in_last_dw << 8) |
361                   (skip_emul_byte_count << 4) |
362                   (!!emulation_flag << 3) |
363                   ((!!is_last_header) << 2) |
364                   ((!!is_end_of_slice) << 1) |
365                   (0 << 0));    /* FIXME: ??? */
366     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
367
368     ADVANCE_BCS_BATCH(batch);
369 }
370
/*
 * Lookup table from a picture type index (0 = I, 1 = P, 2 = B) to the
 * picture type value programmed into the MPEG-2 picture state.
 */
static const int
va_to_gen7_mpeg2_picture_type[3] = {
    [0] = 1,    /* I */
    [1] = 2,    /* P */
    [2] = 3,    /* B */
};
377
378 static void
379 gen7_mfc_mpeg2_pic_state(VADriverContextP ctx,
380                          struct intel_encoder_context *encoder_context,
381                          struct encode_state *encode_state)
382 {
383     struct intel_batchbuffer *batch = encoder_context->base.batch;
384     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
385     VAEncPictureParameterBufferMPEG2 *pic_param;
386     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
387     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
388     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
389
390     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
391     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
392     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
393
394     BEGIN_BCS_BATCH(batch, 13);
395     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
396     OUT_BCS_BATCH(batch,
397                   (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
398                   (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
399                   (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
400                   (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
401                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
402                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
403                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
404                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
405                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
406                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
407                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
408                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
409     OUT_BCS_BATCH(batch,
410                   0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
411                   va_to_gen7_mpeg2_picture_type[pic_param->picture_type] << 9 |
412                   0);
413     OUT_BCS_BATCH(batch,
414                   1 << 31 |     /* slice concealment */
415                   (height_in_mbs - 1) << 16 |
416                   (width_in_mbs - 1));
417
418     if (slice_param && slice_param->quantiser_scale_code >= 14) 
419         OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
420     else
421         OUT_BCS_BATCH(batch, 0);
422
423     OUT_BCS_BATCH(batch, 0);
424     OUT_BCS_BATCH(batch,
425                   0xFFF << 16 | /* InterMBMaxSize */
426                   0xFFF << 0 |  /* IntraMBMaxSize */
427                   0);
428     OUT_BCS_BATCH(batch, 0);
429     OUT_BCS_BATCH(batch, 0);
430     OUT_BCS_BATCH(batch, 0);
431     OUT_BCS_BATCH(batch, 0);
432     OUT_BCS_BATCH(batch, 0);
433     OUT_BCS_BATCH(batch, 0);
434     ADVANCE_BCS_BATCH(batch);
435 }
436
437 static void
438 gen7_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
439 {
440     unsigned char intra_qm[64] = {
441         8, 16, 19, 22, 26, 27, 29, 34,
442         16, 16, 22, 24, 27, 29, 34, 37,
443         19, 22, 26, 27, 29, 34, 34, 38,
444         22, 22, 26, 27, 29, 34, 37, 40,
445         22, 26, 27, 29, 32, 35, 40, 48,
446         26, 27, 29, 32, 35, 40, 48, 58,
447         26, 27, 29, 34, 38, 46, 56, 69,
448         27, 29, 35, 38, 46, 56, 69, 83
449     };
450
451     unsigned char non_intra_qm[64] = {
452         16, 16, 16, 16, 16, 16, 16, 16,
453         16, 16, 16, 16, 16, 16, 16, 16,
454         16, 16, 16, 16, 16, 16, 16, 16,
455         16, 16, 16, 16, 16, 16, 16, 16,
456         16, 16, 16, 16, 16, 16, 16, 16,
457         16, 16, 16, 16, 16, 16, 16, 16,
458         16, 16, 16, 16, 16, 16, 16, 16,
459         16, 16, 16, 16, 16, 16, 16, 16
460     };
461
462     gen7_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
463     gen7_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
464 }
465
466 static void
467 gen7_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
468 {
469     unsigned short intra_fqm[64] = {
470         65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
471         65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
472         65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
473         65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
474         65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
475         65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
476         65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
477         65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
478     };
479
480     unsigned short non_intra_fqm[64] = {
481         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
482         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
483         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
484         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
485         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
486         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
487         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
488         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
489     };
490
491     gen7_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
492     gen7_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
493 }
494
495 static void
496 gen7_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
497                                 struct intel_encoder_context *encoder_context,
498                                 int x, int y,
499                                 int next_x, int next_y,
500                                 int is_fisrt_slice_group,
501                                 int is_last_slice_group,
502                                 int intra_slice,
503                                 int qp,
504                                 struct intel_batchbuffer *batch)
505 {
506     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
507
508     if (batch == NULL)
509         batch = encoder_context->base.batch;
510
511     BEGIN_BCS_BATCH(batch, 8);
512
513     OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
514     OUT_BCS_BATCH(batch,
515                   0 << 31 |                             /* MbRateCtrlFlag */
516                   !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
517                   1 << 17 |                             /* Insert Header before the first slice group data */
518                   1 << 16 |                             /* SliceData PresentFlag: always 1 */
519                   1 << 15 |                             /* TailPresentFlag: always 1 */
520                   0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
521                   !!intra_slice << 13 |                 /* IntraSlice */
522                   !!intra_slice << 12 |                 /* IntraSliceFlag */
523                   0);
524     OUT_BCS_BATCH(batch,
525                   next_y << 24 |
526                   next_x << 16 |
527                   y << 8 |
528                   x << 0 |
529                   0);
530     OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
531     /* bitstream pointer is only loaded once for the first slice of a frame when 
532      * LoadSlicePointerFlag is 0
533      */
534     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
535     OUT_BCS_BATCH(batch, 0);    /* FIXME: */
536     OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
537     OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
538
539     ADVANCE_BCS_BATCH(batch);
540 }
541
542 static int
543 gen7_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
544                                 struct intel_encoder_context *encoder_context,
545                                 int x, int y,
546                                 int first_mb_in_slice,
547                                 int last_mb_in_slice,
548                                 int first_mb_in_slice_group,
549                                 int last_mb_in_slice_group,
550                                 int mb_type,
551                                 int qp_scale_code,
552                                 int coded_block_pattern,
553                                 unsigned char target_size_in_word,
554                                 unsigned char max_size_in_word,
555                                 struct intel_batchbuffer *batch)
556 {
557     int len_in_dwords = 9;
558
559     if (batch == NULL)
560         batch = encoder_context->base.batch;
561
562     BEGIN_BCS_BATCH(batch, len_in_dwords);
563
564     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
565     OUT_BCS_BATCH(batch,
566                   0 << 24 |     /* PackedMvNum */
567                   0 << 20 |     /* MvFormat */
568                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
569                   0 << 15 |     /* TransformFlag: frame DCT */
570                   0 << 14 |     /* FieldMbFlag */
571                   1 << 13 |     /* IntraMbFlag */
572                   mb_type << 8 |   /* MbType: Intra */
573                   0 << 2 |      /* SkipMbFlag */
574                   0 << 0 |      /* InterMbMode */
575                   0);
576     OUT_BCS_BATCH(batch, y << 16 | x);
577     OUT_BCS_BATCH(batch,
578                   max_size_in_word << 24 |
579                   target_size_in_word << 16 |
580                   coded_block_pattern << 6 |      /* CBP */
581                   0);
582     OUT_BCS_BATCH(batch,
583                   last_mb_in_slice << 31 |
584                   first_mb_in_slice << 30 |
585                   0 << 27 |     /* EnableCoeffClamp */
586                   last_mb_in_slice_group << 26 |
587                   0 << 25 |     /* MbSkipConvDisable */
588                   first_mb_in_slice_group << 24 |
589                   0 << 16 |     /* MvFieldSelect */
590                   qp_scale_code << 0 |
591                   0);
592     OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
593     OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
594     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
595     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
596
597     ADVANCE_BCS_BATCH(batch);
598
599     return len_in_dwords;
600 }
601
#define MV_OFFSET_IN_WORD       112

/*
 * Allowed motion vector range for each f_code value, indexed by f_code.
 * Entry 0 is a {0, 0} placeholder; mpeg2_motion_vector() only consults
 * entries 1..9.
 */
static struct _mv_ranges
{
    int low;    /* in the unit of 1/2 pixel */
    int high;   /* in the unit of 1/2 pixel */
} mv_ranges[] = {
    {     0,    0 },
    {   -16,   15 },
    {   -32,   31 },
    {   -64,   63 },
    {  -128,  127 },
    {  -256,  255 },
    {  -512,  511 },
    { -1024, 1023 },
    { -2048, 2047 },
    { -4096, 4095 }
};

/*
 * Sanitize one motion vector component.
 *
 * mv          - motion vector component (half-pixel units, like mv_ranges).
 * pos         - macroblock index along the same axis.
 * display_max - picture extent along that axis, in pixels.
 * f_code      - selects the clamping range; values outside 1..9 disable
 *               clamping.
 *
 * The vector is reset to 0 when the displaced 16-pixel block would start
 * before 0 or end past display_max (both compared in half-pixel units).
 * The result is then clamped to mv_ranges[f_code].
 */
static int
mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
{
    int clamped = mv;

    if (mv + pos * 16 * 2 < 0 ||
        mv + (pos + 1) * 16 * 2 > display_max * 2) {
        clamped = 0;
    }

    if (f_code <= 0 || f_code >= 10)
        return clamped;

    if (clamped < mv_ranges[f_code].low)
        clamped = mv_ranges[f_code].low;

    if (clamped > mv_ranges[f_code].high)
        clamped = mv_ranges[f_code].high;

    return clamped;
}
638
639 static int
640 gen7_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
641                                 struct encode_state *encode_state,
642                                 struct intel_encoder_context *encoder_context,
643                                 unsigned int *msg,
644                                 int width_in_mbs, int height_in_mbs,
645                                 int x, int y,
646                                 int first_mb_in_slice,
647                                 int last_mb_in_slice,
648                                 int first_mb_in_slice_group,
649                                 int last_mb_in_slice_group,
650                                 int qp_scale_code,
651                                 unsigned char target_size_in_word,
652                                 unsigned char max_size_in_word,
653                                 struct intel_batchbuffer *batch)
654 {
655     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
656     int len_in_dwords = 9;
657     short *mvptr, mvx0, mvy0, mvx1, mvy1;
658  
659     if (batch == NULL)
660         batch = encoder_context->base.batch;
661
662     mvptr = (short *)msg;
663     mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
664     mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
665     mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
666     mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
667
668     BEGIN_BCS_BATCH(batch, len_in_dwords);
669
670     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
671     OUT_BCS_BATCH(batch,
672                   2 << 24 |     /* PackedMvNum */
673                   7 << 20 |     /* MvFormat */
674                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
675                   0 << 15 |     /* TransformFlag: frame DCT */
676                   0 << 14 |     /* FieldMbFlag */
677                   0 << 13 |     /* IntraMbFlag */
678                   1 << 8 |      /* MbType: Frame-based */
679                   0 << 2 |      /* SkipMbFlag */
680                   0 << 0 |      /* InterMbMode */
681                   0);
682     OUT_BCS_BATCH(batch, y << 16 | x);
683     OUT_BCS_BATCH(batch,
684                   max_size_in_word << 24 |
685                   target_size_in_word << 16 |
686                   0x3f << 6 |   /* CBP */
687                   0);
688     OUT_BCS_BATCH(batch,
689                   last_mb_in_slice << 31 |
690                   first_mb_in_slice << 30 |
691                   0 << 27 |     /* EnableCoeffClamp */
692                   last_mb_in_slice_group << 26 |
693                   0 << 25 |     /* MbSkipConvDisable */
694                   first_mb_in_slice_group << 24 |
695                   0 << 16 |     /* MvFieldSelect */
696                   qp_scale_code << 0 |
697                   0);
698
699     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
700     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
701     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
702     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
703
704     ADVANCE_BCS_BATCH(batch);
705
706     return len_in_dwords;
707 }
708
709 static void
710 gen7_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
711                                           struct encode_state *encode_state,
712                                           struct intel_encoder_context *encoder_context,
713                                           struct intel_batchbuffer *slice_batch)
714 {
715     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
716     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
717
718     if (encode_state->packed_header_data[idx]) {
719         VAEncPackedHeaderParameterBuffer *param = NULL;
720         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
721         unsigned int length_in_bits;
722
723         assert(encode_state->packed_header_param[idx]);
724         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
725         length_in_bits = param->bit_length;
726
727         mfc_context->insert_object(ctx,
728                                    encoder_context,
729                                    header_data,
730                                    ALIGN(length_in_bits, 32) >> 5,
731                                    length_in_bits & 0x1f,
732                                    5,   /* FIXME: check it */
733                                    0,
734                                    0,
735                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
736                                    slice_batch);
737     }
738
739     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
740
741     if (encode_state->packed_header_data[idx]) {
742         VAEncPackedHeaderParameterBuffer *param = NULL;
743         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
744         unsigned int length_in_bits;
745
746         assert(encode_state->packed_header_param[idx]);
747         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
748         length_in_bits = param->bit_length;
749
750         mfc_context->insert_object(ctx,
751                                    encoder_context,
752                                    header_data,
753                                    ALIGN(length_in_bits, 32) >> 5,
754                                    length_in_bits & 0x1f,
755                                    5,   /* FIXME: check it */
756                                    0,
757                                    0,
758                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
759                                    slice_batch);
760     }
761 }
762
763 static void 
764 gen7_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
765                                     struct encode_state *encode_state,
766                                     struct intel_encoder_context *encoder_context,
767                                     int slice_index,
768                                     VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
769                                     struct intel_batchbuffer *slice_batch)
770 {
771     struct gen6_vme_context *vme_context = encoder_context->vme_context;
772     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
773     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
774     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
775     unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
776     unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
777     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
778     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
779     int i, j;
780     int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
781     unsigned int *msg = NULL;
782     unsigned char *msg_ptr = NULL;
783
784     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
785     h_start_pos = slice_param->macroblock_address % width_in_mbs;
786     v_start_pos = slice_param->macroblock_address / width_in_mbs;
787     assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
788
789     dri_bo_map(vme_context->vme_output.bo , 0);
790     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
791
792     if (next_slice_group_param) {
793         h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
794         v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
795     } else {
796         h_next_start_pos = 0;
797         v_next_start_pos = height_in_mbs;
798     }
799
800     gen7_mfc_mpeg2_slicegroup_state(ctx,
801                                     encoder_context,
802                                     h_start_pos,
803                                     v_start_pos,
804                                     h_next_start_pos,
805                                     v_next_start_pos,
806                                     slice_index == 0,
807                                     next_slice_group_param == NULL,
808                                     slice_param->is_intra_slice,
809                                     slice_param->quantiser_scale_code,
810                                     slice_batch);
811
812     if (slice_index == 0) 
813         gen7_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
814
815     /* Insert '00' to make sure the header is valid */
816     mfc_context->insert_object(ctx,
817                                encoder_context,
818                                (unsigned int*)section_delimiter,
819                                1,
820                                8,   /* 8bits in the last DWORD */
821                                1,   /* 1 byte */
822                                1,
823                                0,
824                                0,
825                                slice_batch);
826
827     for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
828         /* PAK for each macroblocks */
829         for (j = 0; j < slice_param->num_macroblocks; j++) {
830             int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
831             int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
832             int first_mb_in_slice = (j == 0);
833             int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
834             int first_mb_in_slice_group = (i == 0 && j == 0);
835             int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
836                                           j == slice_param->num_macroblocks - 1);
837
838             if (slice_param->is_intra_slice) {
839                 gen7_mfc_mpeg2_pak_object_intra(ctx,
840                                                 encoder_context,
841                                                 h_pos, v_pos,
842                                                 first_mb_in_slice,
843                                                 last_mb_in_slice,
844                                                 first_mb_in_slice_group,
845                                                 last_mb_in_slice_group,
846                                                 0x1a,
847                                                 slice_param->quantiser_scale_code,
848                                                 0x3f,
849                                                 0,
850                                                 0xff,
851                                                 slice_batch);
852             } else {
853                 msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
854
855                 if(msg[32] & INTRA_MB_FLAG_MASK) {
856                     gen7_mfc_mpeg2_pak_object_intra(ctx,
857                                                     encoder_context,
858                                                     h_pos, v_pos,
859                                                     first_mb_in_slice,
860                                                     last_mb_in_slice,
861                                                     first_mb_in_slice_group,
862                                                     last_mb_in_slice_group,
863                                                     0x1a,
864                                                     slice_param->quantiser_scale_code,
865                                                     0x3f,
866                                                     0,
867                                                     0xff,
868                                                     slice_batch);
869                 } else {
870
871                     gen7_mfc_mpeg2_pak_object_inter(ctx,
872                                                     encode_state,
873                                                     encoder_context,
874                                                     msg,
875                                                     width_in_mbs, height_in_mbs,
876                                                     h_pos, v_pos,
877                                                     first_mb_in_slice,
878                                                     last_mb_in_slice,
879                                                     first_mb_in_slice_group,
880                                                     last_mb_in_slice_group,
881                                                     slice_param->quantiser_scale_code,
882                                                     0,
883                                                     0xff,
884                                                     slice_batch);
885                 }
886             }
887         }
888
889         slice_param++;
890     }
891
892     dri_bo_unmap(vme_context->vme_output.bo);
893
894     /* tail data */
895     if (next_slice_group_param == NULL) { /* end of a picture */
896         mfc_context->insert_object(ctx,
897                                    encoder_context,
898                                    (unsigned int *)tail_delimiter,
899                                    2,
900                                    8,   /* 8bits in the last DWORD */
901                                    5,   /* 5 bytes */
902                                    1,
903                                    1,
904                                    0,
905                                    slice_batch);
906     } else {        /* end of a lsice group */
907         mfc_context->insert_object(ctx,
908                                    encoder_context,
909                                    (unsigned int *)section_delimiter,
910                                    1,
911                                    8,   /* 8bits in the last DWORD */
912                                    1,   /* 1 byte */
913                                    1,
914                                    1,
915                                    0,
916                                    slice_batch);
917     }
918 }
919
920 /* 
921  * A batch buffer for all slices, including slice state, 
922  * slice insert object and slice pak object commands
923  *
924  */
925 static dri_bo *
926 gen7_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
927                                           struct encode_state *encode_state,
928                                           struct intel_encoder_context *encoder_context)
929 {
930     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
931     struct intel_batchbuffer *batch;
932     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
933     dri_bo *batch_bo;
934     int i;
935
936     batch = mfc_context->aux_batchbuffer;
937     batch_bo = batch->buffer;
938
939     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
940         if (i == encode_state->num_slice_params_ext - 1)
941             next_slice_group_param = NULL;
942         else
943             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
944
945         gen7_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
946     }
947
948     intel_batchbuffer_align(batch, 8);
949     
950     BEGIN_BCS_BATCH(batch, 2);
951     OUT_BCS_BATCH(batch, 0);
952     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
953     ADVANCE_BCS_BATCH(batch);
954
955     dri_bo_reference(batch_bo);
956     intel_batchbuffer_free(batch);
957     mfc_context->aux_batchbuffer = NULL;
958
959     return batch_bo;
960 }
961
962 static void
963 gen7_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
964                                            struct encode_state *encode_state,
965                                            struct intel_encoder_context *encoder_context)
966 {
967     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
968
969     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
970     mfc_context->set_surface_state(ctx, encoder_context);
971     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
972     gen6_mfc_pipe_buf_addr_state(ctx, encoder_context);
973     gen6_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
974     gen7_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
975     gen7_mfc_mpeg2_qm_state(ctx, encoder_context);
976     gen7_mfc_mpeg2_fqm_state(ctx, encoder_context);
977 }
978
979 static void
980 gen7_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
981                                    struct encode_state *encode_state,
982                                    struct intel_encoder_context *encoder_context)
983 {
984     struct intel_batchbuffer *batch = encoder_context->base.batch;
985     dri_bo *slice_batch_bo;
986
987     slice_batch_bo = gen7_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
988
989     // begin programing
990     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
991     intel_batchbuffer_emit_mi_flush(batch);
992     
993     // picture level programing
994     gen7_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
995
996     BEGIN_BCS_BATCH(batch, 2);
997     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
998     OUT_BCS_RELOC(batch,
999                   slice_batch_bo,
1000                   I915_GEM_DOMAIN_COMMAND, 0, 
1001                   0);
1002     ADVANCE_BCS_BATCH(batch);
1003
1004     // end programing
1005     intel_batchbuffer_end_atomic(batch);
1006
1007     dri_bo_unreference(slice_batch_bo);
1008 }
1009
1010 static VAStatus
1011 gen7_mfc_mpeg2_prepare(VADriverContextP ctx,
1012                        struct encode_state *encode_state,
1013                        struct intel_encoder_context *encoder_context)
1014 {
1015     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1016     struct object_surface *obj_surface;
1017     struct object_buffer *obj_buffer;
1018     struct i965_coded_buffer_segment *coded_buffer_segment;
1019     VAStatus vaStatus = VA_STATUS_SUCCESS;
1020     dri_bo *bo;
1021     int i;
1022
1023     /* reconstructed surface */
1024     obj_surface = encode_state->reconstructed_object;
1025     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1026     mfc_context->pre_deblocking_output.bo = obj_surface->bo;
1027     dri_bo_reference(mfc_context->pre_deblocking_output.bo);
1028     mfc_context->surface_state.width = obj_surface->orig_width;
1029     mfc_context->surface_state.height = obj_surface->orig_height;
1030     mfc_context->surface_state.w_pitch = obj_surface->width;
1031     mfc_context->surface_state.h_pitch = obj_surface->height;
1032
1033     /* forward reference */
1034     obj_surface = encode_state->reference_objects[0];
1035
1036     if (obj_surface && obj_surface->bo) {
1037         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
1038         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
1039     } else
1040         mfc_context->reference_surfaces[0].bo = NULL;
1041
1042     /* backward reference */
1043     obj_surface = encode_state->reference_objects[1];
1044
1045     if (obj_surface && obj_surface->bo) {
1046         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
1047         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
1048     } else {
1049         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
1050
1051         if (mfc_context->reference_surfaces[1].bo)
1052             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
1053     }
1054
1055     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
1056         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
1057
1058         if (mfc_context->reference_surfaces[i].bo)
1059             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
1060     }
1061     
1062     /* input YUV surface */
1063     obj_surface = encode_state->input_yuv_object;
1064     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
1065     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
1066
1067     /* coded buffer */
1068     obj_buffer = encode_state->coded_buf_object;
1069     bo = obj_buffer->buffer_store->bo;
1070     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
1071     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
1072     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
1073     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
1074
1075     /* set the internal flag to 0 to indicate the coded size is unknown */
1076     dri_bo_map(bo, 1);
1077     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
1078     coded_buffer_segment->mapped = 0;
1079     coded_buffer_segment->codec = encoder_context->codec;
1080     dri_bo_unmap(bo);
1081
1082     return vaStatus;
1083 }
1084
1085 static VAStatus
1086 gen7_mfc_mpeg2_encode_picture(VADriverContextP ctx, 
1087                               struct encode_state *encode_state,
1088                               struct intel_encoder_context *encoder_context)
1089 {
1090     gen6_mfc_init(ctx, encode_state, encoder_context);
1091     gen7_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
1092     /*Programing bcs pipeline*/
1093     gen7_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
1094     gen6_mfc_run(ctx, encode_state, encoder_context);
1095
1096     return VA_STATUS_SUCCESS;
1097 }
1098
1099 VAStatus
1100 gen7_mfc_pipeline(VADriverContextP ctx,
1101                   VAProfile profile,
1102                   struct encode_state *encode_state,
1103                   struct intel_encoder_context *encoder_context)
1104 {
1105     VAStatus vaStatus;
1106
1107     switch (profile) {
1108     case VAProfileH264ConstrainedBaseline:
1109     case VAProfileH264Main:
1110     case VAProfileH264High:
1111         vaStatus = gen6_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
1112         break;
1113
1114     case VAProfileMPEG2Simple:
1115     case VAProfileMPEG2Main:
1116         vaStatus = gen7_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
1117         break;
1118
1119         /* FIXME: add for other profile */
1120     default:
1121         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
1122         break;
1123     }
1124
1125     return vaStatus;
1126 }
1127
1128 Bool
1129 gen7_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1130 {
1131     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
1132
1133     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
1134
1135     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
1136     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
1137
1138     mfc_context->gpe_context.curbe.length = 32 * 4;
1139
1140     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
1141     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
1142     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
1143     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
1144     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
1145
1146     i965_gpe_load_kernels(ctx,
1147                           &mfc_context->gpe_context,
1148                           gen7_mfc_kernels,
1149                           NUM_MFC_KERNEL);
1150
1151     mfc_context->pipe_mode_select = gen7_mfc_pipe_mode_select;
1152     mfc_context->set_surface_state = gen7_mfc_surface_state;
1153     mfc_context->ind_obj_base_addr_state = gen7_mfc_ind_obj_base_addr_state;
1154     mfc_context->avc_img_state = gen7_mfc_avc_img_state;
1155     mfc_context->avc_qm_state = gen7_mfc_avc_qm_state;
1156     mfc_context->avc_fqm_state = gen7_mfc_avc_fqm_state;
1157     mfc_context->insert_object = gen7_mfc_avc_insert_object;
1158     mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;
1159
1160     encoder_context->mfc_context = mfc_context;
1161     encoder_context->mfc_context_destroy = gen6_mfc_context_destroy;
1162     encoder_context->mfc_pipeline = gen7_mfc_pipeline;
1163     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
1164
1165     return True;
1166 }