decoder: h264: fix frame store logic for MVC.
[platform/upstream/libva-intel-driver.git] / src / gen75_mfc.c
1 /*
2  * Copyright © 2010-2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44 #include "intel_media.h"
45
46 #define AVC_INTRA_RDO_OFFSET    4
47 #define AVC_INTER_RDO_OFFSET    10
48 #define AVC_INTER_MSG_OFFSET    8
49 #define AVC_INTER_MV_OFFSET     48
50 #define AVC_RDO_MASK            0xFFFF
51
52 #define MFC_SOFTWARE_HASWELL    0
53
/* Padded surface-state size: the larger of the GEN6 and GEN7 values. */
#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)
/* Arguments are parenthesized so that expression arguments expand correctly. */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

/* Revisions at or above B0_STEP_REV take the "_bplus" command layouts in this file. */
#define B0_STEP_REV             2
#define IS_STEPPING_BPLUS(i965) (((i965)->intel.revision) >= B0_STEP_REV)
60
61 static const uint32_t gen75_mfc_batchbuffer_avc[][4] = {
62 #include "shaders/utils/mfc_batchbuffer_hsw.g75b"
63 };
64
65 static struct i965_kernel gen75_mfc_kernels[] = {
66     {
67         "MFC AVC INTRA BATCHBUFFER ",
68         MFC_BATCHBUFFER_AVC_INTRA,
69         gen75_mfc_batchbuffer_avc,
70         sizeof(gen75_mfc_batchbuffer_avc),
71         NULL
72     },
73 };
74
75 #define         INTER_MODE_MASK         0x03
76 #define         INTER_8X8               0x03
77 #define         INTER_16X8              0x01
78 #define         INTER_8X16              0x02
79 #define         SUBMB_SHAPE_MASK        0x00FF00
80
81 #define         INTER_MV8               (4 << 20)
82 #define         INTER_MV32              (6 << 20)
83
84
85 static void
86 gen75_mfc_pipe_mode_select(VADriverContextP ctx,
87                            int standard_select,
88                            struct intel_encoder_context *encoder_context)
89 {
90     struct intel_batchbuffer *batch = encoder_context->base.batch;
91     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
92     assert(standard_select == MFX_FORMAT_MPEG2 ||
93            standard_select == MFX_FORMAT_AVC);
94
95     BEGIN_BCS_BATCH(batch, 5);
96
97     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
98     OUT_BCS_BATCH(batch,
99                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
100                   (MFD_MODE_VLD << 15) | /* VLD mode */
101                   (0 << 10) | /* Stream-Out Enable */
102                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
103                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
104                   (0 << 5)  | /* not in stitch mode */
105                   (1 << 4)  | /* encoding mode */
106                   (standard_select << 0));  /* standard select: avc or mpeg2 */
107     OUT_BCS_BATCH(batch,
108                   (0 << 7)  | /* expand NOA bus flag */
109                   (0 << 6)  | /* disable slice-level clock gating */
110                   (0 << 5)  | /* disable clock gating for NOA */
111                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
112                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
113                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
114                   (0 << 1)  |
115                   (0 << 0));
116     OUT_BCS_BATCH(batch, 0);
117     OUT_BCS_BATCH(batch, 0);
118
119     ADVANCE_BCS_BATCH(batch);
120 }
121
122 static void
123 gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
124 {
125     struct intel_batchbuffer *batch = encoder_context->base.batch;
126     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
127
128     BEGIN_BCS_BATCH(batch, 6);
129
130     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
131     OUT_BCS_BATCH(batch, 0);
132     OUT_BCS_BATCH(batch,
133                   ((mfc_context->surface_state.height - 1) << 18) |
134                   ((mfc_context->surface_state.width - 1) << 4));
135     OUT_BCS_BATCH(batch,
136                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
137                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
138                   (0 << 22) | /* surface object control state, FIXME??? */
139                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
140                   (0 << 2)  | /* must be 0 for interleave U/V */
141                   (1 << 1)  | /* must be tiled */
142                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
143     OUT_BCS_BATCH(batch,
144                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
145                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
146     OUT_BCS_BATCH(batch, 0);
147
148     ADVANCE_BCS_BATCH(batch);
149 }
150
151 static void
152 gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
153                                         struct intel_encoder_context *encoder_context)
154 {
155     struct intel_batchbuffer *batch = encoder_context->base.batch;
156     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
157     struct gen6_vme_context *vme_context = encoder_context->vme_context;
158
159     BEGIN_BCS_BATCH(batch, 26);
160
161     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
162     /* the DW1-3 is for the MFX indirect bistream offset */
163     OUT_BCS_BATCH(batch, 0);
164     OUT_BCS_BATCH(batch, 0);
165     OUT_BCS_BATCH(batch, 0);
166     /* the DW4-5 is the MFX upper bound */
167     OUT_BCS_BATCH(batch, 0);
168     OUT_BCS_BATCH(batch, 0);
169
170     /* the DW6-10 is for MFX Indirect MV Object Base Address */
171     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
172     OUT_BCS_BATCH(batch, 0);
173     OUT_BCS_BATCH(batch, 0);
174     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
175     OUT_BCS_BATCH(batch, 0);
176
177     /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
178     OUT_BCS_BATCH(batch, 0);
179     OUT_BCS_BATCH(batch, 0);
180     OUT_BCS_BATCH(batch, 0);
181     OUT_BCS_BATCH(batch, 0);
182     OUT_BCS_BATCH(batch, 0);
183
184     /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
185     OUT_BCS_BATCH(batch, 0);
186     OUT_BCS_BATCH(batch, 0);
187     OUT_BCS_BATCH(batch, 0);
188     OUT_BCS_BATCH(batch, 0);
189     OUT_BCS_BATCH(batch, 0);
190
191     /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/        
192     OUT_BCS_RELOC(batch,
193                   mfc_context->mfc_indirect_pak_bse_object.bo,
194                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
195                   0);
196     OUT_BCS_BATCH(batch, 0);
197     OUT_BCS_BATCH(batch, 0);
198         
199     OUT_BCS_RELOC(batch,
200                   mfc_context->mfc_indirect_pak_bse_object.bo,
201                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
202                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
203     OUT_BCS_BATCH(batch, 0);
204
205     ADVANCE_BCS_BATCH(batch);
206 }
207
208 static void
209 gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
210 {
211     struct intel_batchbuffer *batch = encoder_context->base.batch;
212     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
213     struct gen6_vme_context *vme_context = encoder_context->vme_context;
214     struct i965_driver_data *i965 = i965_driver_data(ctx);
215
216     if (IS_STEPPING_BPLUS(i965)) {
217         gen75_mfc_ind_obj_base_addr_state_bplus(ctx, encoder_context);
218         return;
219     }
220
221     BEGIN_BCS_BATCH(batch, 11);
222
223     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
224     OUT_BCS_BATCH(batch, 0);
225     OUT_BCS_BATCH(batch, 0);
226     /* MFX Indirect MV Object Base Address */
227     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
228     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
229     OUT_BCS_BATCH(batch, 0);
230     OUT_BCS_BATCH(batch, 0);
231     OUT_BCS_BATCH(batch, 0);
232     OUT_BCS_BATCH(batch, 0);
233     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
234     OUT_BCS_RELOC(batch,
235                   mfc_context->mfc_indirect_pak_bse_object.bo,
236                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
237                   0);
238     OUT_BCS_RELOC(batch,
239                   mfc_context->mfc_indirect_pak_bse_object.bo,
240                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
241                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
242
243     ADVANCE_BCS_BATCH(batch);
244 }
245
246 static void
247 gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,  
248                         struct intel_encoder_context *encoder_context)
249 {
250     struct intel_batchbuffer *batch = encoder_context->base.batch;
251     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
252     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
253
254     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
255     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
256
257     BEGIN_BCS_BATCH(batch, 16);
258
259     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
260     /*DW1. MB setting of frame */
261     OUT_BCS_BATCH(batch,
262                   ((width_in_mbs * height_in_mbs - 1) & 0xFFFF));
263     OUT_BCS_BATCH(batch, 
264                   ((height_in_mbs - 1) << 16) | 
265                   ((width_in_mbs - 1) << 0));
266     /* DW3 QP setting */
267     OUT_BCS_BATCH(batch, 
268                   (0 << 24) |   /* Second Chroma QP Offset */
269                   (0 << 16) |   /* Chroma QP Offset */
270                   (0 << 14) |   /* Max-bit conformance Intra flag */
271                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
272                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
273                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
274                   (0 << 8)  |   /* FIXME: Image Structure */
275                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
276     OUT_BCS_BATCH(batch,
277                   (0 << 16) |   /* Mininum Frame size */
278                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
279                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
280                   (0 << 13) |   /* CABAC 0 word insertion test enable */
281                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
282                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
283                   (0 << 8)  |   /* FIXME: MbMvFormatFlag */
284                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
285                   (0 << 6)  |   /* Only valid for VLD decoding mode */
286                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
287                   (0 << 4)  |   /* Direct 8x8 inference flag */
288                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
289                   (1 << 2)  |   /* Frame MB only flag */
290                   (0 << 1)  |   /* MBAFF mode is in active */
291                   (0 << 0));    /* Field picture flag */
292     /* DW5 Trellis quantization */
293     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
294     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
295                   (0xBB8 << 16) |       /* InterMbMaxSz */
296                   (0xEE8) );            /* IntraMbMaxSz */
297     OUT_BCS_BATCH(batch, 0);            /* Reserved */
298     /* DW8. QP delta */
299     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
300     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
301     /* DW10. Bit setting for MB */
302     OUT_BCS_BATCH(batch, 0x8C000000);
303     OUT_BCS_BATCH(batch, 0x00010000);
304     /* DW12. */
305     OUT_BCS_BATCH(batch, 0);
306     OUT_BCS_BATCH(batch, 0x02010100);
307     /* DW14. For short format */
308     OUT_BCS_BATCH(batch, 0);
309     OUT_BCS_BATCH(batch, 0);
310
311     ADVANCE_BCS_BATCH(batch);
312 }
313
314 static void
315 gen75_mfc_qm_state(VADriverContextP ctx,
316                    int qm_type,
317                    unsigned int *qm,
318                    int qm_length,
319                    struct intel_encoder_context *encoder_context)
320 {
321     struct intel_batchbuffer *batch = encoder_context->base.batch;
322     unsigned int qm_buffer[16];
323
324     assert(qm_length <= 16);
325     assert(sizeof(*qm) == 4);
326     memcpy(qm_buffer, qm, qm_length * 4);
327
328     BEGIN_BCS_BATCH(batch, 18);
329     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
330     OUT_BCS_BATCH(batch, qm_type << 0);
331     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
332     ADVANCE_BCS_BATCH(batch);
333 }
334
335 static void
336 gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
337 {
338     unsigned int qm[16] = {
339         0x10101010, 0x10101010, 0x10101010, 0x10101010,
340         0x10101010, 0x10101010, 0x10101010, 0x10101010,
341         0x10101010, 0x10101010, 0x10101010, 0x10101010,
342         0x10101010, 0x10101010, 0x10101010, 0x10101010
343     };
344
345     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
346     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
347     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
348     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
349 }
350
351 static void
352 gen75_mfc_fqm_state(VADriverContextP ctx,
353                     int fqm_type,
354                     unsigned int *fqm,
355                     int fqm_length,
356                     struct intel_encoder_context *encoder_context)
357 {
358     struct intel_batchbuffer *batch = encoder_context->base.batch;
359     unsigned int fqm_buffer[32];
360
361     assert(fqm_length <= 32);
362     assert(sizeof(*fqm) == 4);
363     memcpy(fqm_buffer, fqm, fqm_length * 4);
364
365     BEGIN_BCS_BATCH(batch, 34);
366     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
367     OUT_BCS_BATCH(batch, fqm_type << 0);
368     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
369     ADVANCE_BCS_BATCH(batch);
370 }
371
372 static void
373 gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
374 {
375     unsigned int qm[32] = {
376         0x10001000, 0x10001000, 0x10001000, 0x10001000,
377         0x10001000, 0x10001000, 0x10001000, 0x10001000,
378         0x10001000, 0x10001000, 0x10001000, 0x10001000,
379         0x10001000, 0x10001000, 0x10001000, 0x10001000,
380         0x10001000, 0x10001000, 0x10001000, 0x10001000,
381         0x10001000, 0x10001000, 0x10001000, 0x10001000,
382         0x10001000, 0x10001000, 0x10001000, 0x10001000,
383         0x10001000, 0x10001000, 0x10001000, 0x10001000
384     };
385
386     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
387     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
388     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
389     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
390 }
391
392 static void
393 gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
394                             unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
395                             int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
396                             struct intel_batchbuffer *batch)
397 {
398     if (batch == NULL)
399         batch = encoder_context->base.batch;
400
401     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
402
403     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
404     OUT_BCS_BATCH(batch,
405                   (0 << 16) |   /* always start at offset 0 */
406                   (data_bits_in_last_dw << 8) |
407                   (skip_emul_byte_count << 4) |
408                   (!!emulation_flag << 3) |
409                   ((!!is_last_header) << 2) |
410                   ((!!is_end_of_slice) << 1) |
411                   (0 << 0));    /* FIXME: ??? */
412     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
413
414     ADVANCE_BCS_BATCH(batch);
415 }
416
417
418 static void gen75_mfc_init(VADriverContextP ctx,
419                            struct encode_state *encode_state,
420                            struct intel_encoder_context *encoder_context)
421 {
422     struct i965_driver_data *i965 = i965_driver_data(ctx);
423     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
424     dri_bo *bo;
425     int i;
426     int width_in_mbs = 0;
427     int height_in_mbs = 0;
428     int slice_batchbuffer_size;
429
430     if (encoder_context->codec == CODEC_H264 ||
431         encoder_context->codec == CODEC_H264_MVC) {
432         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
433         width_in_mbs = pSequenceParameter->picture_width_in_mbs;
434         height_in_mbs = pSequenceParameter->picture_height_in_mbs;
435     } else {
436         VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
437
438         assert(encoder_context->codec == CODEC_MPEG2);
439
440         width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
441         height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
442     }
443
444     slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
445                 (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
446
447     /*Encode common setup for MFC*/
448     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
449     mfc_context->post_deblocking_output.bo = NULL;
450
451     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
452     mfc_context->pre_deblocking_output.bo = NULL;
453
454     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
455     mfc_context->uncompressed_picture_source.bo = NULL;
456
457     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
458     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
459
460     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
461         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
462         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
463         mfc_context->direct_mv_buffers[i].bo = NULL;
464     }
465
466     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
467         if (mfc_context->reference_surfaces[i].bo != NULL)
468             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
469         mfc_context->reference_surfaces[i].bo = NULL;  
470     }
471
472     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
473     bo = dri_bo_alloc(i965->intel.bufmgr,
474                       "Buffer",
475                       width_in_mbs * 64,
476                       64);
477     assert(bo);
478     mfc_context->intra_row_store_scratch_buffer.bo = bo;
479
480     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
481     bo = dri_bo_alloc(i965->intel.bufmgr,
482                       "Buffer",
483                       width_in_mbs * height_in_mbs * 16,
484                       64);
485     assert(bo);
486     mfc_context->macroblock_status_buffer.bo = bo;
487
488     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
489     bo = dri_bo_alloc(i965->intel.bufmgr,
490                       "Buffer",
491                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
492                       64);
493     assert(bo);
494     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
495
496     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
497     bo = dri_bo_alloc(i965->intel.bufmgr,
498                       "Buffer",
499                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
500                       0x1000);
501     assert(bo);
502     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
503
504     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
505     mfc_context->mfc_batchbuffer_surface.bo = NULL;
506
507     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
508     mfc_context->aux_batchbuffer_surface.bo = NULL;
509
510     if (mfc_context->aux_batchbuffer)
511         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
512
513     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD,
514                                                         slice_batchbuffer_size);
515     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
516     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
517     mfc_context->aux_batchbuffer_surface.pitch = 16;
518     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
519     mfc_context->aux_batchbuffer_surface.size_block = 16;
520
521     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
522 }
523
524 static void
525 gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
526                                     struct intel_encoder_context *encoder_context)
527 {
528     struct intel_batchbuffer *batch = encoder_context->base.batch;
529     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
530     int i;
531
532     BEGIN_BCS_BATCH(batch, 61);
533
534     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
535
536     /* the DW1-3 is for pre_deblocking */
537     if (mfc_context->pre_deblocking_output.bo)
538         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
539                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
540                       0);
541     else
542         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
543
544     OUT_BCS_BATCH(batch, 0);
545     OUT_BCS_BATCH(batch, 0);
546     /* the DW4-6 is for the post_deblocking */
547
548     if (mfc_context->post_deblocking_output.bo)
549         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
550                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
551                       0);                                                                                       /* post output addr  */ 
552     else
553         OUT_BCS_BATCH(batch, 0);
554     OUT_BCS_BATCH(batch, 0);
555     OUT_BCS_BATCH(batch, 0);
556
557     /* the DW7-9 is for the uncompressed_picture */
558     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
559                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
560                   0); /* uncompressed data */
561
562     OUT_BCS_BATCH(batch, 0);
563     OUT_BCS_BATCH(batch, 0);
564
565     /* the DW10-12 is for the mb status */
566     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
567                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
568                   0); /* StreamOut data*/
569     OUT_BCS_BATCH(batch, 0);
570     OUT_BCS_BATCH(batch, 0);
571
572     /* the DW13-15 is for the intra_row_store_scratch */
573     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
574                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
575                   0);   
576     OUT_BCS_BATCH(batch, 0);
577     OUT_BCS_BATCH(batch, 0);
578
579     /* the DW16-18 is for the deblocking filter */
580     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
581                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
582                   0);
583     OUT_BCS_BATCH(batch, 0);
584     OUT_BCS_BATCH(batch, 0);
585
586     /* the DW 19-50 is for Reference pictures*/
587     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
588         if ( mfc_context->reference_surfaces[i].bo != NULL) {
589             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
590                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
591                           0);                   
592         } else {
593             OUT_BCS_BATCH(batch, 0);
594         }
595         OUT_BCS_BATCH(batch, 0);
596     }
597     OUT_BCS_BATCH(batch, 0);
598
599     /* The DW 52-54 is for the MB status buffer */
600     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
601                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
602                   0);                                                                                   /* Macroblock status buffer*/
603         
604     OUT_BCS_BATCH(batch, 0);
605     OUT_BCS_BATCH(batch, 0);
606
607     /* the DW 55-57 is the ILDB buffer */
608     OUT_BCS_BATCH(batch, 0);
609     OUT_BCS_BATCH(batch, 0);
610     OUT_BCS_BATCH(batch, 0);
611
612     /* the DW 58-60 is the second ILDB buffer */
613     OUT_BCS_BATCH(batch, 0);
614     OUT_BCS_BATCH(batch, 0);
615     OUT_BCS_BATCH(batch, 0);
616     ADVANCE_BCS_BATCH(batch);
617 }
618
619 static void
620 gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
621 {
622     struct intel_batchbuffer *batch = encoder_context->base.batch;
623     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
624     struct i965_driver_data *i965 = i965_driver_data(ctx);
625     int i;
626
627     if (IS_STEPPING_BPLUS(i965)) {
628         gen75_mfc_pipe_buf_addr_state_bplus(ctx, encoder_context);
629         return;
630     }
631
632     BEGIN_BCS_BATCH(batch, 25);
633
634     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
635
636     if (mfc_context->pre_deblocking_output.bo)
637         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
638                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
639                       0);
640     else
641         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
642
643     if (mfc_context->post_deblocking_output.bo)
644         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
645                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
646                       0);                                                                                       /* post output addr  */ 
647     else
648         OUT_BCS_BATCH(batch, 0);
649
650     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
651                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
652                   0);                                                                                   /* uncompressed data */
653     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
654                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
655                   0);                                                                                   /* StreamOut data*/
656     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
657                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
658                   0);   
659     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
660                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
661                   0);
662     /* 7..22 Reference pictures*/
663     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
664         if ( mfc_context->reference_surfaces[i].bo != NULL) {
665             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
666                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
667                           0);                   
668         } else {
669             OUT_BCS_BATCH(batch, 0);
670         }
671     }
672     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
673                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
674                   0);                                                                                   /* Macroblock status buffer*/
675
676     OUT_BCS_BATCH(batch, 0);
677
678     ADVANCE_BCS_BATCH(batch);
679 }
680
681 static void
682 gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
683                                      struct intel_encoder_context *encoder_context)
684 {
685     struct intel_batchbuffer *batch = encoder_context->base.batch;
686     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
687
688     int i;
689
690     BEGIN_BCS_BATCH(batch, 71);
691
692     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
693
694     /* Reference frames and Current frames */
695     /* the DW1-32 is for the direct MV for reference */
696     for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
697         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
698             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
699                           I915_GEM_DOMAIN_INSTRUCTION, 0,
700                           0);
701             OUT_BCS_BATCH(batch, 0);
702         } else {
703             OUT_BCS_BATCH(batch, 0);
704             OUT_BCS_BATCH(batch, 0);
705         }
706     }
707     OUT_BCS_BATCH(batch, 0);
708
709     /* the DW34-36 is the MV for the current reference */
710     OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
711                   I915_GEM_DOMAIN_INSTRUCTION, 0,
712                   0);
713
714     OUT_BCS_BATCH(batch, 0);
715     OUT_BCS_BATCH(batch, 0);
716
717     /* POL list */
718     for(i = 0; i < 32; i++) {
719         OUT_BCS_BATCH(batch, i/2);
720     }
721     OUT_BCS_BATCH(batch, 0);
722     OUT_BCS_BATCH(batch, 0);
723
724     ADVANCE_BCS_BATCH(batch);
725 }
726
727 static void
728 gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
729 {
730     struct intel_batchbuffer *batch = encoder_context->base.batch;
731     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
732     struct i965_driver_data *i965 = i965_driver_data(ctx);
733     int i;
734
735     if (IS_STEPPING_BPLUS(i965)) {
736         gen75_mfc_avc_directmode_state_bplus(ctx, encoder_context);
737         return;
738     }
739
740     BEGIN_BCS_BATCH(batch, 69);
741
742     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
743
744     /* Reference frames and Current frames */
745     for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
746         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
747             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
748                           I915_GEM_DOMAIN_INSTRUCTION, 0,
749                           0);
750         } else {
751             OUT_BCS_BATCH(batch, 0);
752         }
753     }
754
755     /* POL list */
756     for(i = 0; i < 32; i++) {
757         OUT_BCS_BATCH(batch, i/2);
758     }
759     OUT_BCS_BATCH(batch, 0);
760     OUT_BCS_BATCH(batch, 0);
761
762     ADVANCE_BCS_BATCH(batch);
763 }
764
765
766 static void
767 gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
768                                         struct intel_encoder_context *encoder_context)
769 {
770     struct intel_batchbuffer *batch = encoder_context->base.batch;
771     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
772
773     BEGIN_BCS_BATCH(batch, 10);
774
775     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
776     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
777                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
778                   0);
779     OUT_BCS_BATCH(batch, 0);
780     OUT_BCS_BATCH(batch, 0);
781         
782     /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
783     OUT_BCS_BATCH(batch, 0);
784     OUT_BCS_BATCH(batch, 0);
785     OUT_BCS_BATCH(batch, 0);
786
787     /* the DW7-9 is for Bitplane Read Buffer Base Address */
788     OUT_BCS_BATCH(batch, 0);
789     OUT_BCS_BATCH(batch, 0);
790     OUT_BCS_BATCH(batch, 0);
791
792     ADVANCE_BCS_BATCH(batch);
793 }
794
795 static void
796 gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
797 {
798     struct intel_batchbuffer *batch = encoder_context->base.batch;
799     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
800     struct i965_driver_data *i965 = i965_driver_data(ctx);
801
802     if (IS_STEPPING_BPLUS(i965)) {
803         gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, encoder_context);
804         return;
805     }
806
807     BEGIN_BCS_BATCH(batch, 4);
808
809     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
810     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
811                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
812                   0);
813     OUT_BCS_BATCH(batch, 0);
814     OUT_BCS_BATCH(batch, 0);
815
816     ADVANCE_BCS_BATCH(batch);
817 }
818
819
820 static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
821                                                        struct encode_state *encode_state,
822                                                        struct intel_encoder_context *encoder_context)
823 {
824     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
825
826     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
827     mfc_context->set_surface_state(ctx, encoder_context);
828     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
829     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
830     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
831     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
832     mfc_context->avc_qm_state(ctx, encoder_context);
833     mfc_context->avc_fqm_state(ctx, encoder_context);
834     gen75_mfc_avc_directmode_state(ctx, encoder_context); 
835     intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
836 }
837
838
839 static VAStatus gen75_mfc_run(VADriverContextP ctx, 
840                               struct encode_state *encode_state,
841                               struct intel_encoder_context *encoder_context)
842 {
843     struct intel_batchbuffer *batch = encoder_context->base.batch;
844
845     intel_batchbuffer_flush(batch);             //run the pipeline
846
847     return VA_STATUS_SUCCESS;
848 }
849
850
851 static VAStatus
852 gen75_mfc_stop(VADriverContextP ctx, 
853                struct encode_state *encode_state,
854                struct intel_encoder_context *encoder_context,
855                int *encoded_bits_size)
856 {
857     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
858     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
859     VACodedBufferSegment *coded_buffer_segment;
860     
861     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
862     assert(vaStatus == VA_STATUS_SUCCESS);
863     *encoded_bits_size = coded_buffer_segment->size * 8;
864     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
865
866     return VA_STATUS_SUCCESS;
867 }
868
869
870 static void
871 gen75_mfc_avc_slice_state(VADriverContextP ctx,
872                           VAEncPictureParameterBufferH264 *pic_param,
873                           VAEncSliceParameterBufferH264 *slice_param,
874                           struct encode_state *encode_state,
875                           struct intel_encoder_context *encoder_context,
876                           int rate_control_enable,
877                           int qp,
878                           struct intel_batchbuffer *batch)
879 {
880     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
881     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
882     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
883     int beginmb = slice_param->macroblock_address;
884     int endmb = beginmb + slice_param->num_macroblocks;
885     int beginx = beginmb % width_in_mbs;
886     int beginy = beginmb / width_in_mbs;
887     int nextx =  endmb % width_in_mbs;
888     int nexty = endmb / width_in_mbs;
889     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
890     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
891     int maxQpN, maxQpP;
892     unsigned char correct[6], grow, shrink;
893     int i;
894     int weighted_pred_idc = 0;
895     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
896     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
897     int num_ref_l0 = 0, num_ref_l1 = 0;
898
899     if (batch == NULL)
900         batch = encoder_context->base.batch;
901
902     if (slice_type == SLICE_TYPE_I) {
903         luma_log2_weight_denom = 0;
904         chroma_log2_weight_denom = 0;
905     } else if (slice_type == SLICE_TYPE_P) {
906         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
907         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
908
909         if (slice_param->num_ref_idx_active_override_flag)
910             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
911     } else if (slice_type == SLICE_TYPE_B) {
912         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
913         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
914         num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
915
916         if (slice_param->num_ref_idx_active_override_flag) {
917             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
918             num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
919         }
920
921         if (weighted_pred_idc == 2) {
922             /* 8.4.3 - Derivation process for prediction weights (8-279) */
923             luma_log2_weight_denom = 5;
924             chroma_log2_weight_denom = 5;
925         }
926     }
927
928     maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
929     maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
930
931     for (i = 0; i < 6; i++)
932         correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
933
934     grow = mfc_context->bit_rate_control_context[slice_type].GrowInit + 
935         (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
936     shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit + 
937         (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
938
939     BEGIN_BCS_BATCH(batch, 11);;
940
941     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
942     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
943
944     OUT_BCS_BATCH(batch,
945                   (num_ref_l0 << 16) |
946                   (num_ref_l1 << 24) |
947                   (chroma_log2_weight_denom << 8) |
948                   (luma_log2_weight_denom << 0));
949
950     OUT_BCS_BATCH(batch, 
951                   (weighted_pred_idc << 30) |
952                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
953                   (slice_param->disable_deblocking_filter_idc << 27) |
954                   (slice_param->cabac_init_idc << 24) |
955                   (qp<<16) |                    /*Slice Quantization Parameter*/
956                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
957                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
958     OUT_BCS_BATCH(batch,
959                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
960                   (beginx << 16) |
961                   slice_param->macroblock_address );
962     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
963     OUT_BCS_BATCH(batch, 
964                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
965                   (1 << 30) |           /*ResetRateControlCounter*/
966                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
967                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
968                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
969                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
970                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
971                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
972                   (last_slice << 19) |     /*IsLastSlice*/
973                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
974                   (1 << 17) |       /*HeaderPresentFlag*/       
975                   (1 << 16) |       /*SliceData PresentFlag*/
976                   (1 << 15) |       /*TailPresentFlag*/
977                   (1 << 13) |       /*RBSP NAL TYPE*/   
978                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
979     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
980     OUT_BCS_BATCH(batch,
981                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
982                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
983                   (shrink << 8)  |
984                   (grow << 0));   
985     OUT_BCS_BATCH(batch,
986                   (correct[5] << 20) |
987                   (correct[4] << 16) |
988                   (correct[3] << 12) |
989                   (correct[2] << 8) |
990                   (correct[1] << 4) |
991                   (correct[0] << 0));
992     OUT_BCS_BATCH(batch, 0);
993
994     ADVANCE_BCS_BATCH(batch);
995 }
996
997
998 #if MFC_SOFTWARE_HASWELL
999
1000 static int
1001 gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
1002                                int qp,unsigned int *msg,
1003                                struct intel_encoder_context *encoder_context,
1004                                unsigned char target_mb_size, unsigned char max_mb_size,
1005                                struct intel_batchbuffer *batch)
1006 {
1007     int len_in_dwords = 12;
1008     unsigned int intra_msg;
1009 #define         INTRA_MSG_FLAG          (1 << 13)
1010 #define         INTRA_MBTYPE_MASK       (0x1F0000)
1011     if (batch == NULL)
1012         batch = encoder_context->base.batch;
1013
1014     BEGIN_BCS_BATCH(batch, len_in_dwords);
1015
1016     intra_msg = msg[0] & 0xC0FF;
1017     intra_msg |= INTRA_MSG_FLAG;
1018     intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
1019     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1020     OUT_BCS_BATCH(batch, 0);
1021     OUT_BCS_BATCH(batch, 0);
1022     OUT_BCS_BATCH(batch, 
1023                   (0 << 24) |           /* PackedMvNum, Debug*/
1024                   (0 << 20) |           /* No motion vector */
1025                   (1 << 19) |           /* CbpDcY */
1026                   (1 << 18) |           /* CbpDcU */
1027                   (1 << 17) |           /* CbpDcV */
1028                   intra_msg);
1029
1030     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
1031     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
1032     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
1033
1034     /*Stuff for Intra MB*/
1035     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
1036     OUT_BCS_BATCH(batch, msg[2]);       
1037     OUT_BCS_BATCH(batch, msg[3]&0xFF);  
1038     
1039     /*MaxSizeInWord and TargetSzieInWord*/
1040     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1041                   (target_mb_size << 16) );
1042
1043     OUT_BCS_BATCH(batch, 0);
1044
1045     ADVANCE_BCS_BATCH(batch);
1046
1047     return len_in_dwords;
1048 }
1049
1050 static int
1051 gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1052                                unsigned int *msg, unsigned int offset,
1053                                struct intel_encoder_context *encoder_context,
1054                                unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1055                                struct intel_batchbuffer *batch)
1056 {
1057     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1058     int len_in_dwords = 12;
1059     unsigned int inter_msg = 0;
1060     if (batch == NULL)
1061         batch = encoder_context->base.batch;
1062     {
1063 #define MSG_MV_OFFSET   4
1064         unsigned int *mv_ptr;
1065         mv_ptr = msg + MSG_MV_OFFSET;
1066         /* MV of VME output is based on 16 sub-blocks. So it is necessary
1067          * to convert them to be compatible with the format of AVC_PAK
1068          * command.
1069          */
1070         if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
1071             /* MV[0] and MV[2] are replicated */
1072             mv_ptr[4] = mv_ptr[0];
1073             mv_ptr[5] = mv_ptr[1];
1074             mv_ptr[2] = mv_ptr[8];
1075             mv_ptr[3] = mv_ptr[9];
1076             mv_ptr[6] = mv_ptr[8];
1077             mv_ptr[7] = mv_ptr[9];
1078         } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
1079             /* MV[0] and MV[1] are replicated */
1080             mv_ptr[2] = mv_ptr[0];
1081             mv_ptr[3] = mv_ptr[1];
1082             mv_ptr[4] = mv_ptr[16];
1083             mv_ptr[5] = mv_ptr[17];
1084             mv_ptr[6] = mv_ptr[24];
1085             mv_ptr[7] = mv_ptr[25];
1086         } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1087                    !(msg[1] & SUBMB_SHAPE_MASK)) {
1088             /* Don't touch MV[0] or MV[1] */
1089             mv_ptr[2] = mv_ptr[8];
1090             mv_ptr[3] = mv_ptr[9];
1091             mv_ptr[4] = mv_ptr[16];
1092             mv_ptr[5] = mv_ptr[17];
1093             mv_ptr[6] = mv_ptr[24];
1094             mv_ptr[7] = mv_ptr[25];
1095         }
1096     }
1097
1098     BEGIN_BCS_BATCH(batch, len_in_dwords);
1099
1100     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1101
1102     inter_msg = 32;
1103     /* MV quantity */
1104     if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
1105         if (msg[1] & SUBMB_SHAPE_MASK)
1106             inter_msg = 128;
1107     }
1108     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
1109     OUT_BCS_BATCH(batch, offset);
1110     inter_msg = msg[0] & (0x1F00FFFF);
1111     inter_msg |= INTER_MV8;
1112     inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
1113     if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1114         (msg[1] & SUBMB_SHAPE_MASK)) {
1115         inter_msg |= INTER_MV32;
1116     }
1117
1118     OUT_BCS_BATCH(batch, inter_msg);
1119
1120     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
1121     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
1122 #if 0 
1123     if ( slice_type == SLICE_TYPE_B) {
1124         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
1125     } else {
1126         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
1127     }
1128 #else
1129     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
1130 #endif
1131
1132     inter_msg = msg[1] >> 8;
1133     /*Stuff for Inter MB*/
1134     OUT_BCS_BATCH(batch, inter_msg);        
1135     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
1136     OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);
1137
1138     /*MaxSizeInWord and TargetSzieInWord*/
1139     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1140                   (target_mb_size << 16) );
1141
1142     OUT_BCS_BATCH(batch, 0x0);    
1143
1144     ADVANCE_BCS_BATCH(batch);
1145
1146     return len_in_dwords;
1147 }
1148
1149 static void 
1150 gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1151                                         struct encode_state *encode_state,
1152                                         struct intel_encoder_context *encoder_context,
1153                                         int slice_index,
1154                                         struct intel_batchbuffer *slice_batch)
1155 {
1156     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1157     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1158     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1159     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1160     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1161     unsigned int *msg = NULL, offset = 0;
1162     unsigned char *msg_ptr = NULL;
1163     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1164     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1165     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1166     int i,x,y;
1167     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1168     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1169     unsigned char *slice_header = NULL;
1170     int slice_header_length_in_bits = 0;
1171     unsigned int tail_data[] = { 0x0, 0x0 };
1172     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1173     int is_intra = slice_type == SLICE_TYPE_I;
1174
1175     if (rate_control_mode == VA_RC_CBR) {
1176         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1177         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1178     }
1179
1180     /* only support for 8-bit pixel bit-depth */
1181     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1182     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1183     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1184     assert(qp >= 0 && qp < 52);
1185
1186     gen75_mfc_avc_slice_state(ctx,
1187                               pPicParameter,
1188                               pSliceParameter,
1189                               encode_state, encoder_context,
1190                               (rate_control_mode == VA_RC_CBR), qp, slice_batch);
1191
1192     if ( slice_index == 0)
1193         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1194
1195     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1196
1197     // slice hander
1198     mfc_context->insert_object(ctx, encoder_context,
1199                                (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1200                                5,  /* first 5 bytes are start code + nal unit type */
1201                                1, 0, 1, slice_batch);
1202
1203     free(slice_header);
1204
1205     dri_bo_map(vme_context->vme_output.bo , 1);
1206     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1207
1208     if (is_intra) {
1209         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1210     } else {
1211         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1212     }
1213    
1214     for (i = pSliceParameter->macroblock_address; 
1215          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1216         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1217         x = i % width_in_mbs;
1218         y = i / width_in_mbs;
1219         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1220
1221         if (is_intra) {
1222             assert(msg);
1223             gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1224         } else {
1225             int inter_rdo, intra_rdo;
1226             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1227             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1228             offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
1229             if (intra_rdo < inter_rdo) { 
1230                 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1231             } else {
1232                 msg += AVC_INTER_MSG_OFFSET;
1233                 gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, slice_type, slice_batch);
1234             }
1235         }
1236     }
1237    
1238     dri_bo_unmap(vme_context->vme_output.bo);
1239
1240     if ( last_slice ) {    
1241         mfc_context->insert_object(ctx, encoder_context,
1242                                    tail_data, 2, 8,
1243                                    2, 1, 1, 0, slice_batch);
1244     } else {
1245         mfc_context->insert_object(ctx, encoder_context,
1246                                    tail_data, 1, 8,
1247                                    1, 1, 1, 0, slice_batch);
1248     }
1249
1250
1251 }
1252
1253 static dri_bo *
1254 gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1255                                    struct encode_state *encode_state,
1256                                    struct intel_encoder_context *encoder_context)
1257 {
1258     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1259     struct i965_driver_data *i965 = i965_driver_data(ctx);
1260     struct intel_batchbuffer *batch;
1261     dri_bo *batch_bo;
1262     int i;
1263     int buffer_size;
1264
1265     batch = mfc_context->aux_batchbuffer;
1266     batch_bo = batch->buffer;
1267     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1268         gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1269     }
1270
1271     intel_batchbuffer_align(batch, 8);
1272     
1273     BEGIN_BCS_BATCH(batch, 2);
1274     OUT_BCS_BATCH(batch, 0);
1275     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1276     ADVANCE_BCS_BATCH(batch);
1277
1278     dri_bo_reference(batch_bo);
1279
1280     intel_batchbuffer_free(batch);
1281     mfc_context->aux_batchbuffer = NULL;
1282
1283     return batch_bo;
1284 }
1285
1286 #else
1287
1288 static void
1289 gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1290                                      struct encode_state *encode_state,
1291                                      struct intel_encoder_context *encoder_context)
1292
1293 {
1294     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1295     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1296
1297     assert(vme_context->vme_output.bo);
1298     mfc_context->buffer_suface_setup(ctx,
1299                                      &mfc_context->gpe_context,
1300                                      &vme_context->vme_output,
1301                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1302                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1303 }
1304
1305 static void
1306 gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1307                                       struct encode_state *encode_state,
1308                                       struct intel_encoder_context *encoder_context)
1309
1310 {
1311     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1312     assert(mfc_context->aux_batchbuffer_surface.bo);
1313     mfc_context->buffer_suface_setup(ctx,
1314                                      &mfc_context->gpe_context,
1315                                      &mfc_context->aux_batchbuffer_surface,
1316                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1317                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1318 }
1319
1320 static void
1321 gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
1322                                      struct encode_state *encode_state,
1323                                      struct intel_encoder_context *encoder_context)
1324 {
1325     gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1326     gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1327 }
1328
1329 static void
1330 gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
1331                                  struct encode_state *encode_state,
1332                                  struct intel_encoder_context *encoder_context)
1333 {
1334     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1335     struct gen6_interface_descriptor_data *desc;   
1336     int i;
1337     dri_bo *bo;
1338
1339     bo = mfc_context->gpe_context.idrt.bo;
1340     dri_bo_map(bo, 1);
1341     assert(bo->virtual);
1342     desc = bo->virtual;
1343
1344     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1345         struct i965_kernel *kernel;
1346
1347         kernel = &mfc_context->gpe_context.kernels[i];
1348         assert(sizeof(*desc) == 32);
1349
1350         /*Setup the descritor table*/
1351         memset(desc, 0, sizeof(*desc));
1352         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1353         desc->desc2.sampler_count = 0;
1354         desc->desc2.sampler_state_pointer = 0;
1355         desc->desc3.binding_table_entry_count = 2;
1356         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1357         desc->desc4.constant_urb_entry_read_offset = 0;
1358         desc->desc4.constant_urb_entry_read_length = 4;
1359                 
1360         /*kernel start*/
1361         dri_bo_emit_reloc(bo,   
1362                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1363                           0,
1364                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1365                           kernel->bo);
1366         desc++;
1367     }
1368
1369     dri_bo_unmap(bo);
1370 }
1371
1372 static void
1373 gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1374                                      struct encode_state *encode_state,
1375                                      struct intel_encoder_context *encoder_context)
1376 {
1377     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1378     
1379     (void)mfc_context;
1380 }
1381
1382 #define AVC_PAK_LEN_IN_BYTE     48
1383 #define AVC_PAK_LEN_IN_OWORD    3
1384
1385 static void
1386 gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1387                                           uint32_t intra_flag,
1388                                           int head_offset,
1389                                           int number_mb_cmds,
1390                                           int slice_end_x,
1391                                           int slice_end_y,
1392                                           int mb_x,
1393                                           int mb_y,
1394                                           int width_in_mbs,
1395                                           int qp,
1396                                           uint32_t fwd_ref,
1397                                           uint32_t bwd_ref)
1398 {
1399     uint32_t temp_value;
1400     BEGIN_BATCH(batch, 14);
1401     
1402     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (14 - 2));
1403     OUT_BATCH(batch, 0);
1404     OUT_BATCH(batch, 0);
1405     OUT_BATCH(batch, 0);
1406     OUT_BATCH(batch, 0);
1407     OUT_BATCH(batch, 0);
1408    
1409     /*inline data */
1410     OUT_BATCH(batch, head_offset / 16);
1411     OUT_BATCH(batch, (intra_flag) | (qp << 16));
1412     temp_value = (mb_x | (mb_y << 8) | (width_in_mbs << 16));
1413     OUT_BATCH(batch, temp_value);
1414
1415     OUT_BATCH(batch, number_mb_cmds);
1416
1417     OUT_BATCH(batch,
1418               ((slice_end_y << 8) | (slice_end_x)));
1419     OUT_BATCH(batch, fwd_ref);
1420     OUT_BATCH(batch, bwd_ref);
1421
1422     OUT_BATCH(batch, MI_NOOP);
1423
1424     ADVANCE_BATCH(batch);
1425 }
1426
1427 static void
1428 gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1429                                         struct intel_encoder_context *encoder_context,
1430                                         VAEncSliceParameterBufferH264 *slice_param,
1431                                         int head_offset,
1432                                         int qp,
1433                                         int last_slice)
1434 {
1435     struct intel_batchbuffer *batch = encoder_context->base.batch;
1436     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1437     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1438     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1439     int total_mbs = slice_param->num_macroblocks;
1440     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1441     int number_mb_cmds = 128;
1442     int starting_offset = 0;
1443     int mb_x, mb_y;
1444     int last_mb, slice_end_x, slice_end_y;
1445     int remaining_mb = total_mbs;
1446     uint32_t fwd_ref , bwd_ref, mb_flag;
1447
1448     last_mb = slice_param->macroblock_address + total_mbs - 1;
1449     slice_end_x = last_mb % width_in_mbs;
1450     slice_end_y = last_mb / width_in_mbs;
1451
1452     if (slice_type == SLICE_TYPE_I) {
1453         fwd_ref = 0;
1454         bwd_ref = 0;
1455         mb_flag = 1;
1456     } else {
1457         fwd_ref = vme_context->ref_index_in_mb[0];
1458         bwd_ref = vme_context->ref_index_in_mb[1];
1459         mb_flag = 0;
1460     }
1461
1462     if (width_in_mbs >= 100) {
1463         number_mb_cmds = width_in_mbs / 5;
1464     } else if (width_in_mbs >= 80) {
1465         number_mb_cmds = width_in_mbs / 4;
1466     } else if (width_in_mbs >= 60) {
1467         number_mb_cmds = width_in_mbs / 3;
1468     } else if (width_in_mbs >= 40) {
1469         number_mb_cmds = width_in_mbs / 2;
1470     } else {
1471         number_mb_cmds = width_in_mbs;
1472     }
1473
1474     do {
1475         if (number_mb_cmds >= remaining_mb) {
1476                 number_mb_cmds = remaining_mb;
1477         }
1478         mb_x = (slice_param->macroblock_address + starting_offset) % width_in_mbs;
1479         mb_y = (slice_param->macroblock_address + starting_offset) / width_in_mbs;
1480
1481         gen75_mfc_batchbuffer_emit_object_command(batch,
1482                                                   mb_flag,
1483                                                   head_offset,
1484                                                   number_mb_cmds,
1485                                                   slice_end_x,
1486                                                   slice_end_y,
1487                                                   mb_x,
1488                                                   mb_y,
1489                                                   width_in_mbs,
1490                                                   qp,
1491                                                   fwd_ref,
1492                                                   bwd_ref);
1493
1494         head_offset += (number_mb_cmds * AVC_PAK_LEN_IN_BYTE);
1495         remaining_mb -= number_mb_cmds;
1496         starting_offset += number_mb_cmds;
1497     } while (remaining_mb > 0);
1498 }
1499                           
1500 /*
1501  * return size in Owords (16bytes)
1502  */         
1503 static void
1504 gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1505                                 struct encode_state *encode_state,
1506                                 struct intel_encoder_context *encoder_context,
1507                                 int slice_index)
1508 {
1509     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1510     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1511     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1512     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1513     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1514     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1515     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1516     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1517     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1518     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1519     unsigned char *slice_header = NULL;
1520     int slice_header_length_in_bits = 0;
1521     unsigned int tail_data[] = { 0x0, 0x0 };
1522     long head_offset;
1523     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
1524
1525     if (rate_control_mode == VA_RC_CBR) {
1526         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1527         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1528     }
1529
1530     /* only support for 8-bit pixel bit-depth */
1531     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1532     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1533     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1534     assert(qp >= 0 && qp < 52);
1535
1536     gen75_mfc_avc_slice_state(ctx,
1537                               pPicParameter,
1538                               pSliceParameter,
1539                               encode_state,
1540                               encoder_context,
1541                               (rate_control_mode == VA_RC_CBR),
1542                               qp,
1543                               slice_batch);
1544
1545     if (slice_index == 0)
1546         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1547
1548     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1549
1550     // slice hander
1551     mfc_context->insert_object(ctx,
1552                                encoder_context,
1553                                (unsigned int *)slice_header,
1554                                ALIGN(slice_header_length_in_bits, 32) >> 5,
1555                                slice_header_length_in_bits & 0x1f,
1556                                5,  /* first 5 bytes are start code + nal unit type */
1557                                1,
1558                                0,
1559                                1,
1560                                slice_batch);
1561
1562     free(slice_header);
1563
1564     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1565     head_offset = intel_batchbuffer_used_size(slice_batch);
1566
1567     slice_batch->ptr += pSliceParameter->num_macroblocks * AVC_PAK_LEN_IN_BYTE;
1568
1569     gen75_mfc_avc_batchbuffer_slice_command(ctx,
1570                                             encoder_context,
1571                                             pSliceParameter,
1572                                             head_offset,
1573                                             qp,
1574                                             last_slice);
1575
1576
1577     /* Aligned for tail */
1578     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1579     if (last_slice) {    
1580         mfc_context->insert_object(ctx,
1581                                    encoder_context,
1582                                    tail_data,
1583                                    2,
1584                                    8,
1585                                    2,
1586                                    1,
1587                                    1,
1588                                    0,
1589                                    slice_batch);
1590     } else {
1591         mfc_context->insert_object(ctx,
1592                                    encoder_context,
1593                                    tail_data,
1594                                    1,
1595                                    8,
1596                                    1,
1597                                    1,
1598                                    1,
1599                                    0,
1600                                    slice_batch);
1601     }
1602
1603     return;
1604 }
1605
1606 static void
1607 gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1608                                    struct encode_state *encode_state,
1609                                    struct intel_encoder_context *encoder_context)
1610 {
1611     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1612     struct intel_batchbuffer *batch = encoder_context->base.batch;
1613     int i;
1614     intel_batchbuffer_start_atomic(batch, 0x4000); 
1615     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1616
1617     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1618         gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i);
1619     }
1620     {
1621         struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1622         intel_batchbuffer_align(slice_batch, 8);
1623         BEGIN_BCS_BATCH(slice_batch, 2);
1624         OUT_BCS_BATCH(slice_batch, 0);
1625         OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
1626         ADVANCE_BCS_BATCH(slice_batch);
1627         mfc_context->aux_batchbuffer = NULL;
1628         intel_batchbuffer_free(slice_batch);
1629     }
1630     intel_batchbuffer_end_atomic(batch);
1631     intel_batchbuffer_flush(batch);
1632 }
1633
1634 static void
1635 gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
1636                                 struct encode_state *encode_state,
1637                                 struct intel_encoder_context *encoder_context)
1638 {
1639     gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1640     gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1641     gen75_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1642     gen75_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1643 }
1644
1645 static dri_bo *
1646 gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1647                                    struct encode_state *encode_state,
1648                                    struct intel_encoder_context *encoder_context)
1649 {
1650     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1651
1652     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
1653     gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1654
1655     return mfc_context->aux_batchbuffer_surface.bo;
1656 }
1657
1658 #endif
1659
1660 static void
1661 gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
1662                                   struct encode_state *encode_state,
1663                                   struct intel_encoder_context *encoder_context)
1664 {
1665     struct intel_batchbuffer *batch = encoder_context->base.batch;
1666     dri_bo *slice_batch_bo;
1667
1668     if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1669         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1670         assert(0);
1671         return; 
1672     }
1673
1674 #if MFC_SOFTWARE_HASWELL
1675     slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1676 #else
1677     slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1678 #endif
1679
1680     // begin programing
1681     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1682     intel_batchbuffer_emit_mi_flush(batch);
1683     
1684     // picture level programing
1685     gen75_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1686
1687     BEGIN_BCS_BATCH(batch, 2);
1688     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1689     OUT_BCS_RELOC(batch,
1690                   slice_batch_bo,
1691                   I915_GEM_DOMAIN_COMMAND, 0, 
1692                   0);
1693     ADVANCE_BCS_BATCH(batch);
1694
1695     // end programing
1696     intel_batchbuffer_end_atomic(batch);
1697
1698     dri_bo_unreference(slice_batch_bo);
1699 }
1700
1701
1702 static VAStatus
1703 gen75_mfc_avc_encode_picture(VADriverContextP ctx, 
1704                              struct encode_state *encode_state,
1705                              struct intel_encoder_context *encoder_context)
1706 {
1707     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1708     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1709     int current_frame_bits_size;
1710     int sts;
1711  
1712     for (;;) {
1713         gen75_mfc_init(ctx, encode_state, encoder_context);
1714         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1715         /*Programing bcs pipeline*/
1716         gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);  //filling the pipeline
1717         gen75_mfc_run(ctx, encode_state, encoder_context);
1718         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1719             gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1720             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1721             if (sts == BRC_NO_HRD_VIOLATION) {
1722                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1723                 break;
1724             }
1725             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1726                 if (!mfc_context->hrd.violation_noted) {
1727                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1728                     mfc_context->hrd.violation_noted = 1;
1729                 }
1730                 return VA_STATUS_SUCCESS;
1731             }
1732         } else {
1733             break;
1734         }
1735     }
1736
1737     return VA_STATUS_SUCCESS;
1738 }
1739
/*
 * MPEG-2
 */

/*
 * Picture coding type value written into MFX_MPEG2_PIC_STATE, indexed by the
 * picture_type field of the VA MPEG-2 picture parameter buffer.
 */
static const int
va_to_gen75_mpeg2_picture_type[3] = {
    [0] = 1,    /* I */
    [1] = 2,    /* P */
    [2] = 3     /* B */
};
1750
1751 static void
1752 gen75_mfc_mpeg2_pic_state(VADriverContextP ctx,
1753                           struct intel_encoder_context *encoder_context,
1754                           struct encode_state *encode_state)
1755 {
1756     struct intel_batchbuffer *batch = encoder_context->base.batch;
1757     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1758     VAEncPictureParameterBufferMPEG2 *pic_param;
1759     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1760     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1761     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1762
1763     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1764     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1765     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
1766
1767     BEGIN_BCS_BATCH(batch, 13);
1768     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1769     OUT_BCS_BATCH(batch,
1770                   (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1771                   (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1772                   (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1773                   (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1774                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1775                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1776                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1777                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1778                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1779                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1780                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1781                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1782     OUT_BCS_BATCH(batch,
1783                   0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1784                   va_to_gen75_mpeg2_picture_type[pic_param->picture_type] << 9 |
1785                   0);
1786     OUT_BCS_BATCH(batch,
1787                   1 << 31 |     /* slice concealment */
1788                   (height_in_mbs - 1) << 16 |
1789                   (width_in_mbs - 1));
1790     if (slice_param && slice_param->quantiser_scale_code >= 14) 
1791         OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
1792     else
1793         OUT_BCS_BATCH(batch, 0);
1794
1795     OUT_BCS_BATCH(batch, 0);
1796     OUT_BCS_BATCH(batch,
1797                   0xFFF << 16 | /* InterMBMaxSize */
1798                   0xFFF << 0 |  /* IntraMBMaxSize */
1799                   0);
1800     OUT_BCS_BATCH(batch, 0);
1801     OUT_BCS_BATCH(batch, 0);
1802     OUT_BCS_BATCH(batch, 0);
1803     OUT_BCS_BATCH(batch, 0);
1804     OUT_BCS_BATCH(batch, 0);
1805     OUT_BCS_BATCH(batch, 0);
1806     ADVANCE_BCS_BATCH(batch);
1807 }
1808
1809 static void
1810 gen75_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1811 {
1812     unsigned char intra_qm[64] = {
1813         8, 16, 19, 22, 26, 27, 29, 34,
1814         16, 16, 22, 24, 27, 29, 34, 37,
1815         19, 22, 26, 27, 29, 34, 34, 38,
1816         22, 22, 26, 27, 29, 34, 37, 40,
1817         22, 26, 27, 29, 32, 35, 40, 48,
1818         26, 27, 29, 32, 35, 40, 48, 58,
1819         26, 27, 29, 34, 38, 46, 56, 69,
1820         27, 29, 35, 38, 46, 56, 69, 83
1821     };
1822
1823     unsigned char non_intra_qm[64] = {
1824         16, 16, 16, 16, 16, 16, 16, 16,
1825         16, 16, 16, 16, 16, 16, 16, 16,
1826         16, 16, 16, 16, 16, 16, 16, 16,
1827         16, 16, 16, 16, 16, 16, 16, 16,
1828         16, 16, 16, 16, 16, 16, 16, 16,
1829         16, 16, 16, 16, 16, 16, 16, 16,
1830         16, 16, 16, 16, 16, 16, 16, 16,
1831         16, 16, 16, 16, 16, 16, 16, 16
1832     };
1833
1834     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1835     gen75_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
1836 }
1837
1838 static void
1839 gen75_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1840 {
1841     unsigned short intra_fqm[64] = {
1842         65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1843         65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1844         65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1845         65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1846         65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1847         65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1848         65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1849         65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1850     };
1851
1852     unsigned short non_intra_fqm[64] = {
1853         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1854         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1855         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1856         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1857         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1858         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1859         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1860         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1861     };
1862
1863     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1864     gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
1865 }
1866
1867 static void
1868 gen75_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1869                                  struct intel_encoder_context *encoder_context,
1870                                  int x, int y,
1871                                  int next_x, int next_y,
1872                                  int is_fisrt_slice_group,
1873                                  int is_last_slice_group,
1874                                  int intra_slice,
1875                                  int qp,
1876                                  struct intel_batchbuffer *batch)
1877 {
1878     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1879
1880     if (batch == NULL)
1881         batch = encoder_context->base.batch;
1882
1883     BEGIN_BCS_BATCH(batch, 8);
1884
1885     OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1886     OUT_BCS_BATCH(batch,
1887                   0 << 31 |                             /* MbRateCtrlFlag */
1888                   !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
1889                   1 << 17 |                             /* Insert Header before the first slice group data */
1890                   1 << 16 |                             /* SliceData PresentFlag: always 1 */
1891                   1 << 15 |                             /* TailPresentFlag: always 1 */
1892                   0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
1893                   !!intra_slice << 13 |                 /* IntraSlice */
1894                   !!intra_slice << 12 |                 /* IntraSliceFlag */
1895                   0);
1896     OUT_BCS_BATCH(batch,
1897                   next_y << 24 |
1898                   next_x << 16 |
1899                   y << 8 |
1900                   x << 0 |
1901                   0);
1902     OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
1903     /* bitstream pointer is only loaded once for the first slice of a frame when 
1904      * LoadSlicePointerFlag is 0
1905      */
1906     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1907     OUT_BCS_BATCH(batch, 0);    /* FIXME: */
1908     OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
1909     OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
1910
1911     ADVANCE_BCS_BATCH(batch);
1912 }
1913
1914 static int
1915 gen75_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1916                                  struct intel_encoder_context *encoder_context,
1917                                  int x, int y,
1918                                  int first_mb_in_slice,
1919                                  int last_mb_in_slice,
1920                                  int first_mb_in_slice_group,
1921                                  int last_mb_in_slice_group,
1922                                  int mb_type,
1923                                  int qp_scale_code,
1924                                  int coded_block_pattern,
1925                                  unsigned char target_size_in_word,
1926                                  unsigned char max_size_in_word,
1927                                  struct intel_batchbuffer *batch)
1928 {
1929     int len_in_dwords = 9;
1930
1931     if (batch == NULL)
1932         batch = encoder_context->base.batch;
1933
1934     BEGIN_BCS_BATCH(batch, len_in_dwords);
1935
1936     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1937     OUT_BCS_BATCH(batch,
1938                   0 << 24 |     /* PackedMvNum */
1939                   0 << 20 |     /* MvFormat */
1940                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1941                   0 << 15 |     /* TransformFlag: frame DCT */
1942                   0 << 14 |     /* FieldMbFlag */
1943                   1 << 13 |     /* IntraMbFlag */
1944                   mb_type << 8 |   /* MbType: Intra */
1945                   0 << 2 |      /* SkipMbFlag */
1946                   0 << 0 |      /* InterMbMode */
1947                   0);
1948     OUT_BCS_BATCH(batch, y << 16 | x);
1949     OUT_BCS_BATCH(batch,
1950                   max_size_in_word << 24 |
1951                   target_size_in_word << 16 |
1952                   coded_block_pattern << 6 |      /* CBP */
1953                   0);
1954     OUT_BCS_BATCH(batch,
1955                   last_mb_in_slice << 31 |
1956                   first_mb_in_slice << 30 |
1957                   0 << 27 |     /* EnableCoeffClamp */
1958                   last_mb_in_slice_group << 26 |
1959                   0 << 25 |     /* MbSkipConvDisable */
1960                   first_mb_in_slice_group << 24 |
1961                   0 << 16 |     /* MvFieldSelect */
1962                   qp_scale_code << 0 |
1963                   0);
1964     OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
1965     OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
1966     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1967     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1968
1969     ADVANCE_BCS_BATCH(batch);
1970
1971     return len_in_dwords;
1972 }
1973
/*
 * Offset of the motion vector data inside a VME output message. It is added
 * to an `unsigned int *`, so it counts 32-bit words.
 */
#define MPEG2_INTER_MV_OFFSET   12 

/*
 * Allowed motion vector range for each f_code value; the table is indexed by
 * f_code. Entry 0 is a placeholder and is never used for clamping.
 */
static const struct _mv_ranges
{
    int low;    /* in the unit of 1/2 pixel */
    int high;   /* in the unit of 1/2 pixel */
} mv_ranges[] = {
    {0, 0},
    {-16, 15},
    {-32, 31},
    {-64, 63},
    {-128, 127},
    {-256, 255},
    {-512, 511},
    {-1024, 1023},
    {-2048, 2047},
    {-4096, 4095}
};

/*
 * Sanitise one motion vector component for a macroblock.
 *
 * mv          - vector component, in half-pixel units
 * pos         - macroblock index along the same axis
 * display_max - picture size along that axis, in pixels
 * f_code      - f_code that selects the allowed range
 *
 * The vector is reset to 0 when it would move the 16-pixel block outside
 * [0, display_max]. The result is then clamped to the range of f_code when
 * f_code has a usable table entry (1 .. table size - 1); other f_code values
 * leave it unclamped.
 *
 * Returns the sanitised component.
 */
static int
mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
{
    /* derived from the table so the bound cannot drift from its length */
    const int num_ranges = (int)(sizeof(mv_ranges) / sizeof(mv_ranges[0]));

    if (mv + pos * 16 * 2 < 0 ||
        mv + (pos + 1) * 16 * 2 > display_max * 2)
        mv = 0;

    if (f_code > 0 && f_code < num_ranges) {
        if (mv < mv_ranges[f_code].low)
            mv = mv_ranges[f_code].low;

        if (mv > mv_ranges[f_code].high)
            mv = mv_ranges[f_code].high;
    }

    return mv;
}
2010
2011 static int
2012 gen75_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
2013                                  struct encode_state *encode_state,
2014                                  struct intel_encoder_context *encoder_context,
2015                                  unsigned int *msg,
2016                                  int width_in_mbs, int height_in_mbs,
2017                                  int x, int y,
2018                                  int first_mb_in_slice,
2019                                  int last_mb_in_slice,
2020                                  int first_mb_in_slice_group,
2021                                  int last_mb_in_slice_group,
2022                                  int qp_scale_code,
2023                                  unsigned char target_size_in_word,
2024                                  unsigned char max_size_in_word,
2025                                  struct intel_batchbuffer *batch)
2026 {
2027     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
2028     int len_in_dwords = 9;
2029     short *mvptr, mvx0, mvy0, mvx1, mvy1;
2030     
2031     if (batch == NULL)
2032         batch = encoder_context->base.batch;
2033
2034     mvptr = (short *)(msg + MPEG2_INTER_MV_OFFSET);
2035     mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
2036     mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
2037     mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
2038     mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
2039
2040     BEGIN_BCS_BATCH(batch, len_in_dwords);
2041
2042     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
2043     OUT_BCS_BATCH(batch,
2044                   2 << 24 |     /* PackedMvNum */
2045                   7 << 20 |     /* MvFormat */
2046                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
2047                   0 << 15 |     /* TransformFlag: frame DCT */
2048                   0 << 14 |     /* FieldMbFlag */
2049                   0 << 13 |     /* IntraMbFlag */
2050                   1 << 8 |      /* MbType: Frame-based */
2051                   0 << 2 |      /* SkipMbFlag */
2052                   0 << 0 |      /* InterMbMode */
2053                   0);
2054     OUT_BCS_BATCH(batch, y << 16 | x);
2055     OUT_BCS_BATCH(batch,
2056                   max_size_in_word << 24 |
2057                   target_size_in_word << 16 |
2058                   0x3f << 6 |   /* CBP */
2059                   0);
2060     OUT_BCS_BATCH(batch,
2061                   last_mb_in_slice << 31 |
2062                   first_mb_in_slice << 30 |
2063                   0 << 27 |     /* EnableCoeffClamp */
2064                   last_mb_in_slice_group << 26 |
2065                   0 << 25 |     /* MbSkipConvDisable */
2066                   first_mb_in_slice_group << 24 |
2067                   0 << 16 |     /* MvFieldSelect */
2068                   qp_scale_code << 0 |
2069                   0);
2070
2071     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
2072     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
2073     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
2074     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
2075
2076     ADVANCE_BCS_BATCH(batch);
2077
2078     return len_in_dwords;
2079 }
2080
2081 static void
2082 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
2083                                            struct encode_state *encode_state,
2084                                            struct intel_encoder_context *encoder_context,
2085                                            struct intel_batchbuffer *slice_batch)
2086 {
2087     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2088     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
2089
2090     if (encode_state->packed_header_data[idx]) {
2091         VAEncPackedHeaderParameterBuffer *param = NULL;
2092         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2093         unsigned int length_in_bits;
2094
2095         assert(encode_state->packed_header_param[idx]);
2096         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2097         length_in_bits = param->bit_length;
2098
2099         mfc_context->insert_object(ctx,
2100                                    encoder_context,
2101                                    header_data,
2102                                    ALIGN(length_in_bits, 32) >> 5,
2103                                    length_in_bits & 0x1f,
2104                                    5,   /* FIXME: check it */
2105                                    0,
2106                                    0,
2107                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2108                                    slice_batch);
2109     }
2110
2111     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
2112
2113     if (encode_state->packed_header_data[idx]) {
2114         VAEncPackedHeaderParameterBuffer *param = NULL;
2115         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
2116         unsigned int length_in_bits;
2117
2118         assert(encode_state->packed_header_param[idx]);
2119         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
2120         length_in_bits = param->bit_length;
2121
2122         mfc_context->insert_object(ctx,
2123                                    encoder_context,
2124                                    header_data,
2125                                    ALIGN(length_in_bits, 32) >> 5,
2126                                    length_in_bits & 0x1f,
2127                                    5,   /* FIXME: check it */
2128                                    0,
2129                                    0,
2130                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
2131                                    slice_batch);
2132     }
2133 }
2134
2135 static void 
2136 gen75_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
2137                                      struct encode_state *encode_state,
2138                                      struct intel_encoder_context *encoder_context,
2139                                      int slice_index,
2140                                      VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
2141                                      struct intel_batchbuffer *slice_batch)
2142 {
2143     struct gen6_vme_context *vme_context = encoder_context->vme_context;
2144     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2145     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
2146     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
2147     unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
2148     unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
2149     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
2150     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
2151     int i, j;
2152     int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
2153     unsigned int *msg = NULL;
2154     unsigned char *msg_ptr = NULL;
2155
2156     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
2157     h_start_pos = slice_param->macroblock_address % width_in_mbs;
2158     v_start_pos = slice_param->macroblock_address / width_in_mbs;
2159     assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
2160
2161     dri_bo_map(vme_context->vme_output.bo , 0);
2162     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
2163
2164     if (next_slice_group_param) {
2165         h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
2166         v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
2167     } else {
2168         h_next_start_pos = 0;
2169         v_next_start_pos = height_in_mbs;
2170     }
2171
2172     gen75_mfc_mpeg2_slicegroup_state(ctx,
2173                                      encoder_context,
2174                                      h_start_pos,
2175                                      v_start_pos,
2176                                      h_next_start_pos,
2177                                      v_next_start_pos,
2178                                      slice_index == 0,
2179                                      next_slice_group_param == NULL,
2180                                      slice_param->is_intra_slice,
2181                                      slice_param->quantiser_scale_code,
2182                                      slice_batch);
2183
2184     if (slice_index == 0) 
2185         intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
2186
2187     /* Insert '00' to make sure the header is valid */
2188     mfc_context->insert_object(ctx,
2189                                encoder_context,
2190                                (unsigned int*)section_delimiter,
2191                                1,
2192                                8,   /* 8bits in the last DWORD */
2193                                1,   /* 1 byte */
2194                                1,
2195                                0,
2196                                0,
2197                                slice_batch);
2198
2199     for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
2200         /* PAK for each macroblocks */
2201         for (j = 0; j < slice_param->num_macroblocks; j++) {
2202             int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
2203             int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
2204             int first_mb_in_slice = (j == 0);
2205             int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
2206             int first_mb_in_slice_group = (i == 0 && j == 0);
2207             int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
2208                                           j == slice_param->num_macroblocks - 1);
2209
2210             msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
2211
2212             if (slice_param->is_intra_slice) {
2213                 gen75_mfc_mpeg2_pak_object_intra(ctx,
2214                                                  encoder_context,
2215                                                  h_pos, v_pos,
2216                                                  first_mb_in_slice,
2217                                                  last_mb_in_slice,
2218                                                  first_mb_in_slice_group,
2219                                                  last_mb_in_slice_group,
2220                                                  0x1a,
2221                                                  slice_param->quantiser_scale_code,
2222                                                  0x3f,
2223                                                  0,
2224                                                  0xff,
2225                                                  slice_batch);
2226             } else {
2227                 int inter_rdo, intra_rdo;
2228                 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
2229                 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
2230
2231                 if (intra_rdo < inter_rdo) 
2232                     gen75_mfc_mpeg2_pak_object_intra(ctx,
2233                                                      encoder_context,
2234                                                      h_pos, v_pos,
2235                                                      first_mb_in_slice,
2236                                                      last_mb_in_slice,
2237                                                      first_mb_in_slice_group,
2238                                                      last_mb_in_slice_group,
2239                                                      0x1a,
2240                                                      slice_param->quantiser_scale_code,
2241                                                      0x3f,
2242                                                      0,
2243                                                      0xff,
2244                                                      slice_batch);
2245                 else
2246                     gen75_mfc_mpeg2_pak_object_inter(ctx,
2247                                                      encode_state,
2248                                                      encoder_context,
2249                                                      msg,
2250                                                      width_in_mbs, height_in_mbs,
2251                                                      h_pos, v_pos,
2252                                                      first_mb_in_slice,
2253                                                      last_mb_in_slice,
2254                                                      first_mb_in_slice_group,
2255                                                      last_mb_in_slice_group,
2256                                                      slice_param->quantiser_scale_code,
2257                                                      0,
2258                                                      0xff,
2259                                                      slice_batch);
2260             }
2261         }
2262
2263         slice_param++;
2264     }
2265
2266     dri_bo_unmap(vme_context->vme_output.bo);
2267
2268     /* tail data */
2269     if (next_slice_group_param == NULL) { /* end of a picture */
2270         mfc_context->insert_object(ctx,
2271                                    encoder_context,
2272                                    (unsigned int *)tail_delimiter,
2273                                    2,
2274                                    8,   /* 8bits in the last DWORD */
2275                                    5,   /* 5 bytes */
2276                                    1,
2277                                    1,
2278                                    0,
2279                                    slice_batch);
2280     } else {        /* end of a lsice group */
2281         mfc_context->insert_object(ctx,
2282                                    encoder_context,
2283                                    (unsigned int *)section_delimiter,
2284                                    1,
2285                                    8,   /* 8bits in the last DWORD */
2286                                    1,   /* 1 byte */
2287                                    1,
2288                                    1,
2289                                    0,
2290                                    slice_batch);
2291     }
2292 }
2293
2294 /* 
2295  * A batch buffer for all slices, including slice state, 
2296  * slice insert object and slice pak object commands
2297  *
2298  */
2299 static dri_bo *
2300 gen75_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
2301                                            struct encode_state *encode_state,
2302                                            struct intel_encoder_context *encoder_context)
2303 {
2304     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2305     struct intel_batchbuffer *batch;
2306     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
2307     dri_bo *batch_bo;
2308     int i;
2309
2310     batch = mfc_context->aux_batchbuffer;
2311     batch_bo = batch->buffer;
2312
2313     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2314         if (i == encode_state->num_slice_params_ext - 1)
2315             next_slice_group_param = NULL;
2316         else
2317             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2318
2319         gen75_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2320     }
2321
2322     intel_batchbuffer_align(batch, 8);
2323     
2324     BEGIN_BCS_BATCH(batch, 2);
2325     OUT_BCS_BATCH(batch, 0);
2326     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2327     ADVANCE_BCS_BATCH(batch);
2328
2329     dri_bo_reference(batch_bo);
2330     intel_batchbuffer_free(batch);
2331     mfc_context->aux_batchbuffer = NULL;
2332
2333     return batch_bo;
2334 }
2335
2336 static void
2337 gen75_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2338                                             struct encode_state *encode_state,
2339                                             struct intel_encoder_context *encoder_context)
2340 {
2341     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2342
2343     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2344     mfc_context->set_surface_state(ctx, encoder_context);
2345     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2346     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
2347     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2348     gen75_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2349     gen75_mfc_mpeg2_qm_state(ctx, encoder_context);
2350     gen75_mfc_mpeg2_fqm_state(ctx, encoder_context);
2351 }
2352
2353 static void
2354 gen75_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2355                                     struct encode_state *encode_state,
2356                                     struct intel_encoder_context *encoder_context)
2357 {
2358     struct intel_batchbuffer *batch = encoder_context->base.batch;
2359     dri_bo *slice_batch_bo;
2360
2361     slice_batch_bo = gen75_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2362
2363     // begin programing
2364     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
2365     intel_batchbuffer_emit_mi_flush(batch);
2366     
2367     // picture level programing
2368     gen75_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
2369
2370     BEGIN_BCS_BATCH(batch, 2);
2371     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
2372     OUT_BCS_RELOC(batch,
2373                   slice_batch_bo,
2374                   I915_GEM_DOMAIN_COMMAND, 0, 
2375                   0);
2376     ADVANCE_BCS_BATCH(batch);
2377
2378     // end programing
2379     intel_batchbuffer_end_atomic(batch);
2380
2381     dri_bo_unreference(slice_batch_bo);
2382 }
2383
2384 static VAStatus
2385 intel_mfc_mpeg2_prepare(VADriverContextP ctx, 
2386                         struct encode_state *encode_state,
2387                         struct intel_encoder_context *encoder_context)
2388 {
2389     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2390     struct object_surface *obj_surface; 
2391     struct object_buffer *obj_buffer;
2392     struct i965_coded_buffer_segment *coded_buffer_segment;
2393     VAStatus vaStatus = VA_STATUS_SUCCESS;
2394     dri_bo *bo;
2395     int i;
2396
2397     /* reconstructed surface */
2398     obj_surface = encode_state->reconstructed_object;
2399     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2400     mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2401     dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2402     mfc_context->surface_state.width = obj_surface->orig_width;
2403     mfc_context->surface_state.height = obj_surface->orig_height;
2404     mfc_context->surface_state.w_pitch = obj_surface->width;
2405     mfc_context->surface_state.h_pitch = obj_surface->height;
2406
2407     /* forward reference */
2408     obj_surface = encode_state->reference_objects[0];
2409
2410     if (obj_surface && obj_surface->bo) {
2411         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2412         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2413     } else
2414         mfc_context->reference_surfaces[0].bo = NULL;
2415
2416     /* backward reference */
2417     obj_surface = encode_state->reference_objects[1];
2418
2419     if (obj_surface && obj_surface->bo) {
2420         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2421         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2422     } else {
2423         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2424
2425         if (mfc_context->reference_surfaces[1].bo)
2426             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2427     }
2428
2429     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2430         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2431
2432         if (mfc_context->reference_surfaces[i].bo)
2433             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2434     }
2435     
2436     /* input YUV surface */
2437     obj_surface = encode_state->input_yuv_object;
2438     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2439     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2440
2441     /* coded buffer */
2442     obj_buffer = encode_state->coded_buf_object;
2443     bo = obj_buffer->buffer_store->bo;
2444     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2445     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2446     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2447     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2448
2449     /* set the internal flag to 0 to indicate the coded size is unknown */
2450     dri_bo_map(bo, 1);
2451     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2452     coded_buffer_segment->mapped = 0;
2453     coded_buffer_segment->codec = encoder_context->codec;
2454     dri_bo_unmap(bo);
2455
2456     return vaStatus;
2457 }
2458
2459 static VAStatus
2460 gen75_mfc_mpeg2_encode_picture(VADriverContextP ctx, 
2461                                struct encode_state *encode_state,
2462                                struct intel_encoder_context *encoder_context)
2463 {
2464     gen75_mfc_init(ctx, encode_state, encoder_context);
2465     intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2466     /*Programing bcs pipeline*/
2467     gen75_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2468     gen75_mfc_run(ctx, encode_state, encoder_context);
2469
2470     return VA_STATUS_SUCCESS;
2471 }
2472
2473 static void
2474 gen75_mfc_context_destroy(void *context)
2475 {
2476     struct gen6_mfc_context *mfc_context = context;
2477     int i;
2478
2479     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2480     mfc_context->post_deblocking_output.bo = NULL;
2481
2482     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2483     mfc_context->pre_deblocking_output.bo = NULL;
2484
2485     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2486     mfc_context->uncompressed_picture_source.bo = NULL;
2487
2488     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
2489     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2490
2491     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2492         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2493         mfc_context->direct_mv_buffers[i].bo = NULL;
2494     }
2495
2496     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2497     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
2498
2499     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2500     mfc_context->macroblock_status_buffer.bo = NULL;
2501
2502     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2503     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2504
2505     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2506     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2507
2508     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2509         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2510         mfc_context->reference_surfaces[i].bo = NULL;  
2511     }
2512
2513     i965_gpe_context_destroy(&mfc_context->gpe_context);
2514
2515     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2516     mfc_context->mfc_batchbuffer_surface.bo = NULL;
2517
2518     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2519     mfc_context->aux_batchbuffer_surface.bo = NULL;
2520
2521     if (mfc_context->aux_batchbuffer)
2522         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2523
2524     mfc_context->aux_batchbuffer = NULL;
2525
2526     free(mfc_context);
2527 }
2528
2529 static VAStatus gen75_mfc_pipeline(VADriverContextP ctx,
2530                                    VAProfile profile,
2531                                    struct encode_state *encode_state,
2532                                    struct intel_encoder_context *encoder_context)
2533 {
2534     VAStatus vaStatus;
2535
2536     switch (profile) {
2537     case VAProfileH264ConstrainedBaseline:
2538     case VAProfileH264Main:
2539     case VAProfileH264High:
2540     case VAProfileH264MultiviewHigh:
2541     case VAProfileH264StereoHigh:
2542         vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
2543         break;
2544
2545         /* FIXME: add for other profile */
2546     case VAProfileMPEG2Simple:
2547     case VAProfileMPEG2Main:
2548         vaStatus = gen75_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
2549         break;
2550
2551     default:
2552         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2553         break;
2554     }
2555
2556     return vaStatus;
2557 }
2558
2559 Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2560 {
2561     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
2562
2563     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2564
2565     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
2566     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
2567
2568     mfc_context->gpe_context.curbe.length = 32 * 4;
2569
2570     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2571     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
2572     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
2573     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
2574     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
2575
2576     i965_gpe_load_kernels(ctx,
2577                           &mfc_context->gpe_context,
2578                           gen75_mfc_kernels,
2579                           1);
2580
2581     mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
2582     mfc_context->set_surface_state = gen75_mfc_surface_state;
2583     mfc_context->ind_obj_base_addr_state = gen75_mfc_ind_obj_base_addr_state;
2584     mfc_context->avc_img_state = gen75_mfc_avc_img_state;
2585     mfc_context->avc_qm_state = gen75_mfc_avc_qm_state;
2586     mfc_context->avc_fqm_state = gen75_mfc_avc_fqm_state;
2587     mfc_context->insert_object = gen75_mfc_avc_insert_object;
2588     mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;
2589
2590     encoder_context->mfc_context = mfc_context;
2591     encoder_context->mfc_context_destroy = gen75_mfc_context_destroy;
2592     encoder_context->mfc_pipeline = gen75_mfc_pipeline;
2593     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
2594
2595     return True;
2596 }