Add the separated files for media encoder on haswell
[platform/upstream/libva-intel-driver.git] / src / gen75_mfc.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
42 #include "gen6_mfc.h"
43 #include "gen6_vme.h"
44
/*
 * Table of 4 x 32-bit word rows whose contents are pulled in at compile
 * time from shaders/utils/mfc_batchbuffer_avc_intra.g7b.  Referenced by the
 * "MFC AVC INTRA BATCHBUFFER " entry of gen75_mfc_kernels.
 */
45 static const uint32_t gen75_mfc_batchbuffer_avc_intra[][4] = {
46 #include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
47 };
48
/*
 * Table of 4 x 32-bit word rows whose contents are pulled in at compile
 * time from shaders/utils/mfc_batchbuffer_avc_inter.g7b.  Referenced by the
 * "MFC AVC INTER BATCHBUFFER " entry of gen75_mfc_kernels.
 */
49 static const uint32_t gen75_mfc_batchbuffer_avc_inter[][4] = {
50 #include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
51 };
52
53 static struct i965_kernel gen75_mfc_kernels[] = {
54     {
55         "MFC AVC INTRA BATCHBUFFER ",
56         MFC_BATCHBUFFER_AVC_INTRA,
57         gen75_mfc_batchbuffer_avc_intra,
58         sizeof(gen75_mfc_batchbuffer_avc_intra),
59         NULL
60     },
61
62     {
63         "MFC AVC INTER BATCHBUFFER ",
64         MFC_BATCHBUFFER_AVC_INTER,
65         gen75_mfc_batchbuffer_avc_inter,
66         sizeof(gen75_mfc_batchbuffer_avc_inter),
67         NULL
68     },
69 };
70
71 static void
72 gen75_mfc_pipe_mode_select(VADriverContextP ctx,
73                           int standard_select,
74                           struct intel_encoder_context *encoder_context)
75 {
76     struct intel_batchbuffer *batch = encoder_context->base.batch;
77     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
78
79     assert(standard_select == MFX_FORMAT_MPEG2 ||
80            standard_select == MFX_FORMAT_AVC);
81
82     BEGIN_BCS_BATCH(batch, 5);
83
84     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
85     OUT_BCS_BATCH(batch,
86                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
87                   (MFD_MODE_VLD << 15) | /* VLD mode */
88                   (1 << 10) | /* Stream-Out Enable */
89                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
90                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
91                   (0 << 8)  | /* Pre Deblocking Output */
92                   (0 << 5)  | /* not in stitch mode */
93                   (1 << 4)  | /* encoding mode */
94                   (standard_select << 0));  /* standard select: avc or mpeg2 */
95     OUT_BCS_BATCH(batch,
96                   (0 << 7)  | /* expand NOA bus flag */
97                   (0 << 6)  | /* disable slice-level clock gating */
98                   (0 << 5)  | /* disable clock gating for NOA */
99                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
100                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
101                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
102                   (0 << 1)  |
103                   (0 << 0));
104     OUT_BCS_BATCH(batch, 0);
105     OUT_BCS_BATCH(batch, 0);
106
107     ADVANCE_BCS_BATCH(batch);
108 }
109
110 static void
111 gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
112 {
113     struct intel_batchbuffer *batch = encoder_context->base.batch;
114     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
115
116     BEGIN_BCS_BATCH(batch, 6);
117
118     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
119     OUT_BCS_BATCH(batch, 0);
120     OUT_BCS_BATCH(batch,
121                   ((mfc_context->surface_state.height - 1) << 18) |
122                   ((mfc_context->surface_state.width - 1) << 4));
123     OUT_BCS_BATCH(batch,
124                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
125                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
126                   (0 << 22) | /* surface object control state, FIXME??? */
127                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
128                   (0 << 2)  | /* must be 0 for interleave U/V */
129                   (1 << 1)  | /* must be tiled */
130                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
131     OUT_BCS_BATCH(batch,
132                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
133                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
134     OUT_BCS_BATCH(batch, 0);
135
136     ADVANCE_BCS_BATCH(batch);
137 }
138
139 static void
140 gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
141 {
142     struct intel_batchbuffer *batch = encoder_context->base.batch;
143     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
144     struct gen6_vme_context *vme_context = encoder_context->vme_context;
145
146     BEGIN_BCS_BATCH(batch, 11);
147
148     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
149     OUT_BCS_BATCH(batch, 0);
150     OUT_BCS_BATCH(batch, 0);
151     /* MFX Indirect MV Object Base Address */
152     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
153     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
154     OUT_BCS_BATCH(batch, 0);
155     OUT_BCS_BATCH(batch, 0);
156     OUT_BCS_BATCH(batch, 0);
157     OUT_BCS_BATCH(batch, 0);
158     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
159     OUT_BCS_RELOC(batch,
160                   mfc_context->mfc_indirect_pak_bse_object.bo,
161                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
162                   0);
163     OUT_BCS_RELOC(batch,
164                   mfc_context->mfc_indirect_pak_bse_object.bo,
165                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
166                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
167
168     ADVANCE_BCS_BATCH(batch);
169 }
170
171 static void
172 gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,  
173                        struct intel_encoder_context *encoder_context)
174 {
175     struct intel_batchbuffer *batch = encoder_context->base.batch;
176     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
177     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
178
179     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
180     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
181
182     BEGIN_BCS_BATCH(batch, 16);
183
184     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
185     OUT_BCS_BATCH(batch,
186                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
187     OUT_BCS_BATCH(batch, 
188                   ((height_in_mbs - 1) << 16) | 
189                   ((width_in_mbs - 1) << 0));
190     OUT_BCS_BATCH(batch, 
191                   (0 << 24) |   /* Second Chroma QP Offset */
192                   (0 << 16) |   /* Chroma QP Offset */
193                   (0 << 14) |   /* Max-bit conformance Intra flag */
194                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
195                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
196                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
197                   (0 << 8)  |   /* FIXME: Image Structure */
198                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
199     OUT_BCS_BATCH(batch,
200                   (0 << 16) |   /* Mininum Frame size */
201                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
202                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
203                   (0 << 13) |   /* CABAC 0 word insertion test enable */
204                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
205                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
206                   (0 << 9)  |   /* FIXME: MbMvFormatFlag */
207                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
208                   (0 << 6)  |   /* Only valid for VLD decoding mode */
209                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
210                   (0 << 4)  |   /* Direct 8x8 inference flag */
211                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
212                   (1 << 2)  |   /* Frame MB only flag */
213                   (0 << 1)  |   /* MBAFF mode is in active */
214                   (0 << 0));    /* Field picture flag */
215     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
216     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
217                   (0xBB8 << 16) |       /* InterMbMaxSz */
218                   (0xEE8) );            /* IntraMbMaxSz */
219     OUT_BCS_BATCH(batch, 0);            /* Reserved */
220     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
221     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */        
222     OUT_BCS_BATCH(batch, 0x8C000000);
223     OUT_BCS_BATCH(batch, 0x00010000);
224     OUT_BCS_BATCH(batch, 0);
225     OUT_BCS_BATCH(batch, 0);
226     OUT_BCS_BATCH(batch, 0);
227     OUT_BCS_BATCH(batch, 0);
228
229     ADVANCE_BCS_BATCH(batch);
230 }
231
232 static void
233 gen75_mfc_qm_state(VADriverContextP ctx,
234                   int qm_type,
235                   unsigned int *qm,
236                   int qm_length,
237                   struct intel_encoder_context *encoder_context)
238 {
239     struct intel_batchbuffer *batch = encoder_context->base.batch;
240     unsigned int qm_buffer[16];
241
242     assert(qm_length <= 16);
243     assert(sizeof(*qm) == 4);
244     memcpy(qm_buffer, qm, qm_length * 4);
245
246     BEGIN_BCS_BATCH(batch, 18);
247     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
248     OUT_BCS_BATCH(batch, qm_type << 0);
249     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
250     ADVANCE_BCS_BATCH(batch);
251 }
252
253 static void
254 gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
255 {
256     unsigned int qm[16] = {
257         0x10101010, 0x10101010, 0x10101010, 0x10101010,
258         0x10101010, 0x10101010, 0x10101010, 0x10101010,
259         0x10101010, 0x10101010, 0x10101010, 0x10101010,
260         0x10101010, 0x10101010, 0x10101010, 0x10101010
261     };
262
263     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
264     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
265     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
266     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
267 }
268
269 static void
270 gen75_mfc_fqm_state(VADriverContextP ctx,
271                    int fqm_type,
272                    unsigned int *fqm,
273                    int fqm_length,
274                    struct intel_encoder_context *encoder_context)
275 {
276     struct intel_batchbuffer *batch = encoder_context->base.batch;
277     unsigned int fqm_buffer[32];
278
279     assert(fqm_length <= 32);
280     assert(sizeof(*fqm) == 4);
281     memcpy(fqm_buffer, fqm, fqm_length * 4);
282
283     BEGIN_BCS_BATCH(batch, 34);
284     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
285     OUT_BCS_BATCH(batch, fqm_type << 0);
286     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
287     ADVANCE_BCS_BATCH(batch);
288 }
289
290 static void
291 gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
292 {
293     unsigned int qm[32] = {
294         0x10001000, 0x10001000, 0x10001000, 0x10001000,
295         0x10001000, 0x10001000, 0x10001000, 0x10001000,
296         0x10001000, 0x10001000, 0x10001000, 0x10001000,
297         0x10001000, 0x10001000, 0x10001000, 0x10001000,
298         0x10001000, 0x10001000, 0x10001000, 0x10001000,
299         0x10001000, 0x10001000, 0x10001000, 0x10001000,
300         0x10001000, 0x10001000, 0x10001000, 0x10001000,
301         0x10001000, 0x10001000, 0x10001000, 0x10001000
302     };
303
304     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
305     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
306     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
307     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
308 }
309
310 static void
311 gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
312                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
313                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
314                            struct intel_batchbuffer *batch)
315 {
316     if (batch == NULL)
317         batch = encoder_context->base.batch;
318
319     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
320
321     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
322     OUT_BCS_BATCH(batch,
323                   (0 << 16) |   /* always start at offset 0 */
324                   (data_bits_in_last_dw << 8) |
325                   (skip_emul_byte_count << 4) |
326                   (!!emulation_flag << 3) |
327                   ((!!is_last_header) << 2) |
328                   ((!!is_end_of_slice) << 1) |
329                   (0 << 0));    /* FIXME: ??? */
330     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
331
332     ADVANCE_BCS_BATCH(batch);
333 }
334
335
336 static void
337 gen75_mfc_bit_rate_control_context_init(struct encode_state *encode_state, 
338                                        struct gen6_mfc_context *mfc_context)
339 {
340     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
341     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
342     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
343     float fps =  pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ;
344     int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / (fps+4.0) / width_in_mbs / height_in_mbs;
345     int intra_mb_size = inter_mb_size * 5.0;
346     int i;
347
348     mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_mb_size = intra_mb_size;
349     mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
350     mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_mb_size = inter_mb_size;
351     mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
352     mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_mb_size = inter_mb_size;
353     mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
354
355     for(i = 0 ; i < 3; i++) {
356         mfc_context->bit_rate_control_context[i].QpPrimeY = 26;
357         mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
358         mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
359         mfc_context->bit_rate_control_context[i].GrowInit = 6;
360         mfc_context->bit_rate_control_context[i].GrowResistance = 4;
361         mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
362         mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
363         
364         mfc_context->bit_rate_control_context[i].Correct[0] = 8;
365         mfc_context->bit_rate_control_context[i].Correct[1] = 4;
366         mfc_context->bit_rate_control_context[i].Correct[2] = 2;
367         mfc_context->bit_rate_control_context[i].Correct[3] = 2;
368         mfc_context->bit_rate_control_context[i].Correct[4] = 4;
369         mfc_context->bit_rate_control_context[i].Correct[5] = 8;
370     }
371     
372     mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord = (intra_mb_size + 16)/ 16;
373     mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord = (inter_mb_size + 16)/ 16;
374     mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord = (inter_mb_size + 16)/ 16;
375
376     mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord * 1.5;
377     mfc_context->bit_rate_control_context[SLICE_TYPE_P].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord * 1.5;
378     mfc_context->bit_rate_control_context[SLICE_TYPE_B].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord * 1.5;
379 }
380
381 static void
382 gen75_mfc_brc_init(struct encode_state *encode_state,
383                   struct intel_encoder_context* encoder_context)
384 {
385     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
386     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
387     VAEncMiscParameterBuffer* pMiscParamHRD = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer;
388     VAEncMiscParameterHRD* pParameterHRD = (VAEncMiscParameterHRD*)pMiscParamHRD->data;
389     double bitrate = pSequenceParameter->bits_per_second;
390     double framerate = (double)pSequenceParameter->time_scale /(2 * (double)pSequenceParameter->num_units_in_tick);
391     int inum = 1, pnum = 0, bnum = 0; /* Gop structure: number of I, P, B frames in the Gop. */
392     int intra_period = pSequenceParameter->intra_period;
393     int ip_period = pSequenceParameter->ip_period;
394     double qp1_size = 0.1 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
395     double qp51_size = 0.001 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
396     double bpf;
397
398     if (pSequenceParameter->ip_period) {
399         pnum = (intra_period + ip_period - 1)/ip_period - 1;
400         bnum = intra_period - inum - pnum;
401     }
402
403     mfc_context->brc.mode = encoder_context->rate_control_mode;
404
405     mfc_context->brc.target_frame_size[SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/framerate) /
406                                                              (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
407     mfc_context->brc.target_frame_size[SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
408     mfc_context->brc.target_frame_size[SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
409
410     mfc_context->brc.gop_nums[SLICE_TYPE_I] = inum;
411     mfc_context->brc.gop_nums[SLICE_TYPE_P] = pnum;
412     mfc_context->brc.gop_nums[SLICE_TYPE_B] = bnum;
413
414     bpf = mfc_context->brc.bits_per_frame = bitrate/framerate;
415
416     mfc_context->hrd.buffer_size = (double)pParameterHRD->buffer_size;
417     mfc_context->hrd.current_buffer_fullness =
418         (double)(pParameterHRD->initial_buffer_fullness < mfc_context->hrd.buffer_size)?
419             pParameterHRD->initial_buffer_fullness: mfc_context->hrd.buffer_size/2.;
420     mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
421     mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size;
422     mfc_context->hrd.violation_noted = 0;
423
424     if ((bpf > qp51_size) && (bpf < qp1_size)) {
425         mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
426     }
427     else if (bpf >= qp1_size)
428         mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 1;
429     else if (bpf <= qp51_size)
430         mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51;
431
432     mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
433     mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
434
435     BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
436     BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
437     BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
438 }
439
440 static void 
441 gen75_mfc_hrd_context_init(struct encode_state *encode_state,
442                           struct intel_encoder_context *encoder_context)
443 {
444     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
445     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
446     unsigned int rate_control_mode = encoder_context->rate_control_mode;
447     int target_bit_rate = pSequenceParameter->bits_per_second;
448     
449     // current we only support CBR mode.
450     if (rate_control_mode == VA_RC_CBR) {
451         mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
452         mfc_context->vui_hrd.i_cpb_size_value = (target_bit_rate * 8) >> 10;
453         mfc_context->vui_hrd.i_initial_cpb_removal_delay = mfc_context->vui_hrd.i_cpb_size_value * 0.5 * 1024 / target_bit_rate * 90000;
454         mfc_context->vui_hrd.i_cpb_removal_delay = 2;
455         mfc_context->vui_hrd.i_frame_number = 0;
456
457         mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24; 
458         mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
459         mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
460     }
461
462 }
463
464 static void 
465 gen75_mfc_free_avc_surface(void **data)
466 {
467     struct gen6_mfc_avc_surface_aux *avc_surface = *data;
468
469     if (!avc_surface)
470         return;
471
472     dri_bo_unreference(avc_surface->dmv_top);
473     avc_surface->dmv_top = NULL;
474     dri_bo_unreference(avc_surface->dmv_bottom);
475     avc_surface->dmv_bottom = NULL;
476
477     free(avc_surface);
478     *data = NULL;
479 }
480
481 static void gen75_mfc_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
482 {
483     struct i965_driver_data *i965 = i965_driver_data(ctx);
484     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
485     dri_bo *bo;
486     int i;
487
488     /*Encode common setup for MFC*/
489     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
490     mfc_context->post_deblocking_output.bo = NULL;
491
492     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
493     mfc_context->pre_deblocking_output.bo = NULL;
494
495     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
496     mfc_context->uncompressed_picture_source.bo = NULL;
497
498     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
499     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
500
501     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
502         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
503         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
504         mfc_context->direct_mv_buffers[i].bo = NULL;
505     }
506
507     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
508         if (mfc_context->reference_surfaces[i].bo != NULL)
509             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
510         mfc_context->reference_surfaces[i].bo = NULL;  
511     }
512
513     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
514     bo = dri_bo_alloc(i965->intel.bufmgr,
515                       "Buffer",
516                       128 * 64,
517                       64);
518     assert(bo);
519     mfc_context->intra_row_store_scratch_buffer.bo = bo;
520
521     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
522     bo = dri_bo_alloc(i965->intel.bufmgr,
523                       "Buffer",
524                       128*128*16,
525                       64);
526     assert(bo);
527     mfc_context->macroblock_status_buffer.bo = bo;
528
529     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
530     bo = dri_bo_alloc(i965->intel.bufmgr,
531                       "Buffer",
532                       49152,  /* 6 * 128 * 64 */
533                       64);
534     assert(bo);
535     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
536
537     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
538     bo = dri_bo_alloc(i965->intel.bufmgr,
539                       "Buffer",
540                       12288, /* 1.5 * 128 * 64 */
541                       0x1000);
542     assert(bo);
543     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
544
545     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
546     mfc_context->mfc_batchbuffer_surface.bo = NULL;
547
548     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
549     mfc_context->aux_batchbuffer_surface.bo = NULL;
550
551     if (mfc_context->aux_batchbuffer)
552         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
553
554     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD);
555     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
556     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
557     mfc_context->aux_batchbuffer_surface.pitch = 16;
558     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
559     mfc_context->aux_batchbuffer_surface.size_block = 16;
560
561     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
562 }
563
564 static void
565 gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
566 {
567     struct intel_batchbuffer *batch = encoder_context->base.batch;
568     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
569     int i;
570
571     BEGIN_BCS_BATCH(batch, 24);
572
573     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
574
575     if (mfc_context->pre_deblocking_output.bo)
576         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
577                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
578                       0);
579     else
580         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
581
582     if (mfc_context->post_deblocking_output.bo)
583         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
584                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
585                       0);                                                                                       /* post output addr  */ 
586     else
587         OUT_BCS_BATCH(batch, 0);
588
589     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
590                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
591                   0);                                                                                   /* uncompressed data */
592     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
593                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
594                   0);                                                                                   /* StreamOut data*/
595     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
596                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
597                   0);   
598     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
599                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
600                   0);
601     /* 7..22 Reference pictures*/
602     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
603         if ( mfc_context->reference_surfaces[i].bo != NULL) {
604             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
605                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
606                           0);                   
607         } else {
608             OUT_BCS_BATCH(batch, 0);
609         }
610     }
611     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
612                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
613                   0);                                                                                   /* Macroblock status buffer*/
614
615     ADVANCE_BCS_BATCH(batch);
616 }
617
618 static void
619 gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
620 {
621     struct intel_batchbuffer *batch = encoder_context->base.batch;
622     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
623
624     int i;
625
626     BEGIN_BCS_BATCH(batch, 69);
627
628     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
629
630     /* Reference frames and Current frames */
631     for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
632         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
633             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
634                           I915_GEM_DOMAIN_INSTRUCTION, 0,
635                           0);
636         } else {
637             OUT_BCS_BATCH(batch, 0);
638         }
639     }
640
641     /* POL list */
642     for(i = 0; i < 32; i++) {
643         OUT_BCS_BATCH(batch, i/2);
644     }
645     OUT_BCS_BATCH(batch, 0);
646     OUT_BCS_BATCH(batch, 0);
647
648     ADVANCE_BCS_BATCH(batch);
649 }
650
651 static void
652 gen75_mfc_avc_ref_idx_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
653 {
654     struct intel_batchbuffer *batch = encoder_context->base.batch;
655     int i;
656
657     BEGIN_BCS_BATCH(batch, 10);
658     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
659     OUT_BCS_BATCH(batch, 0);                  //Select L0
660     OUT_BCS_BATCH(batch, 0x80808020);         //Only 1 reference
661     for(i = 0; i < 7; i++) {
662         OUT_BCS_BATCH(batch, 0x80808080);
663     }   
664     ADVANCE_BCS_BATCH(batch);
665
666     BEGIN_BCS_BATCH(batch, 10);
667     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
668     OUT_BCS_BATCH(batch, 1);                  //Select L1
669     OUT_BCS_BATCH(batch, 0x80808022);         //Only 1 reference
670     for(i = 0; i < 7; i++) {
671         OUT_BCS_BATCH(batch, 0x80808080);
672     }   
673     ADVANCE_BCS_BATCH(batch);
674 }
675
676 static void
677 gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
678 {
679     struct intel_batchbuffer *batch = encoder_context->base.batch;
680     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
681
682     BEGIN_BCS_BATCH(batch, 4);
683
684     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
685     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
686                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
687                   0);
688     OUT_BCS_BATCH(batch, 0);
689     OUT_BCS_BATCH(batch, 0);
690
691     ADVANCE_BCS_BATCH(batch);
692 }
693
694
695 static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
696                                       struct encode_state *encode_state,
697                                       struct intel_encoder_context *encoder_context)
698 {
699     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
700
701     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
702     mfc_context->set_surface_state(ctx, encoder_context);
703     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
704     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
705     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
706     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
707     mfc_context->avc_qm_state(ctx, encoder_context);
708     mfc_context->avc_fqm_state(ctx, encoder_context);
709     gen75_mfc_avc_directmode_state(ctx, encoder_context); 
710     gen75_mfc_avc_ref_idx_state(ctx, encoder_context);
711 }
712
713
714 static VAStatus gen75_mfc_avc_prepare(VADriverContextP ctx, 
715                                      struct encode_state *encode_state,
716                                      struct intel_encoder_context *encoder_context)
717 {
718     struct i965_driver_data *i965 = i965_driver_data(ctx);
719     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
720     struct object_surface *obj_surface; 
721     struct object_buffer *obj_buffer;
722     struct gen6_mfc_avc_surface_aux* gen6_avc_surface;
723     dri_bo *bo;
724     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
725     VAStatus vaStatus = VA_STATUS_SUCCESS;
726     int i, j, enable_avc_ildb = 0;
727     VAEncSliceParameterBufferH264 *slice_param;
728     VACodedBufferSegment *coded_buffer_segment;
729     unsigned char *flag = NULL;
730
731     for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
732         assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
733         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
734
735         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
736             assert((slice_param->slice_type == SLICE_TYPE_I) ||
737                    (slice_param->slice_type == SLICE_TYPE_SI) ||
738                    (slice_param->slice_type == SLICE_TYPE_P) ||
739                    (slice_param->slice_type == SLICE_TYPE_SP) ||
740                    (slice_param->slice_type == SLICE_TYPE_B));
741
742             if (slice_param->disable_deblocking_filter_idc != 1) {
743                 enable_avc_ildb = 1;
744                 break;
745             }
746
747             slice_param++;
748         }
749     }
750
751     /*Setup all the input&output object*/
752
753     /* Setup current frame and current direct mv buffer*/
754     obj_surface = SURFACE(pPicParameter->CurrPic.picture_id);
755     assert(obj_surface);
756     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
757
758     if ( obj_surface->private_data == NULL) {
759         gen6_avc_surface = calloc(sizeof(struct gen6_mfc_avc_surface_aux), 1);
760         gen6_avc_surface->dmv_top = 
761             dri_bo_alloc(i965->intel.bufmgr,
762                          "Buffer",
763                          68*8192, 
764                          64);
765         gen6_avc_surface->dmv_bottom = 
766             dri_bo_alloc(i965->intel.bufmgr,
767                          "Buffer",
768                          68*8192, 
769                          64);
770         assert(gen6_avc_surface->dmv_top);
771         assert(gen6_avc_surface->dmv_bottom);
772         obj_surface->private_data = (void *)gen6_avc_surface;
773         obj_surface->free_private_data = (void *)gen75_mfc_free_avc_surface; 
774     }
775     gen6_avc_surface = (struct gen6_mfc_avc_surface_aux*) obj_surface->private_data;
776     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
777     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
778     dri_bo_reference(gen6_avc_surface->dmv_top);
779     dri_bo_reference(gen6_avc_surface->dmv_bottom);
780
781     if (enable_avc_ildb) {
782         mfc_context->post_deblocking_output.bo = obj_surface->bo;
783         dri_bo_reference(mfc_context->post_deblocking_output.bo);
784     } else {
785         mfc_context->pre_deblocking_output.bo = obj_surface->bo;
786         dri_bo_reference(mfc_context->pre_deblocking_output.bo);
787     }
788
789     mfc_context->surface_state.width = obj_surface->orig_width;
790     mfc_context->surface_state.height = obj_surface->orig_height;
791     mfc_context->surface_state.w_pitch = obj_surface->width;
792     mfc_context->surface_state.h_pitch = obj_surface->height;
793     
794     /* Setup reference frames and direct mv buffers*/
795     for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
796         if ( pPicParameter->ReferenceFrames[i].picture_id != VA_INVALID_ID ) { 
797             obj_surface = SURFACE(pPicParameter->ReferenceFrames[i].picture_id);
798             assert(obj_surface);
799             if (obj_surface->bo != NULL) {
800                 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
801                 dri_bo_reference(obj_surface->bo);
802             }
803             /* Check DMV buffer */
804             if ( obj_surface->private_data == NULL) {
805                 
806                 gen6_avc_surface = calloc(sizeof(struct gen6_mfc_avc_surface_aux), 1);
807                 gen6_avc_surface->dmv_top = 
808                     dri_bo_alloc(i965->intel.bufmgr,
809                                  "Buffer",
810                                  68*8192, 
811                                  64);
812                 gen6_avc_surface->dmv_bottom = 
813                     dri_bo_alloc(i965->intel.bufmgr,
814                                  "Buffer",
815                                  68*8192, 
816                                  64);
817                 assert(gen6_avc_surface->dmv_top);
818                 assert(gen6_avc_surface->dmv_bottom);
819                 obj_surface->private_data = gen6_avc_surface;
820                 obj_surface->free_private_data = gen75_mfc_free_avc_surface; 
821             }
822     
823             gen6_avc_surface = (struct gen6_mfc_avc_surface_aux*) obj_surface->private_data;
824             /* Setup DMV buffer */
825             mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
826             mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom; 
827             dri_bo_reference(gen6_avc_surface->dmv_top);
828             dri_bo_reference(gen6_avc_surface->dmv_bottom);
829         } else {
830             break;
831         }
832     }
833         
834     obj_surface = SURFACE(encoder_context->input_yuv_surface);
835     assert(obj_surface && obj_surface->bo);
836     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
837     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
838
839     obj_buffer = BUFFER (pPicParameter->coded_buf); /* FIXME: fix this later */
840     bo = obj_buffer->buffer_store->bo;
841     assert(bo);
842     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
843     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_SIZE;
844     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
845     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
846     
847     dri_bo_map(bo, 1);
848     coded_buffer_segment = (VACodedBufferSegment *)bo->virtual;
849     flag = (unsigned char *)(coded_buffer_segment + 1);
850     *flag = 0;
851     dri_bo_unmap(bo);
852
853     return vaStatus;
854 }
855
856
857 static VAStatus gen75_mfc_run(VADriverContextP ctx, 
858                              struct encode_state *encode_state,
859                              struct intel_encoder_context *encoder_context)
860 {
861     struct intel_batchbuffer *batch = encoder_context->base.batch;
862
863     intel_batchbuffer_flush(batch);             //run the pipeline
864
865     return VA_STATUS_SUCCESS;
866 }
867
868
869 static VAStatus
870 gen75_mfc_stop(VADriverContextP ctx, 
871               struct encode_state *encode_state,
872               struct intel_encoder_context *encoder_context,
873               int *encoded_bits_size)
874 {
875     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
876     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
877     VACodedBufferSegment *coded_buffer_segment;
878     
879     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
880     assert(vaStatus == VA_STATUS_SUCCESS);
881     *encoded_bits_size = coded_buffer_segment->size * 8;
882     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
883
884     return VA_STATUS_SUCCESS;
885 }
886
887
888 static int gen75_mfc_update_hrd(struct encode_state *encode_state,
889                                struct gen6_mfc_context *mfc_context,
890                                int frame_bits)
891 {
892     double prev_bf = mfc_context->hrd.current_buffer_fullness;
893
894     mfc_context->hrd.current_buffer_fullness -= frame_bits;
895
896     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
897         mfc_context->hrd.current_buffer_fullness = prev_bf;
898         return BRC_UNDERFLOW;
899     }
900     
901     mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame;
902     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
903         if (mfc_context->brc.mode == VA_RC_VBR)
904             mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
905         else {
906             mfc_context->hrd.current_buffer_fullness = prev_bf;
907             return BRC_OVERFLOW;
908         }
909     }
910     return BRC_NO_HRD_VIOLATION;
911 }
912
913
914 static int gen75_mfc_brc_postpack(struct encode_state *encode_state,
915                                  struct gen6_mfc_context *mfc_context,
916                                  int frame_bits)
917 {
918     gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
919     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; 
920     int slicetype = pSliceParameter->slice_type;
921     int qpi = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
922     int qpp = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
923     int qpb = mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY;
924     int qp; // quantizer of previously encoded slice of current type
925     int qpn; // predicted quantizer for next frame of current type in integer format
926     double qpf; // predicted quantizer for next frame of current type in float format
927     double delta_qp; // QP correction
928     int target_frame_size, frame_size_next;
929     /* Notes:
930      *  x - how far we are from HRD buffer borders
931      *  y - how far we are from target HRD buffer fullness
932      */
933     double x, y;
934     double frame_size_alpha;
935
936     if (slicetype == SLICE_TYPE_SP)
937         slicetype = SLICE_TYPE_P;
938     else if (slicetype == SLICE_TYPE_SI)
939         slicetype = SLICE_TYPE_I;
940
941     qp = mfc_context->bit_rate_control_context[slicetype].QpPrimeY;
942
943     target_frame_size = mfc_context->brc.target_frame_size[slicetype];
944     if (mfc_context->hrd.buffer_capacity < 5)
945         frame_size_alpha = 0;
946     else
947         frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
948     if (frame_size_alpha > 30) frame_size_alpha = 30;
949     frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
950                                           (double)(frame_size_alpha + 1.);
951
952     /* frame_size_next: avoiding negative number and too small value */
953     if ((double)frame_size_next < (double)(target_frame_size * 0.25))
954         frame_size_next = (int)((double)target_frame_size * 0.25);
955
956     qpf = (double)qp * target_frame_size / frame_size_next;
957     qpn = (int)(qpf + 0.5);
958
959     if (qpn == qp) {
960         /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
961         mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
962         if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
963             qpn++;
964             mfc_context->brc.qpf_rounding_accumulator = 0.;
965         } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
966             qpn--;
967             mfc_context->brc.qpf_rounding_accumulator = 0.;
968         }
969     }
970     /* making sure that QP is not changing too fast */
971     if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
972     else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
973     /* making sure that with QP predictions we did do not leave QPs range */
974     BRC_CLIP(qpn, 1, 51);
975
976     /* checking wthether HRD compliance is still met */
977     sts = gen75_mfc_update_hrd(encode_state, mfc_context, frame_bits);
978
979     /* calculating QP delta as some function*/
980     x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
981     if (x > 0) {
982         x /= mfc_context->hrd.target_buffer_fullness;
983         y = mfc_context->hrd.current_buffer_fullness;
984     }
985     else {
986         x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
987         y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
988     }
989     if (y < 0.01) y = 0.01;
990     if (x > 1) x = 1;
991     else if (x < -1) x = -1;
992
993     delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
994     qpn = (int)(qpn + delta_qp + 0.5);
995
996     /* making sure that with QP predictions we did do not leave QPs range */
997     BRC_CLIP(qpn, 1, 51);
998
999     if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
1000         /* correcting QPs of slices of other types */
1001         if (slicetype == SLICE_TYPE_P) {
1002             if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
1003                 mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
1004             if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
1005                 mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
1006         } else if (slicetype == SLICE_TYPE_I) {
1007             if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
1008                 mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
1009             if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
1010                 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
1011         } else { // SLICE_TYPE_B
1012             if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
1013                 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
1014             if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
1015                 mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
1016         }
1017         BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
1018         BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
1019         BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
1020     } else if (sts == BRC_UNDERFLOW) { // underflow
1021         if (qpn <= qp) qpn = qp + 1;
1022         if (qpn > 51) {
1023             qpn = 51;
1024             sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
1025         }
1026     } else if (sts == BRC_OVERFLOW) {
1027         if (qpn >= qp) qpn = qp - 1;
1028         if (qpn < 1) { // < 0 (?) overflow with minQP
1029             qpn = 1;
1030             sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
1031         }
1032     }
1033
1034     mfc_context->bit_rate_control_context[slicetype].QpPrimeY = qpn;
1035
1036     return sts;
1037 }
1038
1039 static void 
1040 gen75_mfc_hrd_context_update(struct encode_state *encode_state, 
1041                           struct gen6_mfc_context *mfc_context) 
1042 {
1043     mfc_context->vui_hrd.i_frame_number++;
1044 }
1045
1046
1047 static int interlace_check(VADriverContextP ctx,
1048                    struct encode_state *encode_state,
1049                    struct intel_encoder_context *encoder_context)
1050 {
1051     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1052     VAEncSliceParameterBufferH264 *pSliceParameter;
1053     int i;
1054     int mbCount = 0;
1055     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1056     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1057   
1058     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1059         pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer; 
1060         mbCount += pSliceParameter->num_macroblocks; 
1061     }
1062     
1063     if ( mbCount == ( width_in_mbs * height_in_mbs ) )
1064         return 0;
1065
1066     return 1;
1067 }
1068
1069 static void
1070 gen75_mfc_avc_slice_state(VADriverContextP ctx,
1071                          VAEncPictureParameterBufferH264 *pic_param,
1072                          VAEncSliceParameterBufferH264 *slice_param,
1073                          struct encode_state *encode_state,
1074                          struct intel_encoder_context *encoder_context,
1075                          int rate_control_enable,
1076                          int qp,
1077                          struct intel_batchbuffer *batch)
1078 {
1079     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1080     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1081     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1082     int beginmb = slice_param->macroblock_address;
1083     int endmb = beginmb + slice_param->num_macroblocks;
1084     int beginx = beginmb % width_in_mbs;
1085     int beginy = beginmb / width_in_mbs;
1086     int nextx =  endmb % width_in_mbs;
1087     int nexty = endmb / width_in_mbs;
1088     int slice_type = slice_param->slice_type;
1089     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
1090     int bit_rate_control_target, maxQpN, maxQpP;
1091     unsigned char correct[6], grow, shrink;
1092     int i;
1093     int weighted_pred_idc = 0;
1094     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
1095     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
1096
1097     if (batch == NULL)
1098         batch = encoder_context->base.batch;
1099
1100     bit_rate_control_target = slice_type;
1101     if (slice_type == SLICE_TYPE_SP)
1102         bit_rate_control_target = SLICE_TYPE_P;
1103     else if (slice_type == SLICE_TYPE_SI)
1104         bit_rate_control_target = SLICE_TYPE_I;
1105
1106     if (slice_type == SLICE_TYPE_P) {
1107         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
1108     } else if (slice_type == SLICE_TYPE_B) {
1109         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
1110
1111         if (weighted_pred_idc == 2) {
1112             /* 8.4.3 - Derivation process for prediction weights (8-279) */
1113             luma_log2_weight_denom = 5;
1114             chroma_log2_weight_denom = 5;
1115         }
1116     }
1117
1118     maxQpN = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpNegModifier;
1119     maxQpP = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpPosModifier;
1120
1121     for (i = 0; i < 6; i++)
1122         correct[i] = mfc_context->bit_rate_control_context[bit_rate_control_target].Correct[i];
1123
1124     grow = mfc_context->bit_rate_control_context[bit_rate_control_target].GrowInit + 
1125         (mfc_context->bit_rate_control_context[bit_rate_control_target].GrowResistance << 4);
1126     shrink = mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkInit + 
1127         (mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkResistance << 4);
1128
1129     BEGIN_BCS_BATCH(batch, 11);;
1130
1131     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
1132     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
1133
1134     if (slice_type == SLICE_TYPE_I) {
1135         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
1136     } else {
1137         OUT_BCS_BATCH(batch,
1138                       (1 << 16) |                       /*1 reference frame*/
1139                       (chroma_log2_weight_denom << 8) |
1140                       (luma_log2_weight_denom << 0));
1141     }
1142
1143     OUT_BCS_BATCH(batch, 
1144                   (weighted_pred_idc << 30) |
1145                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
1146                   (slice_param->disable_deblocking_filter_idc << 27) |
1147                   (slice_param->cabac_init_idc << 24) |
1148                   (qp<<16) |                    /*Slice Quantization Parameter*/
1149                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
1150                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
1151     OUT_BCS_BATCH(batch,
1152                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
1153                   (beginx << 16) |
1154                   slice_param->macroblock_address );
1155     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
1156     OUT_BCS_BATCH(batch, 
1157                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
1158                   (1 << 30) |           /*ResetRateControlCounter*/
1159                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
1160                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
1161                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
1162                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
1163                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
1164                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
1165                   (last_slice << 19) |     /*IsLastSlice*/
1166                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
1167                   (1 << 17) |       /*HeaderPresentFlag*/       
1168                   (1 << 16) |       /*SliceData PresentFlag*/
1169                   (1 << 15) |       /*TailPresentFlag*/
1170                   (1 << 13) |       /*RBSP NAL TYPE*/   
1171                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
1172     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1173     OUT_BCS_BATCH(batch,
1174                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
1175                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
1176                   (shrink << 8)  |
1177                   (grow << 0));   
1178     OUT_BCS_BATCH(batch,
1179                   (correct[5] << 20) |
1180                   (correct[4] << 16) |
1181                   (correct[3] << 12) |
1182                   (correct[2] << 8) |
1183                   (correct[1] << 4) |
1184                   (correct[0] << 0));
1185     OUT_BCS_BATCH(batch, 0);
1186
1187     ADVANCE_BCS_BATCH(batch);
1188 }
1189
1190
1191 static void gen75_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
1192                                                     struct encode_state *encode_state,
1193                                                     struct intel_encoder_context *encoder_context,
1194                                                     struct intel_batchbuffer *slice_batch)
1195 {
1196     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1197     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
1198
1199     if (encode_state->packed_header_data[idx]) {
1200         VAEncPackedHeaderParameterBuffer *param = NULL;
1201         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1202         unsigned int length_in_bits;
1203
1204         assert(encode_state->packed_header_param[idx]);
1205         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
1206         length_in_bits = param->bit_length;
1207
1208         mfc_context->insert_object(ctx,
1209                                    encoder_context,
1210                                    header_data,
1211                                    ALIGN(length_in_bits, 32) >> 5,
1212                                    length_in_bits & 0x1f,
1213                                    5,   /* FIXME: check it */
1214                                    0,
1215                                    0,
1216                                    !param->has_emulation_bytes,
1217                                    slice_batch);
1218     }
1219
1220     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
1221
1222     if (encode_state->packed_header_data[idx]) {
1223         VAEncPackedHeaderParameterBuffer *param = NULL;
1224         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1225         unsigned int length_in_bits;
1226
1227         assert(encode_state->packed_header_param[idx]);
1228         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
1229         length_in_bits = param->bit_length;
1230
1231         mfc_context->insert_object(ctx,
1232                                    encoder_context,
1233                                    header_data,
1234                                    ALIGN(length_in_bits, 32) >> 5,
1235                                    length_in_bits & 0x1f,
1236                                    5, /* FIXME: check it */
1237                                    0,
1238                                    0,
1239                                    !param->has_emulation_bytes,
1240                                    slice_batch);
1241     }
1242     
1243     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
1244
1245     if (encode_state->packed_header_data[idx]) {
1246         VAEncPackedHeaderParameterBuffer *param = NULL;
1247         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1248         unsigned int length_in_bits;
1249
1250         assert(encode_state->packed_header_param[idx]);
1251         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
1252         length_in_bits = param->bit_length;
1253
1254         mfc_context->insert_object(ctx,
1255                                    encoder_context,
1256                                    header_data,
1257                                    ALIGN(length_in_bits, 32) >> 5,
1258                                    length_in_bits & 0x1f,
1259                                    5, /* FIXME: check it */
1260                                    0,
1261                                    0,
1262                                    !param->has_emulation_bytes,
1263                                    slice_batch);
1264     }
1265 }
1266
1267 #if __SOFTWARE__
1268
1269 static int
1270 gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
1271                                 int qp,unsigned int *msg,
1272                               struct intel_encoder_context *encoder_context,
1273                               unsigned char target_mb_size, unsigned char max_mb_size,
1274                               struct intel_batchbuffer *batch)
1275 {
1276     int len_in_dwords = 11;
1277
1278     if (batch == NULL)
1279         batch = encoder_context->base.batch;
1280
1281     BEGIN_BCS_BATCH(batch, len_in_dwords);
1282
1283     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1284     OUT_BCS_BATCH(batch, 0);
1285     OUT_BCS_BATCH(batch, 0);
1286     OUT_BCS_BATCH(batch, 
1287                   (0 << 24) |           /* PackedMvNum, Debug*/
1288                   (0 << 20) |           /* No motion vector */
1289                   (1 << 19) |           /* CbpDcY */
1290                   (1 << 18) |           /* CbpDcU */
1291                   (1 << 17) |           /* CbpDcV */
1292                   (msg[0] & 0xFFFF) );
1293
1294     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
1295     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
1296     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
1297
1298     /*Stuff for Intra MB*/
1299     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
1300     OUT_BCS_BATCH(batch, msg[2]);       
1301     OUT_BCS_BATCH(batch, msg[3]&0xFC);          
1302     
1303     /*MaxSizeInWord and TargetSzieInWord*/
1304     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1305                   (target_mb_size << 16) );
1306
1307     ADVANCE_BCS_BATCH(batch);
1308
1309     return len_in_dwords;
1310 }
1311
1312 static int
1313 gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1314                               unsigned int *msg, unsigned int offset,
1315                               struct intel_encoder_context *encoder_context,
1316                               unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1317                               struct intel_batchbuffer *batch)
1318 {
1319     int len_in_dwords = 11;
1320
1321     if (batch == NULL)
1322         batch = encoder_context->base.batch;
1323
1324     BEGIN_BCS_BATCH(batch, len_in_dwords);
1325
1326     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1327
1328     OUT_BCS_BATCH(batch, msg[2]);         /* 32 MV*/
1329     OUT_BCS_BATCH(batch, offset);
1330
1331     OUT_BCS_BATCH(batch, msg[0]);
1332
1333     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
1334     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
1335 #if 0 
1336     if ( slice_type == SLICE_TYPE_B) {
1337         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
1338     } else {
1339         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
1340     }
1341 #else
1342     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
1343 #endif
1344
1345
1346     /*Stuff for Inter MB*/
1347     OUT_BCS_BATCH(batch, msg[1]);        
1348     OUT_BCS_BATCH(batch, 0x0);    
1349     OUT_BCS_BATCH(batch, 0x0);        
1350
1351     /*MaxSizeInWord and TargetSzieInWord*/
1352     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1353                   (target_mb_size << 16) );
1354
1355     ADVANCE_BCS_BATCH(batch);
1356
1357     return len_in_dwords;
1358 }
1359
1360 static void 
1361 gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1362                                        struct encode_state *encode_state,
1363                                        struct intel_encoder_context *encoder_context,
1364                                        int slice_index,
1365                                        struct intel_batchbuffer *slice_batch)
1366 {
1367     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1368     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1369     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1370     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1371     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1372     unsigned int *msg = NULL, offset = 0;
1373     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
1374     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1375     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1376     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1377     int i,x,y;
1378     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1379     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1380     unsigned char *slice_header = NULL;
1381     int slice_header_length_in_bits = 0;
1382     unsigned int tail_data[] = { 0x0, 0x0 };
1383     int slice_type = pSliceParameter->slice_type;
1384
1385
1386     if (rate_control_mode == VA_RC_CBR) {
1387         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1388         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1389     }
1390
1391     /* only support for 8-bit pixel bit-depth */
1392     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1393     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1394     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1395     assert(qp >= 0 && qp < 52);
1396
1397     gen75_mfc_avc_slice_state(ctx, 
1398                              pPicParameter,
1399                              pSliceParameter,
1400                              encode_state, encoder_context,
1401                              (rate_control_mode == VA_RC_CBR), qp, slice_batch);
1402
1403     if ( slice_index == 0) 
1404         gen75_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1405
1406     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1407
1408     // slice hander
1409     mfc_context->insert_object(ctx, encoder_context,
1410                                (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1411                                5,  /* first 5 bytes are start code + nal unit type */
1412                                1, 0, 1, slice_batch);
1413
1414     dri_bo_map(vme_context->vme_output.bo , 1);
1415     msg = (unsigned int *)vme_context->vme_output.bo->virtual;
1416
1417     if (is_intra) {
1418         msg += pSliceParameter->macroblock_address * INTRA_VME_OUTPUT_IN_DWS;
1419     } else {
1420         msg += pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_DWS;
1421         msg += 32; /* the first 32 DWs are MVs */
1422         offset = pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_BYTES;
1423     }
1424    
1425     for (i = pSliceParameter->macroblock_address; 
1426          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1427         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1428         x = i % width_in_mbs;
1429         y = i / width_in_mbs;
1430
1431         if (is_intra) {
1432             assert(msg);
1433             gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1434             msg += INTRA_VME_OUTPUT_IN_DWS;
1435         } else {
1436             if (msg[0] & INTRA_MB_FLAG_MASK) {
1437                 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1438             } else {
1439                 gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1440             }
1441
1442             msg += INTER_VME_OUTPUT_IN_DWS;
1443             offset += INTER_VME_OUTPUT_IN_BYTES;
1444         }
1445     }
1446    
1447     dri_bo_unmap(vme_context->vme_output.bo);
1448
1449     if ( last_slice ) {    
1450         mfc_context->insert_object(ctx, encoder_context,
1451                                    tail_data, 2, 8,
1452                                    2, 1, 1, 0, slice_batch);
1453     } else {
1454         mfc_context->insert_object(ctx, encoder_context,
1455                                    tail_data, 1, 8,
1456                                    1, 1, 1, 0, slice_batch);
1457     }
1458
1459     free(slice_header);
1460
1461 }
1462
1463 static dri_bo *
1464 gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1465                                   struct encode_state *encode_state,
1466                                   struct intel_encoder_context *encoder_context)
1467 {
1468     struct i965_driver_data *i965 = i965_driver_data(ctx);
1469     struct intel_batchbuffer *batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD);
1470     dri_bo *batch_bo = batch->buffer;
1471     int i;
1472
1473     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1474         gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1475     }
1476
1477     intel_batchbuffer_align(batch, 8);
1478     
1479     BEGIN_BCS_BATCH(batch, 2);
1480     OUT_BCS_BATCH(batch, 0);
1481     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1482     ADVANCE_BCS_BATCH(batch);
1483
1484     dri_bo_reference(batch_bo);
1485     intel_batchbuffer_free(batch);
1486
1487     return batch_bo;
1488 }
1489
1490 #else
1491
1492 static void
1493 gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1494                                     struct encode_state *encode_state,
1495                                     struct intel_encoder_context *encoder_context)
1496
1497 {
1498     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1499     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1500
1501     assert(vme_context->vme_output.bo);
1502     mfc_context->buffer_suface_setup(ctx,
1503                                      &mfc_context->gpe_context,
1504                                      &vme_context->vme_output,
1505                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1506                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1507     assert(mfc_context->aux_batchbuffer_surface.bo);
1508     mfc_context->buffer_suface_setup(ctx,
1509                                      &mfc_context->gpe_context,
1510                                      &mfc_context->aux_batchbuffer_surface,
1511                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1512                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
1513 }
1514
1515 static void
1516 gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1517                                      struct encode_state *encode_state,
1518                                      struct intel_encoder_context *encoder_context)
1519
1520 {
1521     struct i965_driver_data *i965 = i965_driver_data(ctx);
1522     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1523     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1524     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1525     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1526     mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1527     mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1528     mfc_context->mfc_batchbuffer_surface.pitch = 16;
1529     mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, 
1530                                                            "MFC batchbuffer",
1531                                                            mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1532                                                            0x1000);
1533     mfc_context->buffer_suface_setup(ctx,
1534                                      &mfc_context->gpe_context,
1535                                      &mfc_context->mfc_batchbuffer_surface,
1536                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1537                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1538 }
1539
1540 static void
1541 gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
1542                                     struct encode_state *encode_state,
1543                                     struct intel_encoder_context *encoder_context)
1544 {
1545     gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1546     gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1547 }
1548
1549 static void
1550 gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
1551                                 struct encode_state *encode_state,
1552                                 struct intel_encoder_context *encoder_context)
1553 {
1554     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1555     struct gen6_interface_descriptor_data *desc;   
1556     int i;
1557     dri_bo *bo;
1558
1559     bo = mfc_context->gpe_context.idrt.bo;
1560     dri_bo_map(bo, 1);
1561     assert(bo->virtual);
1562     desc = bo->virtual;
1563
1564     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1565         struct i965_kernel *kernel;
1566
1567         kernel = &mfc_context->gpe_context.kernels[i];
1568         assert(sizeof(*desc) == 32);
1569
1570         /*Setup the descritor table*/
1571         memset(desc, 0, sizeof(*desc));
1572         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1573         desc->desc2.sampler_count = 0;
1574         desc->desc2.sampler_state_pointer = 0;
1575         desc->desc3.binding_table_entry_count = 2;
1576         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1577         desc->desc4.constant_urb_entry_read_offset = 0;
1578         desc->desc4.constant_urb_entry_read_length = 4;
1579                 
1580         /*kernel start*/
1581         dri_bo_emit_reloc(bo,   
1582                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1583                           0,
1584                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1585                           kernel->bo);
1586         desc++;
1587     }
1588
1589     dri_bo_unmap(bo);
1590 }
1591
1592 static void
1593 gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1594                                     struct encode_state *encode_state,
1595                                     struct intel_encoder_context *encoder_context)
1596 {
1597     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1598     
1599     (void)mfc_context;
1600 }
1601
1602 static void
1603 gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1604                                          int index,
1605                                          int head_offset,
1606                                          int batchbuffer_offset,
1607                                          int head_size,
1608                                          int tail_size,
1609                                          int number_mb_cmds,
1610                                          int first_object,
1611                                          int last_object,
1612                                          int last_slice,
1613                                          int mb_x,
1614                                          int mb_y,
1615                                          int width_in_mbs,
1616                                          int qp)
1617 {
1618     BEGIN_BATCH(batch, 12);
1619     
1620     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1621     OUT_BATCH(batch, index);
1622     OUT_BATCH(batch, 0);
1623     OUT_BATCH(batch, 0);
1624     OUT_BATCH(batch, 0);
1625     OUT_BATCH(batch, 0);
1626    
1627     /*inline data */
1628     OUT_BATCH(batch, head_offset);
1629     OUT_BATCH(batch, batchbuffer_offset);
1630     OUT_BATCH(batch, 
1631               head_size << 16 |
1632               tail_size);
1633     OUT_BATCH(batch,
1634               number_mb_cmds << 16 |
1635               first_object << 2 |
1636               last_object << 1 |
1637               last_slice);
1638     OUT_BATCH(batch,
1639               mb_y << 8 |
1640               mb_x);
1641     OUT_BATCH(batch,
1642               qp << 16 |
1643               width_in_mbs);
1644
1645     ADVANCE_BATCH(batch);
1646 }
1647
1648 static void
1649 gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1650                                        struct intel_encoder_context *encoder_context,
1651                                        VAEncSliceParameterBufferH264 *slice_param,
1652                                        int head_offset,
1653                                        unsigned short head_size,
1654                                        unsigned short tail_size,
1655                                        int batchbuffer_offset,
1656                                        int qp,
1657                                        int last_slice)
1658 {
1659     struct intel_batchbuffer *batch = encoder_context->base.batch;
1660     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1661     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1662     int total_mbs = slice_param->num_macroblocks;
1663     int number_mb_cmds = 128;
1664     int starting_mb = 0;
1665     int last_object = 0;
1666     int first_object = 1;
1667     int i;
1668     int mb_x, mb_y;
1669     int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1670
1671     for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1672         last_object = (total_mbs - starting_mb) == number_mb_cmds;
1673         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1674         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1675         assert(mb_x <= 255 && mb_y <= 255);
1676
1677         starting_mb += number_mb_cmds;
1678
1679         gen75_mfc_batchbuffer_emit_object_command(batch,
1680                                                  index,
1681                                                  head_offset,
1682                                                  batchbuffer_offset,
1683                                                  head_size,
1684                                                  tail_size,
1685                                                  number_mb_cmds,
1686                                                  first_object,
1687                                                  last_object,
1688                                                  last_slice,
1689                                                  mb_x,
1690                                                  mb_y,
1691                                                  width_in_mbs,
1692                                                  qp);
1693
1694         if (first_object) {
1695             head_offset += head_size;
1696             batchbuffer_offset += head_size;
1697         }
1698
1699         if (last_object) {
1700             head_offset += tail_size;
1701             batchbuffer_offset += tail_size;
1702         }
1703
1704         batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
1705
1706         first_object = 0;
1707     }
1708
1709     if (!last_object) {
1710         last_object = 1;
1711         number_mb_cmds = total_mbs % number_mb_cmds;
1712         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1713         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1714         assert(mb_x <= 255 && mb_y <= 255);
1715         starting_mb += number_mb_cmds;
1716
1717         gen75_mfc_batchbuffer_emit_object_command(batch,
1718                                                  index,
1719                                                  head_offset,
1720                                                  batchbuffer_offset,
1721                                                  head_size,
1722                                                  tail_size,
1723                                                  number_mb_cmds,
1724                                                  first_object,
1725                                                  last_object,
1726                                                  last_slice,
1727                                                  mb_x,
1728                                                  mb_y,
1729                                                  width_in_mbs,
1730                                                  qp);
1731     }
1732 }
1733                           
1734 /*
1735  * return size in Owords (16bytes)
1736  */         
1737 static int
1738 gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1739                                struct encode_state *encode_state,
1740                                struct intel_encoder_context *encoder_context,
1741                                int slice_index,
1742                                int batchbuffer_offset)
1743 {
1744     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1745     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1746     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1747     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1748     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1749     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1750     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1751     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1752     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1753     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1754     unsigned char *slice_header = NULL;
1755     int slice_header_length_in_bits = 0;
1756     unsigned int tail_data[] = { 0x0, 0x0 };
1757     long head_offset;
1758     int old_used = intel_batchbuffer_used_size(slice_batch), used;
1759     unsigned short head_size, tail_size;
1760     int slice_type = pSliceParameter->slice_type;
1761
1762     if (rate_control_mode == VA_RC_CBR) {
1763         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1764         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1765     }
1766
1767     /* only support for 8-bit pixel bit-depth */
1768     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1769     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1770     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1771     assert(qp >= 0 && qp < 52);
1772
1773     head_offset = old_used / 16;
1774     gen75_mfc_avc_slice_state(ctx,
1775                              pPicParameter,
1776                              pSliceParameter,
1777                              encode_state,
1778                              encoder_context,
1779                              (rate_control_mode == VA_RC_CBR),
1780                              qp,
1781                              slice_batch);
1782
1783     if (slice_index == 0)
1784         gen75_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1785
1786     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1787
1788     // slice hander
1789     mfc_context->insert_object(ctx,
1790                                encoder_context,
1791                                (unsigned int *)slice_header,
1792                                ALIGN(slice_header_length_in_bits, 32) >> 5,
1793                                slice_header_length_in_bits & 0x1f,
1794                                5,  /* first 5 bytes are start code + nal unit type */
1795                                1,
1796                                0,
1797                                1,
1798                                slice_batch);
1799     free(slice_header);
1800
1801     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1802     used = intel_batchbuffer_used_size(slice_batch);
1803     head_size = (used - old_used) / 16;
1804     old_used = used;
1805
1806     /* tail */
1807     if (last_slice) {    
1808         mfc_context->insert_object(ctx,
1809                                    encoder_context,
1810                                    tail_data,
1811                                    2,
1812                                    8,
1813                                    2,
1814                                    1,
1815                                    1,
1816                                    0,
1817                                    slice_batch);
1818     } else {
1819         mfc_context->insert_object(ctx,
1820                                    encoder_context,
1821                                    tail_data,
1822                                    1,
1823                                    8,
1824                                    1,
1825                                    1,
1826                                    1,
1827                                    0,
1828                                    slice_batch);
1829     }
1830
1831     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1832     used = intel_batchbuffer_used_size(slice_batch);
1833     tail_size = (used - old_used) / 16;
1834
1835    
1836     gen75_mfc_avc_batchbuffer_slice_command(ctx,
1837                                            encoder_context,
1838                                            pSliceParameter,
1839                                            head_offset,
1840                                            head_size,
1841                                            tail_size,
1842                                            batchbuffer_offset,
1843                                            qp,
1844                                            last_slice);
1845
1846     return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
1847 }
1848
1849 static void
1850 gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1851                                   struct encode_state *encode_state,
1852                                   struct intel_encoder_context *encoder_context)
1853 {
1854     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1855     struct intel_batchbuffer *batch = encoder_context->base.batch;
1856     int i, size, offset = 0;
1857     intel_batchbuffer_start_atomic(batch, 0x4000); 
1858     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1859
1860     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1861         size = gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1862         offset += size;
1863     }
1864
1865     intel_batchbuffer_end_atomic(batch);
1866     intel_batchbuffer_flush(batch);
1867 }
1868
1869 static void
1870 gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
1871                                struct encode_state *encode_state,
1872                                struct intel_encoder_context *encoder_context)
1873 {
1874     gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1875     gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1876     gen75_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1877     gen75_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1878 }
1879
1880 static dri_bo *
1881 gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1882                                   struct encode_state *encode_state,
1883                                   struct intel_encoder_context *encoder_context)
1884 {
1885     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1886
1887     gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1888     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1889
1890     return mfc_context->mfc_batchbuffer_surface.bo;
1891 }
1892
1893 #endif
1894
1895 static void
1896 gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
1897                                  struct encode_state *encode_state,
1898                                  struct intel_encoder_context *encoder_context)
1899 {
1900     struct intel_batchbuffer *batch = encoder_context->base.batch;
1901     dri_bo *slice_batch_bo;
1902
1903     if ( interlace_check(ctx, encode_state, encoder_context) ) {
1904         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1905         assert(0);
1906         return; 
1907     }
1908
1909 #if __SOFTWARE__
1910     slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1911 #else
1912     slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1913 #endif
1914
1915     // begin programing
1916     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1917     intel_batchbuffer_emit_mi_flush(batch);
1918     
1919     // picture level programing
1920     gen75_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1921
1922     BEGIN_BCS_BATCH(batch, 2);
1923     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1924     OUT_BCS_RELOC(batch,
1925                   slice_batch_bo,
1926                   I915_GEM_DOMAIN_COMMAND, 0, 
1927                   0);
1928     ADVANCE_BCS_BATCH(batch);
1929
1930     // end programing
1931     intel_batchbuffer_end_atomic(batch);
1932
1933     dri_bo_unreference(slice_batch_bo);
1934 }
1935
1936
1937 static VAStatus
1938 gen75_mfc_avc_encode_picture(VADriverContextP ctx, 
1939                             struct encode_state *encode_state,
1940                             struct intel_encoder_context *encoder_context)
1941 {
1942     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1943     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1944     int current_frame_bits_size;
1945     int sts;
1946  
1947     for (;;) {
1948         gen75_mfc_init(ctx, encoder_context);
1949         gen75_mfc_avc_prepare(ctx, encode_state, encoder_context);
1950         /*Programing bcs pipeline*/
1951         gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);  //filling the pipeline
1952         gen75_mfc_run(ctx, encode_state, encoder_context);
1953         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1954             gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1955             sts = gen75_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1956             if (sts == BRC_NO_HRD_VIOLATION) {
1957                 gen75_mfc_hrd_context_update(encode_state, mfc_context);
1958                 break;
1959             }
1960             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1961                 if (!mfc_context->hrd.violation_noted) {
1962                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1963                     mfc_context->hrd.violation_noted = 1;
1964                 }
1965                 return VA_STATUS_SUCCESS;
1966             }
1967         } else {
1968             break;
1969         }
1970     }
1971
1972     return VA_STATUS_SUCCESS;
1973 }
1974
1975 static void gen75_mfc_brc_prepare(struct encode_state *encode_state,
1976                           struct intel_encoder_context *encoder_context)
1977 {
1978     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1979     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1980
1981     if (rate_control_mode == VA_RC_CBR) {
1982         /*Programing bit rate control */
1983         if ( mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord == 0 ) {
1984             gen75_mfc_bit_rate_control_context_init(encode_state, mfc_context);
1985             gen75_mfc_brc_init(encode_state, encoder_context);
1986         }
1987
1988         /*Programing HRD control */
1989         if ( mfc_context->vui_hrd.i_cpb_size_value == 0 )
1990             gen75_mfc_hrd_context_init(encode_state, encoder_context);    
1991     }
1992 }
1993
1994 static void
1995 gen75_mfc_context_destroy(void *context)
1996 {
1997     struct gen6_mfc_context *mfc_context = context;
1998     int i;
1999
2000     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2001     mfc_context->post_deblocking_output.bo = NULL;
2002
2003     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2004     mfc_context->pre_deblocking_output.bo = NULL;
2005
2006     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2007     mfc_context->uncompressed_picture_source.bo = NULL;
2008
2009     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
2010     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2011
2012     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2013         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2014         mfc_context->direct_mv_buffers[i].bo = NULL;
2015     }
2016
2017     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2018     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
2019
2020     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2021     mfc_context->macroblock_status_buffer.bo = NULL;
2022
2023     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2024     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2025
2026     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2027     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2028
2029
2030     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2031         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2032         mfc_context->reference_surfaces[i].bo = NULL;  
2033     }
2034
2035     i965_gpe_context_destroy(&mfc_context->gpe_context);
2036
2037     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2038     mfc_context->mfc_batchbuffer_surface.bo = NULL;
2039
2040     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2041     mfc_context->aux_batchbuffer_surface.bo = NULL;
2042
2043     if (mfc_context->aux_batchbuffer)
2044         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2045
2046     mfc_context->aux_batchbuffer = NULL;
2047
2048     free(mfc_context);
2049 }
2050
2051 static VAStatus gen75_mfc_pipeline(VADriverContextP ctx,
2052                   VAProfile profile,
2053                   struct encode_state *encode_state,
2054                   struct intel_encoder_context *encoder_context)
2055 {
2056     VAStatus vaStatus;
2057
2058     switch (profile) {
2059     case VAProfileH264Baseline:
2060     case VAProfileH264Main:
2061     case VAProfileH264High:
2062         vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
2063         break;
2064
2065         /* FIXME: add for other profile */
2066     default:
2067         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2068         break;
2069     }
2070
2071     return vaStatus;
2072 }
2073
2074 Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2075 {
2076     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
2077
2078     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2079
2080     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
2081     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
2082
2083     mfc_context->gpe_context.curbe.length = 32 * 4;
2084
2085     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2086     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
2087     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
2088     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
2089     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
2090
2091     i965_gpe_load_kernels(ctx,
2092                           &mfc_context->gpe_context,
2093                           gen75_mfc_kernels,
2094                           NUM_MFC_KERNEL);
2095
2096     mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
2097     mfc_context->set_surface_state = gen75_mfc_surface_state;
2098     mfc_context->ind_obj_base_addr_state = gen75_mfc_ind_obj_base_addr_state;
2099     mfc_context->avc_img_state = gen75_mfc_avc_img_state;
2100     mfc_context->avc_qm_state = gen75_mfc_avc_qm_state;
2101     mfc_context->avc_fqm_state = gen75_mfc_avc_fqm_state;
2102     mfc_context->insert_object = gen75_mfc_avc_insert_object;
2103     mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;
2104
2105     encoder_context->mfc_context = mfc_context;
2106     encoder_context->mfc_context_destroy = gen75_mfc_context_destroy;
2107     encoder_context->mfc_pipeline = gen75_mfc_pipeline;
2108     encoder_context->mfc_brc_prepare = gen75_mfc_brc_prepare;
2109
2110     return True;
2111 }