Unify the XXX_free_avc_surface for media encoding/decoding
[platform/upstream/libva-intel-driver.git] / src / gen6_mfc.c
1 /*
2  * Copyright © 2010-2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhou Chang <chang.zhou@intel.com>
26  *
27  */
28
29 #ifndef HAVE_GEN_AVC_SURFACE
30 #define HAVE_GEN_AVC_SURFACE 1
31 #endif
32
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <assert.h>
37 #include <math.h>
38
39 #include "intel_batchbuffer.h"
40 #include "i965_defines.h"
41 #include "i965_structs.h"
42 #include "i965_drv_video.h"
43 #include "i965_encoder.h"
44 #include "i965_encoder_utils.h"
45 #include "gen6_mfc.h"
46 #include "gen6_vme.h"
47 #include "intel_media.h"
48
/*
 * Contents of shaders/utils/mfc_batchbuffer_avc_intra.g6b, stored as rows of
 * four 32-bit words.  Referenced from gen6_mfc_kernels[] as the
 * "MFC AVC INTRA BATCHBUFFER " entry.
 */
static const uint32_t gen6_mfc_batchbuffer_avc_intra[][4] = {
#include "shaders/utils/mfc_batchbuffer_avc_intra.g6b"
};

/*
 * Contents of shaders/utils/mfc_batchbuffer_avc_inter.g6b, same row layout.
 * Referenced from gen6_mfc_kernels[] as the "MFC AVC INTER BATCHBUFFER " entry.
 */
static const uint32_t gen6_mfc_batchbuffer_avc_inter[][4] = {
#include "shaders/utils/mfc_batchbuffer_avc_inter.g6b"
};
56
57 static struct i965_kernel gen6_mfc_kernels[] = {
58     {
59         "MFC AVC INTRA BATCHBUFFER ",
60         MFC_BATCHBUFFER_AVC_INTRA,
61         gen6_mfc_batchbuffer_avc_intra,
62         sizeof(gen6_mfc_batchbuffer_avc_intra),
63         NULL
64     },
65
66     {
67         "MFC AVC INTER BATCHBUFFER ",
68         MFC_BATCHBUFFER_AVC_INTER,
69         gen6_mfc_batchbuffer_avc_inter,
70         sizeof(gen6_mfc_batchbuffer_avc_inter),
71         NULL
72     },
73 };
74
75 static void
76 gen6_mfc_pipe_mode_select(VADriverContextP ctx,
77                           int standard_select,
78                           struct intel_encoder_context *encoder_context)
79 {
80     struct intel_batchbuffer *batch = encoder_context->base.batch;
81     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
82
83     assert(standard_select == MFX_FORMAT_AVC);
84
85     BEGIN_BCS_BATCH(batch, 4);
86
87     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
88     OUT_BCS_BATCH(batch,
89                   (1 << 10) | /* disable Stream-Out , advanced QP/bitrate control need enable it*/
90                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
91                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
92                   (0 << 7)  | /* disable TLB prefectch */
93                   (0 << 5)  | /* not in stitch mode */
94                   (1 << 4)  | /* encoding mode */
95                   (2 << 0));  /* Standard Select: AVC */
96     OUT_BCS_BATCH(batch,
97                   (0 << 20) | /* round flag in PB slice */
98                   (0 << 19) | /* round flag in Intra8x8 */
99                   (0 << 7)  | /* expand NOA bus flag */
100                   (1 << 6)  | /* must be 1 */
101                   (0 << 5)  | /* disable clock gating for NOA */
102                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
103                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
104                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
105                   (0 << 1)  | /* AVC long field motion vector */
106                   (0 << 0));  /* always calculate AVC ILDB boundary strength */
107     OUT_BCS_BATCH(batch, 0);
108
109     ADVANCE_BCS_BATCH(batch);
110 }
111
112 static void
113 gen6_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
114 {
115     struct intel_batchbuffer *batch = encoder_context->base.batch;
116     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
117
118     BEGIN_BCS_BATCH(batch, 6);
119
120     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
121     OUT_BCS_BATCH(batch, 0);
122     OUT_BCS_BATCH(batch,
123                   ((mfc_context->surface_state.height - 1) << 19) |
124                   ((mfc_context->surface_state.width - 1) << 6));
125     OUT_BCS_BATCH(batch,
126                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
127                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
128                   (0 << 22) | /* surface object control state, FIXME??? */
129                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
130                   (0 << 2)  | /* must be 0 for interleave U/V */
131                   (1 << 1)  | /* must be y-tiled */
132                   (I965_TILEWALK_YMAJOR << 0));                         /* tile walk, TILEWALK_YMAJOR */
133     OUT_BCS_BATCH(batch,
134                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
135                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
136     OUT_BCS_BATCH(batch, 0);
137     ADVANCE_BCS_BATCH(batch);
138 }
139
140 static void
141 gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
142 {
143     struct intel_batchbuffer *batch = encoder_context->base.batch;
144     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
145     int i;
146
147     BEGIN_BCS_BATCH(batch, 24);
148
149     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
150
151     if (mfc_context->pre_deblocking_output.bo)
152         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
153                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
154                       0);
155     else
156         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
157
158     if (mfc_context->post_deblocking_output.bo)
159         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
160                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
161                       0);                                                                                       /* post output addr  */ 
162     else
163         OUT_BCS_BATCH(batch, 0);
164
165     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
166                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
167                   0);                                                                                   /* uncompressed data */
168     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
169                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
170                   0);                                                                                   /* StreamOut data*/
171     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
172                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
173                   0);   
174     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
175                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
176                   0);
177     /* 7..22 Reference pictures*/
178     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
179         if ( mfc_context->reference_surfaces[i].bo != NULL) {
180             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
181                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
182                           0);                   
183         } else {
184             OUT_BCS_BATCH(batch, 0);
185         }
186     }
187     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
188                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
189                   0);                                                                                   /* Macroblock status buffer*/
190
191     ADVANCE_BCS_BATCH(batch);
192 }
193
194 static void
195 gen6_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
196 {
197     struct intel_batchbuffer *batch = encoder_context->base.batch;
198     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
199     struct gen6_vme_context *vme_context = encoder_context->vme_context;
200
201     BEGIN_BCS_BATCH(batch, 11);
202
203     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
204     OUT_BCS_BATCH(batch, 0);
205     OUT_BCS_BATCH(batch, 0);
206     /* MFX Indirect MV Object Base Address */
207     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
208     OUT_BCS_BATCH(batch, 0);    
209     OUT_BCS_BATCH(batch, 0);
210     OUT_BCS_BATCH(batch, 0);
211     OUT_BCS_BATCH(batch, 0);
212     OUT_BCS_BATCH(batch, 0);
213     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
214     OUT_BCS_RELOC(batch,
215                   mfc_context->mfc_indirect_pak_bse_object.bo,
216                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
217                   0);
218     OUT_BCS_RELOC(batch,
219                   mfc_context->mfc_indirect_pak_bse_object.bo,
220                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
221                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
222
223     ADVANCE_BCS_BATCH(batch);
224 }
225
226 static void
227 gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
228 {
229     struct intel_batchbuffer *batch = encoder_context->base.batch;
230     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
231
232     BEGIN_BCS_BATCH(batch, 4);
233
234     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
235     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
236                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
237                   0);
238     OUT_BCS_BATCH(batch, 0);
239     OUT_BCS_BATCH(batch, 0);
240
241     ADVANCE_BCS_BATCH(batch);
242 }
243
244 static void
245 gen6_mfc_avc_img_state(VADriverContextP ctx,struct encode_state *encode_state,
246                        struct intel_encoder_context *encoder_context)
247 {
248     struct intel_batchbuffer *batch = encoder_context->base.batch;
249     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
250     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
251     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
252     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
253     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
254
255     BEGIN_BCS_BATCH(batch, 13);
256     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (13 - 2));
257     OUT_BCS_BATCH(batch, 
258                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
259     OUT_BCS_BATCH(batch, 
260                   (height_in_mbs << 16) | 
261                   (width_in_mbs << 0));
262     OUT_BCS_BATCH(batch, 
263                   (0 << 24) |     /*Second Chroma QP Offset*/
264                   (0 << 16) |     /*Chroma QP Offset*/
265                   (0 << 14) |   /*Max-bit conformance Intra flag*/
266                   (0 << 13) |   /*Max Macroblock size conformance Inter flag*/
267                   (1 << 12) |   /*Should always be written as "1" */
268                   (0 << 10) |   /*QM Preset FLag */
269                   (0 << 8)  |   /*Image Structure*/
270                   (0 << 0) );   /*Current Decoed Image Frame Store ID, reserved in Encode mode*/
271     OUT_BCS_BATCH(batch,
272                   (400 << 16) |   /*Mininum Frame size*/        
273                   (0 << 15) |   /*Disable reading of Macroblock Status Buffer*/
274                   (0 << 14) |   /*Load BitStream Pointer only once, 1 slic 1 frame*/
275                   (0 << 13) |   /*CABAC 0 word insertion test enable*/
276                   (1 << 12) |   /*MVUnpackedEnable,compliant to DXVA*/
277                   (1 << 10) |   /*Chroma Format IDC, 4:2:0*/
278                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
279                   (0 << 6)  |   /*Only valid for VLD decoding mode*/
280                   (0 << 5)  |   /*Constrained Intra Predition Flag, from PPS*/
281                   (pSequenceParameter->seq_fields.bits.direct_8x8_inference_flag << 4)  |   /*Direct 8x8 inference flag*/
282                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
283                   (1 << 2)  |   /*Frame MB only flag*/
284                   (0 << 1)  |   /*MBAFF mode is in active*/
285                   (0 << 0) );   /*Field picture flag*/
286     OUT_BCS_BATCH(batch, 
287                   (1<<16)   |   /*Frame Size Rate Control Flag*/  
288                   (1<<12)   |   
289                   (1<<9)    |   /*MB level Rate Control Enabling Flag*/
290                   (1 << 3)  |   /*FrameBitRateMinReportMask*/
291                   (1 << 2)  |   /*FrameBitRateMaxReportMask*/
292                   (1 << 1)  |   /*InterMBMaxSizeReportMask*/
293                   (1 << 0) );   /*IntraMBMaxSizeReportMask*/
294     OUT_BCS_BATCH(batch,                        /*Inter and Intra Conformance Max size limit*/
295                   (0x0600 << 16) |              /*InterMbMaxSz 192 Byte*/
296                   (0x0800) );                   /*IntraMbMaxSz 256 Byte*/
297     OUT_BCS_BATCH(batch, 0x00000000);   /*Reserved : MBZReserved*/
298     OUT_BCS_BATCH(batch, 0x01020304);   /*Slice QP Delta for bitrate control*/                  
299     OUT_BCS_BATCH(batch, 0xFEFDFCFB);           
300     OUT_BCS_BATCH(batch, 0x80601004);   /*MAX = 128KB, MIN = 64KB*/
301     OUT_BCS_BATCH(batch, 0x00800001);   
302     OUT_BCS_BATCH(batch, 0);
303
304     ADVANCE_BCS_BATCH(batch);
305 }
306
307 static void
308 gen6_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
309 {
310     struct intel_batchbuffer *batch = encoder_context->base.batch;
311     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
312
313     int i;
314
315     BEGIN_BCS_BATCH(batch, 69);
316
317     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
318
319     /* Reference frames and Current frames */
320     for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
321         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
322             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
323                           I915_GEM_DOMAIN_INSTRUCTION, 0,
324                           0);
325         } else {
326             OUT_BCS_BATCH(batch, 0);
327         }
328     }
329
330     /* POL list */
331     for(i = 0; i < 32; i++) {
332         OUT_BCS_BATCH(batch, i/2);
333     }
334     OUT_BCS_BATCH(batch, 0);
335     OUT_BCS_BATCH(batch, 0);
336
337     ADVANCE_BCS_BATCH(batch);
338 }
339
340 static void
341 gen6_mfc_avc_slice_state(VADriverContextP ctx,
342                          VAEncPictureParameterBufferH264 *pic_param,
343                          VAEncSliceParameterBufferH264 *slice_param,
344                          struct encode_state *encode_state,
345                          struct intel_encoder_context *encoder_context,
346                          int rate_control_enable,
347                          int qp,
348                          struct intel_batchbuffer *batch)
349 {
350     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
351     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
352     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
353     int beginmb = slice_param->macroblock_address;
354     int endmb = beginmb + slice_param->num_macroblocks;
355     int beginx = beginmb % width_in_mbs;
356     int beginy = beginmb / width_in_mbs;
357     int nextx =  endmb % width_in_mbs;
358     int nexty = endmb / width_in_mbs;
359     int slice_type = slice_param->slice_type;
360     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
361     int bit_rate_control_target, maxQpN, maxQpP;
362     unsigned char correct[6], grow, shrink;
363     int i;
364     int weighted_pred_idc = 0;
365     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
366     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
367
368     if (batch == NULL)
369         batch = encoder_context->base.batch;
370
371     bit_rate_control_target = slice_type;
372     if (slice_type == SLICE_TYPE_SP)
373         bit_rate_control_target = SLICE_TYPE_P;
374     else if (slice_type == SLICE_TYPE_SI)
375         bit_rate_control_target = SLICE_TYPE_I;
376
377     if (slice_type == SLICE_TYPE_P) {
378         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
379     } else if (slice_type == SLICE_TYPE_B) {
380         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
381
382         if (weighted_pred_idc == 2) {
383             /* 8.4.3 - Derivation process for prediction weights (8-279) */
384             luma_log2_weight_denom = 5;
385             chroma_log2_weight_denom = 5;
386         }
387     }
388
389     maxQpN = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpNegModifier;
390     maxQpP = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpPosModifier;
391
392     for (i = 0; i < 6; i++)
393         correct[i] = mfc_context->bit_rate_control_context[bit_rate_control_target].Correct[i];
394
395     grow = mfc_context->bit_rate_control_context[bit_rate_control_target].GrowInit + 
396         (mfc_context->bit_rate_control_context[bit_rate_control_target].GrowResistance << 4);
397     shrink = mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkInit + 
398         (mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkResistance << 4);
399
400     BEGIN_BCS_BATCH(batch, 11);;
401
402     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
403     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
404
405     if (slice_type == SLICE_TYPE_I) {
406         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
407     } else {
408         OUT_BCS_BATCH(batch,
409                       (1 << 16) |                       /*1 reference frame*/
410                       (chroma_log2_weight_denom << 8) |
411                       (luma_log2_weight_denom << 0));
412     }
413
414     OUT_BCS_BATCH(batch, 
415                   (weighted_pred_idc << 30) |
416                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
417                   (slice_param->disable_deblocking_filter_idc << 27) |
418                   (slice_param->cabac_init_idc << 24) |
419                   (qp<<16) |                    /*Slice Quantization Parameter*/
420                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
421                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
422     OUT_BCS_BATCH(batch,
423                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
424                   (beginx << 16) |
425                   slice_param->macroblock_address );
426     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
427     OUT_BCS_BATCH(batch, 
428                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
429                   (1 << 30) |           /*ResetRateControlCounter*/
430                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
431                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
432                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
433                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
434                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
435                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
436                   (last_slice << 19) |     /*IsLastSlice*/
437                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
438                   (1 << 17) |       /*HeaderPresentFlag*/       
439                   (1 << 16) |       /*SliceData PresentFlag*/
440                   (1 << 15) |       /*TailPresentFlag*/
441                   (1 << 13) |       /*RBSP NAL TYPE*/   
442                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
443     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
444     OUT_BCS_BATCH(batch,
445                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
446                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
447                   (shrink << 8)  |
448                   (grow << 0));   
449     OUT_BCS_BATCH(batch,
450                   (correct[5] << 20) |
451                   (correct[4] << 16) |
452                   (correct[3] << 12) |
453                   (correct[2] << 8) |
454                   (correct[1] << 4) |
455                   (correct[0] << 0));
456     OUT_BCS_BATCH(batch, 0);
457
458     ADVANCE_BCS_BATCH(batch);
459 }
460
461 static void gen6_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
462 {
463     struct intel_batchbuffer *batch = encoder_context->base.batch;
464     int i;
465
466     BEGIN_BCS_BATCH(batch, 58);
467
468     OUT_BCS_BATCH(batch, MFX_AVC_QM_STATE | 56);
469     OUT_BCS_BATCH(batch, 0xFF ) ; 
470     for( i = 0; i < 56; i++) {
471         OUT_BCS_BATCH(batch, 0x10101010); 
472     }   
473
474     ADVANCE_BCS_BATCH(batch);
475 }
476
477 static void gen6_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
478 {
479     struct intel_batchbuffer *batch = encoder_context->base.batch;
480     int i;
481
482     BEGIN_BCS_BATCH(batch, 113);
483     OUT_BCS_BATCH(batch, MFC_AVC_FQM_STATE | (113 - 2));
484
485     for(i = 0; i < 112;i++) {
486         OUT_BCS_BATCH(batch, 0x10001000);
487     }   
488
489     ADVANCE_BCS_BATCH(batch);   
490 }
491
492 static void
493 gen6_mfc_avc_ref_idx_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
494 {
495     struct intel_batchbuffer *batch = encoder_context->base.batch;
496     int i;
497
498     BEGIN_BCS_BATCH(batch, 10);
499     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
500     OUT_BCS_BATCH(batch, 0);                  //Select L0
501     OUT_BCS_BATCH(batch, 0x80808020);         //Only 1 reference
502     for(i = 0; i < 7; i++) {
503         OUT_BCS_BATCH(batch, 0x80808080);
504     }   
505     ADVANCE_BCS_BATCH(batch);
506
507     BEGIN_BCS_BATCH(batch, 10);
508     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
509     OUT_BCS_BATCH(batch, 1);                  //Select L1
510     OUT_BCS_BATCH(batch, 0x80808022);         //Only 1 reference
511     for(i = 0; i < 7; i++) {
512         OUT_BCS_BATCH(batch, 0x80808080);
513     }   
514     ADVANCE_BCS_BATCH(batch);
515 }
516         
517 static void
518 gen6_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
519                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
520                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
521                            struct intel_batchbuffer *batch)
522 {
523     if (batch == NULL)
524         batch = encoder_context->base.batch;
525
526     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
527
528     OUT_BCS_BATCH(batch, MFC_AVC_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
529
530     OUT_BCS_BATCH(batch,
531                   (0 << 16) |   /* always start at offset 0 */
532                   (data_bits_in_last_dw << 8) |
533                   (skip_emul_byte_count << 4) |
534                   (!!emulation_flag << 3) |
535                   ((!!is_last_header) << 2) |
536                   ((!!is_end_of_slice) << 1) |
537                   (0 << 0));    /* FIXME: ??? */
538
539     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
540     ADVANCE_BCS_BATCH(batch);
541 }
542
543 static void gen6_mfc_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
544 {
545     struct i965_driver_data *i965 = i965_driver_data(ctx);
546     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
547     dri_bo *bo;
548     int i;
549
550     /*Encode common setup for MFC*/
551     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
552     mfc_context->post_deblocking_output.bo = NULL;
553
554     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
555     mfc_context->pre_deblocking_output.bo = NULL;
556
557     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
558     mfc_context->uncompressed_picture_source.bo = NULL;
559
560     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
561     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
562
563     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
564         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
565         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
566         mfc_context->direct_mv_buffers[i].bo = NULL;
567     }
568
569     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
570         if (mfc_context->reference_surfaces[i].bo != NULL)
571             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
572         mfc_context->reference_surfaces[i].bo = NULL;  
573     }
574
575     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
576     bo = dri_bo_alloc(i965->intel.bufmgr,
577                       "Buffer",
578                       128 * 64,
579                       64);
580     assert(bo);
581     mfc_context->intra_row_store_scratch_buffer.bo = bo;
582
583     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
584     bo = dri_bo_alloc(i965->intel.bufmgr,
585                       "Buffer",
586                       128*128*16,
587                       64);
588     assert(bo);
589     mfc_context->macroblock_status_buffer.bo = bo;
590
591     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
592     bo = dri_bo_alloc(i965->intel.bufmgr,
593                       "Buffer",
594                       49152,  /* 6 * 128 * 64 */
595                       64);
596     assert(bo);
597     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
598
599     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
600     bo = dri_bo_alloc(i965->intel.bufmgr,
601                       "Buffer",
602                       12288, /* 1.5 * 128 * 64 */
603                       0x1000);
604     assert(bo);
605     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
606
607     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
608     mfc_context->mfc_batchbuffer_surface.bo = NULL;
609
610     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
611     mfc_context->aux_batchbuffer_surface.bo = NULL;
612
613     if (mfc_context->aux_batchbuffer)
614         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
615
616     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD);
617     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
618     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
619     mfc_context->aux_batchbuffer_surface.pitch = 16;
620     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
621     mfc_context->aux_batchbuffer_surface.size_block = 16;
622
623     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
624 }
625
626 static void gen6_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
627                                       struct encode_state *encode_state,
628                                       struct intel_encoder_context *encoder_context)
629 {
630     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
631
632     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
633     mfc_context->set_surface_state(ctx, encoder_context);
634     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
635     gen6_mfc_pipe_buf_addr_state(ctx, encoder_context);
636     gen6_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
637     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
638     mfc_context->avc_qm_state(ctx, encoder_context);
639     mfc_context->avc_fqm_state(ctx, encoder_context);
640     gen6_mfc_avc_directmode_state(ctx, encoder_context); 
641     gen6_mfc_avc_ref_idx_state(ctx, encoder_context);
642 }
643
644
645 static VAStatus gen6_mfc_avc_prepare(VADriverContextP ctx, 
646                                      struct encode_state *encode_state,
647                                      struct intel_encoder_context *encoder_context)
648 {
649     struct i965_driver_data *i965 = i965_driver_data(ctx);
650     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
651     struct object_surface *obj_surface; 
652     struct object_buffer *obj_buffer;
653     GenAvcSurface *gen6_avc_surface;
654     dri_bo *bo;
655     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
656     VAStatus vaStatus = VA_STATUS_SUCCESS;
657     int i, j, enable_avc_ildb = 0;
658     VAEncSliceParameterBufferH264 *slice_param;
659     VACodedBufferSegment *coded_buffer_segment;
660     unsigned char *flag = NULL;
661
662     for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
663         assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
664         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
665
666         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
667             assert((slice_param->slice_type == SLICE_TYPE_I) ||
668                    (slice_param->slice_type == SLICE_TYPE_SI) ||
669                    (slice_param->slice_type == SLICE_TYPE_P) ||
670                    (slice_param->slice_type == SLICE_TYPE_SP) ||
671                    (slice_param->slice_type == SLICE_TYPE_B));
672
673             if (slice_param->disable_deblocking_filter_idc != 1) {
674                 enable_avc_ildb = 1;
675                 break;
676             }
677
678             slice_param++;
679         }
680     }
681
682     /*Setup all the input&output object*/
683
684     /* Setup current frame and current direct mv buffer*/
685     obj_surface = SURFACE(pPicParameter->CurrPic.picture_id);
686     assert(obj_surface);
687     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
688
689     if ( obj_surface->private_data == NULL) {
690         gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
691         gen6_avc_surface->dmv_top = 
692             dri_bo_alloc(i965->intel.bufmgr,
693                          "Buffer",
694                          68*8192, 
695                          64);
696         gen6_avc_surface->dmv_bottom = 
697             dri_bo_alloc(i965->intel.bufmgr,
698                          "Buffer",
699                          68*8192, 
700                          64);
701         assert(gen6_avc_surface->dmv_top);
702         assert(gen6_avc_surface->dmv_bottom);
703         obj_surface->private_data = (void *)gen6_avc_surface;
704         obj_surface->free_private_data = (void *)gen_free_avc_surface; 
705     }
706     gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
707     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
708     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
709     dri_bo_reference(gen6_avc_surface->dmv_top);
710     dri_bo_reference(gen6_avc_surface->dmv_bottom);
711
712     if (enable_avc_ildb) {
713         mfc_context->post_deblocking_output.bo = obj_surface->bo;
714         dri_bo_reference(mfc_context->post_deblocking_output.bo);
715     } else {
716         mfc_context->pre_deblocking_output.bo = obj_surface->bo;
717         dri_bo_reference(mfc_context->pre_deblocking_output.bo);
718     }
719
720     mfc_context->surface_state.width = obj_surface->orig_width;
721     mfc_context->surface_state.height = obj_surface->orig_height;
722     mfc_context->surface_state.w_pitch = obj_surface->width;
723     mfc_context->surface_state.h_pitch = obj_surface->height;
724     
725     /* Setup reference frames and direct mv buffers*/
726     for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
727         if ( pPicParameter->ReferenceFrames[i].picture_id != VA_INVALID_ID ) { 
728             obj_surface = SURFACE(pPicParameter->ReferenceFrames[i].picture_id);
729             assert(obj_surface);
730             if (obj_surface->bo != NULL) {
731                 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
732                 dri_bo_reference(obj_surface->bo);
733             }
734             /* Check DMV buffer */
735             if ( obj_surface->private_data == NULL) {
736                 
737                 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
738                 gen6_avc_surface->dmv_top = 
739                     dri_bo_alloc(i965->intel.bufmgr,
740                                  "Buffer",
741                                  68*8192, 
742                                  64);
743                 gen6_avc_surface->dmv_bottom = 
744                     dri_bo_alloc(i965->intel.bufmgr,
745                                  "Buffer",
746                                  68*8192, 
747                                  64);
748                 assert(gen6_avc_surface->dmv_top);
749                 assert(gen6_avc_surface->dmv_bottom);
750                 obj_surface->private_data = gen6_avc_surface;
751                 obj_surface->free_private_data = gen_free_avc_surface; 
752             }
753     
754             gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
755             /* Setup DMV buffer */
756             mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
757             mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom; 
758             dri_bo_reference(gen6_avc_surface->dmv_top);
759             dri_bo_reference(gen6_avc_surface->dmv_bottom);
760         } else {
761             break;
762         }
763     }
764         
765     obj_surface = SURFACE(encoder_context->input_yuv_surface);
766     assert(obj_surface && obj_surface->bo);
767     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
768     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
769
770     obj_buffer = BUFFER (pPicParameter->coded_buf); /* FIXME: fix this later */
771     bo = obj_buffer->buffer_store->bo;
772     assert(bo);
773     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
774     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_SIZE;
775     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
776     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
777     
778     dri_bo_map(bo, 1);
779     coded_buffer_segment = (VACodedBufferSegment *)bo->virtual;
780     flag = (unsigned char *)(coded_buffer_segment + 1);
781     *flag = 0;
782     dri_bo_unmap(bo);
783
784     return vaStatus;
785 }
786
787 static VAStatus gen6_mfc_run(VADriverContextP ctx, 
788                              struct encode_state *encode_state,
789                              struct intel_encoder_context *encoder_context)
790 {
791     struct intel_batchbuffer *batch = encoder_context->base.batch;
792
793     intel_batchbuffer_flush(batch);             //run the pipeline
794
795     return VA_STATUS_SUCCESS;
796 }
797
798 static VAStatus
799 gen6_mfc_stop(VADriverContextP ctx, 
800               struct encode_state *encode_state,
801               struct intel_encoder_context *encoder_context,
802               int *encoded_bits_size)
803 {
804     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
805     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
806     VACodedBufferSegment *coded_buffer_segment;
807     
808     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
809     assert(vaStatus == VA_STATUS_SUCCESS);
810     *encoded_bits_size = coded_buffer_segment->size * 8;
811     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
812
813     return VA_STATUS_SUCCESS;
814 }
815
816 #if __SOFTWARE__
817
818 static int
819 gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg,
820                               struct intel_encoder_context *encoder_context,
821                               unsigned char target_mb_size, unsigned char max_mb_size,
822                               struct intel_batchbuffer *batch)
823 {
824     int len_in_dwords = 11;
825
826     if (batch == NULL)
827         batch = encoder_context->base.batch;
828
829     BEGIN_BCS_BATCH(batch, len_in_dwords);
830
831     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
832     OUT_BCS_BATCH(batch, 0);
833     OUT_BCS_BATCH(batch, 0);
834     OUT_BCS_BATCH(batch, 
835                   (0 << 24) |           /* PackedMvNum, Debug*/
836                   (0 << 20) |           /* No motion vector */
837                   (1 << 19) |           /* CbpDcY */
838                   (1 << 18) |           /* CbpDcU */
839                   (1 << 17) |           /* CbpDcV */
840                   (msg[0] & 0xFFFF) );
841
842     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
843     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
844     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
845
846     /*Stuff for Intra MB*/
847     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
848     OUT_BCS_BATCH(batch, msg[2]);       
849     OUT_BCS_BATCH(batch, msg[3]&0xFC);          
850     
851     /*MaxSizeInWord and TargetSzieInWord*/
852     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
853                   (target_mb_size << 16) );
854
855     ADVANCE_BCS_BATCH(batch);
856
857     return len_in_dwords;
858 }
859
860 static int
861 gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
862                               unsigned int *msg, unsigned int offset,
863                               struct intel_encoder_context *encoder_context,
864                               unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
865                               struct intel_batchbuffer *batch)
866 {
867     int len_in_dwords = 11;
868
869     if (batch == NULL)
870         batch = encoder_context->base.batch;
871
872     BEGIN_BCS_BATCH(batch, len_in_dwords);
873
874     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
875
876     OUT_BCS_BATCH(batch, msg[2]);         /* 32 MV*/
877     OUT_BCS_BATCH(batch, offset);
878
879     OUT_BCS_BATCH(batch, msg[0]);
880
881     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
882     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
883 #if 0 
884     if ( slice_type == SLICE_TYPE_B) {
885         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
886     } else {
887         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
888     }
889 #else
890     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
891 #endif
892
893
894     /*Stuff for Inter MB*/
895     OUT_BCS_BATCH(batch, msg[1]);        
896     OUT_BCS_BATCH(batch, 0x0);    
897     OUT_BCS_BATCH(batch, 0x0);        
898
899     /*MaxSizeInWord and TargetSzieInWord*/
900     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
901                   (target_mb_size << 16) );
902
903     ADVANCE_BCS_BATCH(batch);
904
905     return len_in_dwords;
906 }
907
908 static void 
909 gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
910                                        struct encode_state *encode_state,
911                                        struct intel_encoder_context *encoder_context,
912                                        int slice_index,
913                                        struct intel_batchbuffer *slice_batch)
914 {
915     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
916     struct gen6_vme_context *vme_context = encoder_context->vme_context;
917     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
918     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
919     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
920     unsigned int *msg = NULL, offset = 0;
921     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
922     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
923     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
924     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
925     int i,x,y;
926     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
927     unsigned int rate_control_mode = encoder_context->rate_control_mode;
928     unsigned char *slice_header = NULL;
929     int slice_header_length_in_bits = 0;
930     unsigned int tail_data[] = { 0x0, 0x0 };
931     int slice_type = pSliceParameter->slice_type;
932
933
934     if (rate_control_mode == VA_RC_CBR) {
935         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
936         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
937     }
938
939     /* only support for 8-bit pixel bit-depth */
940     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
941     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
942     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
943     assert(qp >= 0 && qp < 52);
944
945     gen6_mfc_avc_slice_state(ctx, 
946                              pPicParameter,
947                              pSliceParameter,
948                              encode_state, encoder_context,
949                              (rate_control_mode == VA_RC_CBR), qp, slice_batch);
950
951     if ( slice_index == 0) 
952         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
953
954     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
955
956     // slice hander
957     mfc_context->insert_object(ctx, encoder_context,
958                                (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
959                                5,  /* first 5 bytes are start code + nal unit type */
960                                1, 0, 1, slice_batch);
961
962     dri_bo_map(vme_context->vme_output.bo , 1);
963     msg = (unsigned int *)vme_context->vme_output.bo->virtual;
964
965     if (is_intra) {
966         msg += pSliceParameter->macroblock_address * INTRA_VME_OUTPUT_IN_DWS;
967     } else {
968         msg += pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_DWS;
969         msg += 32; /* the first 32 DWs are MVs */
970         offset = pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_BYTES;
971     }
972    
973     for (i = pSliceParameter->macroblock_address; 
974          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
975         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
976         x = i % width_in_mbs;
977         y = i / width_in_mbs;
978
979         if (is_intra) {
980             assert(msg);
981             gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
982             msg += INTRA_VME_OUTPUT_IN_DWS;
983         } else {
984             if (msg[0] & INTRA_MB_FLAG_MASK) {
985                 gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
986             } else {
987                 gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
988             }
989
990             msg += INTER_VME_OUTPUT_IN_DWS;
991             offset += INTER_VME_OUTPUT_IN_BYTES;
992         }
993     }
994    
995     dri_bo_unmap(vme_context->vme_output.bo);
996
997     if ( last_slice ) {    
998         mfc_context->insert_object(ctx, encoder_context,
999                                    tail_data, 2, 8,
1000                                    2, 1, 1, 0, slice_batch);
1001     } else {
1002         mfc_context->insert_object(ctx, encoder_context,
1003                                    tail_data, 1, 8,
1004                                    1, 1, 1, 0, slice_batch);
1005     }
1006
1007     free(slice_header);
1008
1009 }
1010
1011 static dri_bo *
1012 gen6_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1013                                   struct encode_state *encode_state,
1014                                   struct intel_encoder_context *encoder_context)
1015 {
1016     struct i965_driver_data *i965 = i965_driver_data(ctx);
1017     struct intel_batchbuffer *batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD);
1018     dri_bo *batch_bo = batch->buffer;
1019     int i;
1020
1021     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1022         gen6_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1023     }
1024
1025     intel_batchbuffer_align(batch, 8);
1026     
1027     BEGIN_BCS_BATCH(batch, 2);
1028     OUT_BCS_BATCH(batch, 0);
1029     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1030     ADVANCE_BCS_BATCH(batch);
1031
1032     dri_bo_reference(batch_bo);
1033     intel_batchbuffer_free(batch);
1034
1035     return batch_bo;
1036 }
1037
1038 #else
1039
1040 static void
1041 gen6_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1042                                     struct encode_state *encode_state,
1043                                     struct intel_encoder_context *encoder_context)
1044
1045 {
1046     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1047     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1048
1049     assert(vme_context->vme_output.bo);
1050     mfc_context->buffer_suface_setup(ctx,
1051                                      &mfc_context->gpe_context,
1052                                      &vme_context->vme_output,
1053                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1054                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1055     assert(mfc_context->aux_batchbuffer_surface.bo);
1056     mfc_context->buffer_suface_setup(ctx,
1057                                      &mfc_context->gpe_context,
1058                                      &mfc_context->aux_batchbuffer_surface,
1059                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1060                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
1061 }
1062
1063 static void
1064 gen6_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1065                                      struct encode_state *encode_state,
1066                                      struct intel_encoder_context *encoder_context)
1067
1068 {
1069     struct i965_driver_data *i965 = i965_driver_data(ctx);
1070     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1071     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1072     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1073     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1074     mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1075     mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1076     mfc_context->mfc_batchbuffer_surface.pitch = 16;
1077     mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, 
1078                                                            "MFC batchbuffer",
1079                                                            mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1080                                                            0x1000);
1081     mfc_context->buffer_suface_setup(ctx,
1082                                      &mfc_context->gpe_context,
1083                                      &mfc_context->mfc_batchbuffer_surface,
1084                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1085                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1086 }
1087
1088 static void
1089 gen6_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
1090                                     struct encode_state *encode_state,
1091                                     struct intel_encoder_context *encoder_context)
1092 {
1093     gen6_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1094     gen6_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1095 }
1096
1097 static void
1098 gen6_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
1099                                 struct encode_state *encode_state,
1100                                 struct intel_encoder_context *encoder_context)
1101 {
1102     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1103     struct gen6_interface_descriptor_data *desc;   
1104     int i;
1105     dri_bo *bo;
1106
1107     bo = mfc_context->gpe_context.idrt.bo;
1108     dri_bo_map(bo, 1);
1109     assert(bo->virtual);
1110     desc = bo->virtual;
1111
1112     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1113         struct i965_kernel *kernel;
1114
1115         kernel = &mfc_context->gpe_context.kernels[i];
1116         assert(sizeof(*desc) == 32);
1117
1118         /*Setup the descritor table*/
1119         memset(desc, 0, sizeof(*desc));
1120         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1121         desc->desc2.sampler_count = 0;
1122         desc->desc2.sampler_state_pointer = 0;
1123         desc->desc3.binding_table_entry_count = 2;
1124         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1125         desc->desc4.constant_urb_entry_read_offset = 0;
1126         desc->desc4.constant_urb_entry_read_length = 4;
1127                 
1128         /*kernel start*/
1129         dri_bo_emit_reloc(bo,   
1130                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1131                           0,
1132                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1133                           kernel->bo);
1134         desc++;
1135     }
1136
1137     dri_bo_unmap(bo);
1138 }
1139
1140 static void
1141 gen6_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1142                                     struct encode_state *encode_state,
1143                                     struct intel_encoder_context *encoder_context)
1144 {
1145     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1146     
1147     (void)mfc_context;
1148 }
1149
1150 static void
1151 gen6_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1152                                          int index,
1153                                          int head_offset,
1154                                          int batchbuffer_offset,
1155                                          int head_size,
1156                                          int tail_size,
1157                                          int number_mb_cmds,
1158                                          int first_object,
1159                                          int last_object,
1160                                          int last_slice,
1161                                          int mb_x,
1162                                          int mb_y,
1163                                          int width_in_mbs,
1164                                          int qp)
1165 {
1166     BEGIN_BATCH(batch, 12);
1167     
1168     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1169     OUT_BATCH(batch, index);
1170     OUT_BATCH(batch, 0);
1171     OUT_BATCH(batch, 0);
1172     OUT_BATCH(batch, 0);
1173     OUT_BATCH(batch, 0);
1174    
1175     /*inline data */
1176     OUT_BATCH(batch, head_offset);
1177     OUT_BATCH(batch, batchbuffer_offset);
1178     OUT_BATCH(batch, 
1179               head_size << 16 |
1180               tail_size);
1181     OUT_BATCH(batch,
1182               number_mb_cmds << 16 |
1183               first_object << 2 |
1184               last_object << 1 |
1185               last_slice);
1186     OUT_BATCH(batch,
1187               mb_y << 8 |
1188               mb_x);
1189     OUT_BATCH(batch,
1190               qp << 16 |
1191               width_in_mbs);
1192
1193     ADVANCE_BATCH(batch);
1194 }
1195
1196 static void
1197 gen6_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1198                                        struct intel_encoder_context *encoder_context,
1199                                        VAEncSliceParameterBufferH264 *slice_param,
1200                                        int head_offset,
1201                                        unsigned short head_size,
1202                                        unsigned short tail_size,
1203                                        int batchbuffer_offset,
1204                                        int qp,
1205                                        int last_slice)
1206 {
1207     struct intel_batchbuffer *batch = encoder_context->base.batch;
1208     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1209     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1210     int total_mbs = slice_param->num_macroblocks;
1211     int number_mb_cmds = 128;
1212     int starting_mb = 0;
1213     int last_object = 0;
1214     int first_object = 1;
1215     int i;
1216     int mb_x, mb_y;
1217     int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1218
1219     for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1220         last_object = (total_mbs - starting_mb) == number_mb_cmds;
1221         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1222         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1223         assert(mb_x <= 255 && mb_y <= 255);
1224
1225         starting_mb += number_mb_cmds;
1226
1227         gen6_mfc_batchbuffer_emit_object_command(batch,
1228                                                  index,
1229                                                  head_offset,
1230                                                  batchbuffer_offset,
1231                                                  head_size,
1232                                                  tail_size,
1233                                                  number_mb_cmds,
1234                                                  first_object,
1235                                                  last_object,
1236                                                  last_slice,
1237                                                  mb_x,
1238                                                  mb_y,
1239                                                  width_in_mbs,
1240                                                  qp);
1241
1242         if (first_object) {
1243             head_offset += head_size;
1244             batchbuffer_offset += head_size;
1245         }
1246
1247         if (last_object) {
1248             head_offset += tail_size;
1249             batchbuffer_offset += tail_size;
1250         }
1251
1252         batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
1253
1254         first_object = 0;
1255     }
1256
1257     if (!last_object) {
1258         last_object = 1;
1259         number_mb_cmds = total_mbs % number_mb_cmds;
1260         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1261         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1262         assert(mb_x <= 255 && mb_y <= 255);
1263         starting_mb += number_mb_cmds;
1264
1265         gen6_mfc_batchbuffer_emit_object_command(batch,
1266                                                  index,
1267                                                  head_offset,
1268                                                  batchbuffer_offset,
1269                                                  head_size,
1270                                                  tail_size,
1271                                                  number_mb_cmds,
1272                                                  first_object,
1273                                                  last_object,
1274                                                  last_slice,
1275                                                  mb_x,
1276                                                  mb_y,
1277                                                  width_in_mbs,
1278                                                  qp);
1279     }
1280 }
1281                           
1282 /*
1283  * return size in Owords (16bytes)
1284  */         
1285 static int
1286 gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1287                                struct encode_state *encode_state,
1288                                struct intel_encoder_context *encoder_context,
1289                                int slice_index,
1290                                int batchbuffer_offset)
1291 {
1292     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1293     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1294     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1295     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1296     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1297     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1298     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1299     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1300     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1301     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1302     unsigned char *slice_header = NULL;
1303     int slice_header_length_in_bits = 0;
1304     unsigned int tail_data[] = { 0x0, 0x0 };
1305     long head_offset;
1306     int old_used = intel_batchbuffer_used_size(slice_batch), used;
1307     unsigned short head_size, tail_size;
1308     int slice_type = pSliceParameter->slice_type;
1309
1310     if (rate_control_mode == VA_RC_CBR) {
1311         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1312         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1313     }
1314
1315     /* only support for 8-bit pixel bit-depth */
1316     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1317     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1318     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1319     assert(qp >= 0 && qp < 52);
1320
1321     head_offset = old_used / 16;
1322     gen6_mfc_avc_slice_state(ctx,
1323                              pPicParameter,
1324                              pSliceParameter,
1325                              encode_state,
1326                              encoder_context,
1327                              (rate_control_mode == VA_RC_CBR),
1328                              qp,
1329                              slice_batch);
1330
1331     if (slice_index == 0)
1332         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1333
1334     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1335
1336     // slice hander
1337     mfc_context->insert_object(ctx,
1338                                encoder_context,
1339                                (unsigned int *)slice_header,
1340                                ALIGN(slice_header_length_in_bits, 32) >> 5,
1341                                slice_header_length_in_bits & 0x1f,
1342                                5,  /* first 5 bytes are start code + nal unit type */
1343                                1,
1344                                0,
1345                                1,
1346                                slice_batch);
1347     free(slice_header);
1348
1349     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1350     used = intel_batchbuffer_used_size(slice_batch);
1351     head_size = (used - old_used) / 16;
1352     old_used = used;
1353
1354     /* tail */
1355     if (last_slice) {    
1356         mfc_context->insert_object(ctx,
1357                                    encoder_context,
1358                                    tail_data,
1359                                    2,
1360                                    8,
1361                                    2,
1362                                    1,
1363                                    1,
1364                                    0,
1365                                    slice_batch);
1366     } else {
1367         mfc_context->insert_object(ctx,
1368                                    encoder_context,
1369                                    tail_data,
1370                                    1,
1371                                    8,
1372                                    1,
1373                                    1,
1374                                    1,
1375                                    0,
1376                                    slice_batch);
1377     }
1378
1379     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1380     used = intel_batchbuffer_used_size(slice_batch);
1381     tail_size = (used - old_used) / 16;
1382
1383    
1384     gen6_mfc_avc_batchbuffer_slice_command(ctx,
1385                                            encoder_context,
1386                                            pSliceParameter,
1387                                            head_offset,
1388                                            head_size,
1389                                            tail_size,
1390                                            batchbuffer_offset,
1391                                            qp,
1392                                            last_slice);
1393
1394     return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
1395 }
1396
1397 static void
1398 gen6_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1399                                   struct encode_state *encode_state,
1400                                   struct intel_encoder_context *encoder_context)
1401 {
1402     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1403     struct intel_batchbuffer *batch = encoder_context->base.batch;
1404     int i, size, offset = 0;
1405     intel_batchbuffer_start_atomic(batch, 0x4000); 
1406     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1407
1408     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1409         size = gen6_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1410         offset += size;
1411     }
1412
1413     intel_batchbuffer_end_atomic(batch);
1414     intel_batchbuffer_flush(batch);
1415 }
1416
/*
 * Build the AVC slice batchbuffer using the media (GPE) pipeline.
 *
 * Runs the four preparation stages in a fixed order: surface setup,
 * interface-descriptor (IDRT) setup, constant setup, and finally the
 * pipeline that processes every slice.
 * NOTE(review): the first three stages presumably must complete before the
 * pipeline stage consumes their state -- keep this ordering unless verified
 * otherwise.
 */
static void
gen6_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
                               struct encode_state *encode_state,
                               struct intel_encoder_context *encoder_context)
{
    gen6_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
    gen6_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
    gen6_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
    /* Queues and flushes the per-slice work. */
    gen6_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
}
1427
1428 static dri_bo *
1429 gen6_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1430                                   struct encode_state *encode_state,
1431                                   struct intel_encoder_context *encoder_context)
1432 {
1433     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1434
1435     gen6_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1436     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1437
1438     return mfc_context->mfc_batchbuffer_surface.bo;
1439 }
1440
1441 #endif
1442
1443
1444 static void
1445 gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
1446                                  struct encode_state *encode_state,
1447                                  struct intel_encoder_context *encoder_context)
1448 {
1449     struct intel_batchbuffer *batch = encoder_context->base.batch;
1450     dri_bo *slice_batch_bo;
1451
1452     if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1453         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1454         assert(0);
1455         return; 
1456     }
1457
1458 #if __SOFTWARE__
1459     slice_batch_bo = gen6_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1460 #else
1461     slice_batch_bo = gen6_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1462 #endif
1463
1464     // begin programing
1465     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1466     intel_batchbuffer_emit_mi_flush(batch);
1467     
1468     // picture level programing
1469     gen6_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1470
1471     BEGIN_BCS_BATCH(batch, 2);
1472     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1473     OUT_BCS_RELOC(batch,
1474                   slice_batch_bo,
1475                   I915_GEM_DOMAIN_COMMAND, 0, 
1476                   0);
1477     ADVANCE_BCS_BATCH(batch);
1478
1479     // end programing
1480     intel_batchbuffer_end_atomic(batch);
1481
1482     dri_bo_unreference(slice_batch_bo);
1483 }
1484
1485 static VAStatus
1486 gen6_mfc_avc_encode_picture(VADriverContextP ctx, 
1487                             struct encode_state *encode_state,
1488                             struct intel_encoder_context *encoder_context)
1489 {
1490     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1491     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1492     int current_frame_bits_size;
1493     int sts;
1494  
1495     for (;;) {
1496         gen6_mfc_init(ctx, encoder_context);
1497         gen6_mfc_avc_prepare(ctx, encode_state, encoder_context);
1498         /*Programing bcs pipeline*/
1499         gen6_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);   //filling the pipeline
1500         gen6_mfc_run(ctx, encode_state, encoder_context);
1501         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1502             gen6_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1503             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1504             if (sts == BRC_NO_HRD_VIOLATION) {
1505                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1506                 break;
1507             }
1508             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1509                 if (!mfc_context->hrd.violation_noted) {
1510                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1511                     mfc_context->hrd.violation_noted = 1;
1512                 }
1513                 return VA_STATUS_SUCCESS;
1514             }
1515         } else {
1516             break;
1517         }
1518     }
1519
1520     return VA_STATUS_SUCCESS;
1521 }
1522
1523 VAStatus
1524 gen6_mfc_pipeline(VADriverContextP ctx,
1525                   VAProfile profile,
1526                   struct encode_state *encode_state,
1527                   struct intel_encoder_context *encoder_context)
1528 {
1529     VAStatus vaStatus;
1530
1531     switch (profile) {
1532     case VAProfileH264Baseline:
1533     case VAProfileH264Main:
1534     case VAProfileH264High:
1535         vaStatus = gen6_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
1536         break;
1537
1538         /* FIXME: add for other profile */
1539     default:
1540         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
1541         break;
1542     }
1543
1544     return vaStatus;
1545 }
1546
1547 void
1548 gen6_mfc_context_destroy(void *context)
1549 {
1550     struct gen6_mfc_context *mfc_context = context;
1551     int i;
1552
1553     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
1554     mfc_context->post_deblocking_output.bo = NULL;
1555
1556     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
1557     mfc_context->pre_deblocking_output.bo = NULL;
1558
1559     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
1560     mfc_context->uncompressed_picture_source.bo = NULL;
1561
1562     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
1563     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
1564
1565     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
1566         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
1567         mfc_context->direct_mv_buffers[i].bo = NULL;
1568     }
1569
1570     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
1571     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
1572
1573     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
1574     mfc_context->macroblock_status_buffer.bo = NULL;
1575
1576     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
1577     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1578
1579     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
1580     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1581
1582
1583     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
1584         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
1585         mfc_context->reference_surfaces[i].bo = NULL;  
1586     }
1587
1588     i965_gpe_context_destroy(&mfc_context->gpe_context);
1589
1590     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
1591     mfc_context->mfc_batchbuffer_surface.bo = NULL;
1592
1593     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
1594     mfc_context->aux_batchbuffer_surface.bo = NULL;
1595
1596     if (mfc_context->aux_batchbuffer)
1597         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
1598
1599     mfc_context->aux_batchbuffer = NULL;
1600
1601     free(mfc_context);
1602 }
1603
1604 Bool gen6_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1605 {
1606     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
1607
1608     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
1609
1610     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
1611     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
1612
1613     mfc_context->gpe_context.curbe.length = 32 * 4;
1614
1615     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
1616     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
1617     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
1618     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
1619     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
1620
1621     i965_gpe_load_kernels(ctx,
1622                           &mfc_context->gpe_context,
1623                           gen6_mfc_kernels,
1624                           NUM_MFC_KERNEL);
1625
1626     mfc_context->pipe_mode_select = gen6_mfc_pipe_mode_select;
1627     mfc_context->set_surface_state = gen6_mfc_surface_state;
1628     mfc_context->ind_obj_base_addr_state = gen6_mfc_ind_obj_base_addr_state;
1629     mfc_context->avc_img_state = gen6_mfc_avc_img_state;
1630     mfc_context->avc_qm_state = gen6_mfc_avc_qm_state;
1631     mfc_context->avc_fqm_state = gen6_mfc_avc_fqm_state;
1632     mfc_context->insert_object = gen6_mfc_avc_insert_object;
1633     mfc_context->buffer_suface_setup = i965_gpe_buffer_suface_setup;
1634
1635     encoder_context->mfc_context = mfc_context;
1636     encoder_context->mfc_context_destroy = gen6_mfc_context_destroy;
1637     encoder_context->mfc_pipeline = gen6_mfc_pipeline;
1638     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
1639
1640     return True;
1641 }