Add the bidirectional MVP to optimize the VME parameter on Ivb
[platform/upstream/libva-intel-driver.git] / src / gen6_mfc.c
1 /*
2  * Copyright © 2010-2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhou Chang <chang.zhou@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33 #include <math.h>
34
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40 #include "i965_encoder_utils.h"
41 #include "gen6_mfc.h"
42 #include "gen6_vme.h"
43 #include "intel_media.h"
44
45 static const uint32_t gen6_mfc_batchbuffer_avc_intra[][4] = {
46 #include "shaders/utils/mfc_batchbuffer_avc_intra.g6b"
47 };
48
49 static const uint32_t gen6_mfc_batchbuffer_avc_inter[][4] = {
50 #include "shaders/utils/mfc_batchbuffer_avc_inter.g6b"
51 };
52
53 static struct i965_kernel gen6_mfc_kernels[] = {
54     {
55         "MFC AVC INTRA BATCHBUFFER ",
56         MFC_BATCHBUFFER_AVC_INTRA,
57         gen6_mfc_batchbuffer_avc_intra,
58         sizeof(gen6_mfc_batchbuffer_avc_intra),
59         NULL
60     },
61
62     {
63         "MFC AVC INTER BATCHBUFFER ",
64         MFC_BATCHBUFFER_AVC_INTER,
65         gen6_mfc_batchbuffer_avc_inter,
66         sizeof(gen6_mfc_batchbuffer_avc_inter),
67         NULL
68     },
69 };
70
71 static void
72 gen6_mfc_pipe_mode_select(VADriverContextP ctx,
73                           int standard_select,
74                           struct intel_encoder_context *encoder_context)
75 {
76     struct intel_batchbuffer *batch = encoder_context->base.batch;
77     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
78
79     assert(standard_select == MFX_FORMAT_AVC);
80
81     BEGIN_BCS_BATCH(batch, 4);
82
83     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
84     OUT_BCS_BATCH(batch,
85                   (1 << 10) | /* disable Stream-Out , advanced QP/bitrate control need enable it*/
86                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
87                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
88                   (0 << 7)  | /* disable TLB prefectch */
89                   (0 << 5)  | /* not in stitch mode */
90                   (1 << 4)  | /* encoding mode */
91                   (2 << 0));  /* Standard Select: AVC */
92     OUT_BCS_BATCH(batch,
93                   (0 << 20) | /* round flag in PB slice */
94                   (0 << 19) | /* round flag in Intra8x8 */
95                   (0 << 7)  | /* expand NOA bus flag */
96                   (1 << 6)  | /* must be 1 */
97                   (0 << 5)  | /* disable clock gating for NOA */
98                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
99                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
100                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
101                   (0 << 1)  | /* AVC long field motion vector */
102                   (0 << 0));  /* always calculate AVC ILDB boundary strength */
103     OUT_BCS_BATCH(batch, 0);
104
105     ADVANCE_BCS_BATCH(batch);
106 }
107
108 static void
109 gen6_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
110 {
111     struct intel_batchbuffer *batch = encoder_context->base.batch;
112     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
113
114     BEGIN_BCS_BATCH(batch, 6);
115
116     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
117     OUT_BCS_BATCH(batch, 0);
118     OUT_BCS_BATCH(batch,
119                   ((mfc_context->surface_state.height - 1) << 19) |
120                   ((mfc_context->surface_state.width - 1) << 6));
121     OUT_BCS_BATCH(batch,
122                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
123                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
124                   (0 << 22) | /* surface object control state, FIXME??? */
125                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
126                   (0 << 2)  | /* must be 0 for interleave U/V */
127                   (1 << 1)  | /* must be y-tiled */
128                   (I965_TILEWALK_YMAJOR << 0));                         /* tile walk, TILEWALK_YMAJOR */
129     OUT_BCS_BATCH(batch,
130                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
131                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
132     OUT_BCS_BATCH(batch, 0);
133     ADVANCE_BCS_BATCH(batch);
134 }
135
136 static void
137 gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
138 {
139     struct intel_batchbuffer *batch = encoder_context->base.batch;
140     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
141     int i;
142
143     BEGIN_BCS_BATCH(batch, 24);
144
145     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
146
147     if (mfc_context->pre_deblocking_output.bo)
148         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
149                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
150                       0);
151     else
152         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
153
154     if (mfc_context->post_deblocking_output.bo)
155         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
156                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
157                       0);                                                                                       /* post output addr  */ 
158     else
159         OUT_BCS_BATCH(batch, 0);
160
161     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
162                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
163                   0);                                                                                   /* uncompressed data */
164     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
165                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
166                   0);                                                                                   /* StreamOut data*/
167     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
168                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
169                   0);   
170     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
171                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
172                   0);
173     /* 7..22 Reference pictures*/
174     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
175         if ( mfc_context->reference_surfaces[i].bo != NULL) {
176             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
177                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
178                           0);                   
179         } else {
180             OUT_BCS_BATCH(batch, 0);
181         }
182     }
183     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
184                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
185                   0);                                                                                   /* Macroblock status buffer*/
186
187     ADVANCE_BCS_BATCH(batch);
188 }
189
190 static void
191 gen6_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
192 {
193     struct intel_batchbuffer *batch = encoder_context->base.batch;
194     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
195     struct gen6_vme_context *vme_context = encoder_context->vme_context;
196
197     BEGIN_BCS_BATCH(batch, 11);
198
199     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
200     OUT_BCS_BATCH(batch, 0);
201     OUT_BCS_BATCH(batch, 0);
202     /* MFX Indirect MV Object Base Address */
203     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
204     OUT_BCS_BATCH(batch, 0);    
205     OUT_BCS_BATCH(batch, 0);
206     OUT_BCS_BATCH(batch, 0);
207     OUT_BCS_BATCH(batch, 0);
208     OUT_BCS_BATCH(batch, 0);
209     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
210     OUT_BCS_RELOC(batch,
211                   mfc_context->mfc_indirect_pak_bse_object.bo,
212                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
213                   0);
214     OUT_BCS_RELOC(batch,
215                   mfc_context->mfc_indirect_pak_bse_object.bo,
216                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
217                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
218
219     ADVANCE_BCS_BATCH(batch);
220 }
221
222 static void
223 gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
224 {
225     struct intel_batchbuffer *batch = encoder_context->base.batch;
226     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
227
228     BEGIN_BCS_BATCH(batch, 4);
229
230     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
231     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
232                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
233                   0);
234     OUT_BCS_BATCH(batch, 0);
235     OUT_BCS_BATCH(batch, 0);
236
237     ADVANCE_BCS_BATCH(batch);
238 }
239
240 static void
241 gen6_mfc_avc_img_state(VADriverContextP ctx,struct encode_state *encode_state,
242                        struct intel_encoder_context *encoder_context)
243 {
244     struct intel_batchbuffer *batch = encoder_context->base.batch;
245     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
246     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
247     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
248     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
249     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
250
251     BEGIN_BCS_BATCH(batch, 13);
252     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (13 - 2));
253     OUT_BCS_BATCH(batch, 
254                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
255     OUT_BCS_BATCH(batch, 
256                   (height_in_mbs << 16) | 
257                   (width_in_mbs << 0));
258     OUT_BCS_BATCH(batch, 
259                   (0 << 24) |     /*Second Chroma QP Offset*/
260                   (0 << 16) |     /*Chroma QP Offset*/
261                   (0 << 14) |   /*Max-bit conformance Intra flag*/
262                   (0 << 13) |   /*Max Macroblock size conformance Inter flag*/
263                   (1 << 12) |   /*Should always be written as "1" */
264                   (0 << 10) |   /*QM Preset FLag */
265                   (0 << 8)  |   /*Image Structure*/
266                   (0 << 0) );   /*Current Decoed Image Frame Store ID, reserved in Encode mode*/
267     OUT_BCS_BATCH(batch,
268                   (400 << 16) |   /*Mininum Frame size*/        
269                   (0 << 15) |   /*Disable reading of Macroblock Status Buffer*/
270                   (0 << 14) |   /*Load BitStream Pointer only once, 1 slic 1 frame*/
271                   (0 << 13) |   /*CABAC 0 word insertion test enable*/
272                   (1 << 12) |   /*MVUnpackedEnable,compliant to DXVA*/
273                   (1 << 10) |   /*Chroma Format IDC, 4:2:0*/
274                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
275                   (0 << 6)  |   /*Only valid for VLD decoding mode*/
276                   (0 << 5)  |   /*Constrained Intra Predition Flag, from PPS*/
277                   (pSequenceParameter->seq_fields.bits.direct_8x8_inference_flag << 4)  |   /*Direct 8x8 inference flag*/
278                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
279                   (1 << 2)  |   /*Frame MB only flag*/
280                   (0 << 1)  |   /*MBAFF mode is in active*/
281                   (0 << 0) );   /*Field picture flag*/
282     OUT_BCS_BATCH(batch, 
283                   (1<<16)   |   /*Frame Size Rate Control Flag*/  
284                   (1<<12)   |   
285                   (1<<9)    |   /*MB level Rate Control Enabling Flag*/
286                   (1 << 3)  |   /*FrameBitRateMinReportMask*/
287                   (1 << 2)  |   /*FrameBitRateMaxReportMask*/
288                   (1 << 1)  |   /*InterMBMaxSizeReportMask*/
289                   (1 << 0) );   /*IntraMBMaxSizeReportMask*/
290     OUT_BCS_BATCH(batch,                        /*Inter and Intra Conformance Max size limit*/
291                   (0x0600 << 16) |              /*InterMbMaxSz 192 Byte*/
292                   (0x0800) );                   /*IntraMbMaxSz 256 Byte*/
293     OUT_BCS_BATCH(batch, 0x00000000);   /*Reserved : MBZReserved*/
294     OUT_BCS_BATCH(batch, 0x01020304);   /*Slice QP Delta for bitrate control*/                  
295     OUT_BCS_BATCH(batch, 0xFEFDFCFB);           
296     OUT_BCS_BATCH(batch, 0x80601004);   /*MAX = 128KB, MIN = 64KB*/
297     OUT_BCS_BATCH(batch, 0x00800001);   
298     OUT_BCS_BATCH(batch, 0);
299
300     ADVANCE_BCS_BATCH(batch);
301 }
302
303 static void
304 gen6_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
305 {
306     struct intel_batchbuffer *batch = encoder_context->base.batch;
307     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
308
309     int i;
310
311     BEGIN_BCS_BATCH(batch, 69);
312
313     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
314
315     /* Reference frames and Current frames */
316     for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
317         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
318             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
319                           I915_GEM_DOMAIN_INSTRUCTION, 0,
320                           0);
321         } else {
322             OUT_BCS_BATCH(batch, 0);
323         }
324     }
325
326     /* POL list */
327     for(i = 0; i < 32; i++) {
328         OUT_BCS_BATCH(batch, i/2);
329     }
330     OUT_BCS_BATCH(batch, 0);
331     OUT_BCS_BATCH(batch, 0);
332
333     ADVANCE_BCS_BATCH(batch);
334 }
335
336 static void
337 gen6_mfc_avc_slice_state(VADriverContextP ctx,
338                          VAEncPictureParameterBufferH264 *pic_param,
339                          VAEncSliceParameterBufferH264 *slice_param,
340                          struct encode_state *encode_state,
341                          struct intel_encoder_context *encoder_context,
342                          int rate_control_enable,
343                          int qp,
344                          struct intel_batchbuffer *batch)
345 {
346     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
347     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
348     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
349     int beginmb = slice_param->macroblock_address;
350     int endmb = beginmb + slice_param->num_macroblocks;
351     int beginx = beginmb % width_in_mbs;
352     int beginy = beginmb / width_in_mbs;
353     int nextx =  endmb % width_in_mbs;
354     int nexty = endmb / width_in_mbs;
355     int slice_type = slice_param->slice_type;
356     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
357     int bit_rate_control_target, maxQpN, maxQpP;
358     unsigned char correct[6], grow, shrink;
359     int i;
360     int weighted_pred_idc = 0;
361     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
362     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
363     int bslice = 0;
364
365     if (batch == NULL)
366         batch = encoder_context->base.batch;
367
368     bit_rate_control_target = slice_type;
369     if (slice_type == SLICE_TYPE_SP)
370         bit_rate_control_target = SLICE_TYPE_P;
371     else if (slice_type == SLICE_TYPE_SI)
372         bit_rate_control_target = SLICE_TYPE_I;
373
374     if (slice_type == SLICE_TYPE_P) {
375         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
376     } else if (slice_type == SLICE_TYPE_B) {
377         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
378         bslice = 1;
379
380         if (weighted_pred_idc == 2) {
381             /* 8.4.3 - Derivation process for prediction weights (8-279) */
382             luma_log2_weight_denom = 5;
383             chroma_log2_weight_denom = 5;
384         }
385     }
386
387     maxQpN = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpNegModifier;
388     maxQpP = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpPosModifier;
389
390     for (i = 0; i < 6; i++)
391         correct[i] = mfc_context->bit_rate_control_context[bit_rate_control_target].Correct[i];
392
393     grow = mfc_context->bit_rate_control_context[bit_rate_control_target].GrowInit + 
394         (mfc_context->bit_rate_control_context[bit_rate_control_target].GrowResistance << 4);
395     shrink = mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkInit + 
396         (mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkResistance << 4);
397
398     BEGIN_BCS_BATCH(batch, 11);;
399
400     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
401     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
402
403     if (slice_type == SLICE_TYPE_I) {
404         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
405     } else {
406         OUT_BCS_BATCH(batch,
407                       (1 << 16) | (bslice << 24) |     /*1 reference frame*/
408                       (chroma_log2_weight_denom << 8) |
409                       (luma_log2_weight_denom << 0));
410     }
411
412     OUT_BCS_BATCH(batch, 
413                   (weighted_pred_idc << 30) |
414                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
415                   (slice_param->disable_deblocking_filter_idc << 27) |
416                   (slice_param->cabac_init_idc << 24) |
417                   (qp<<16) |                    /*Slice Quantization Parameter*/
418                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
419                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
420     OUT_BCS_BATCH(batch,
421                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
422                   (beginx << 16) |
423                   slice_param->macroblock_address );
424     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
425     OUT_BCS_BATCH(batch, 
426                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
427                   (1 << 30) |           /*ResetRateControlCounter*/
428                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
429                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
430                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
431                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
432                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
433                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
434                   (last_slice << 19) |     /*IsLastSlice*/
435                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
436                   (1 << 17) |       /*HeaderPresentFlag*/       
437                   (1 << 16) |       /*SliceData PresentFlag*/
438                   (1 << 15) |       /*TailPresentFlag*/
439                   (1 << 13) |       /*RBSP NAL TYPE*/   
440                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
441     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
442     OUT_BCS_BATCH(batch,
443                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
444                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
445                   (shrink << 8)  |
446                   (grow << 0));   
447     OUT_BCS_BATCH(batch,
448                   (correct[5] << 20) |
449                   (correct[4] << 16) |
450                   (correct[3] << 12) |
451                   (correct[2] << 8) |
452                   (correct[1] << 4) |
453                   (correct[0] << 0));
454     OUT_BCS_BATCH(batch, 0);
455
456     ADVANCE_BCS_BATCH(batch);
457 }
458 static void gen6_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
459 {
460     struct intel_batchbuffer *batch = encoder_context->base.batch;
461     int i;
462
463     BEGIN_BCS_BATCH(batch, 58);
464
465     OUT_BCS_BATCH(batch, MFX_AVC_QM_STATE | 56);
466     OUT_BCS_BATCH(batch, 0xFF ) ; 
467     for( i = 0; i < 56; i++) {
468         OUT_BCS_BATCH(batch, 0x10101010); 
469     }   
470
471     ADVANCE_BCS_BATCH(batch);
472 }
473
474 static void gen6_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
475 {
476     struct intel_batchbuffer *batch = encoder_context->base.batch;
477     int i;
478
479     BEGIN_BCS_BATCH(batch, 113);
480     OUT_BCS_BATCH(batch, MFC_AVC_FQM_STATE | (113 - 2));
481
482     for(i = 0; i < 112;i++) {
483         OUT_BCS_BATCH(batch, 0x10001000);
484     }   
485
486     ADVANCE_BCS_BATCH(batch);   
487 }
488
489 static void
490 gen6_mfc_avc_ref_idx_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
491 {
492     struct intel_batchbuffer *batch = encoder_context->base.batch;
493     int i;
494
495     BEGIN_BCS_BATCH(batch, 10);
496     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
497     OUT_BCS_BATCH(batch, 0);                  //Select L0
498     OUT_BCS_BATCH(batch, 0x80808020);         //Only 1 reference
499     for(i = 0; i < 7; i++) {
500         OUT_BCS_BATCH(batch, 0x80808080);
501     }   
502     ADVANCE_BCS_BATCH(batch);
503
504     BEGIN_BCS_BATCH(batch, 10);
505     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
506     OUT_BCS_BATCH(batch, 1);                  //Select L1
507     OUT_BCS_BATCH(batch, 0x80808022);         //Only 1 reference
508     for(i = 0; i < 7; i++) {
509         OUT_BCS_BATCH(batch, 0x80808080);
510     }   
511     ADVANCE_BCS_BATCH(batch);
512 }
513         
514 static void
515 gen6_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
516                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
517                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
518                            struct intel_batchbuffer *batch)
519 {
520     if (batch == NULL)
521         batch = encoder_context->base.batch;
522
523     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
524
525     OUT_BCS_BATCH(batch, MFC_AVC_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
526
527     OUT_BCS_BATCH(batch,
528                   (0 << 16) |   /* always start at offset 0 */
529                   (data_bits_in_last_dw << 8) |
530                   (skip_emul_byte_count << 4) |
531                   (!!emulation_flag << 3) |
532                   ((!!is_last_header) << 2) |
533                   ((!!is_end_of_slice) << 1) |
534                   (0 << 0));    /* FIXME: ??? */
535
536     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
537     ADVANCE_BCS_BATCH(batch);
538 }
539
540 static void gen6_mfc_init(VADriverContextP ctx, 
541                             struct encode_state *encode_state,
542                             struct intel_encoder_context *encoder_context)
543 {
544     struct i965_driver_data *i965 = i965_driver_data(ctx);
545     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
546     dri_bo *bo;
547     int i;
548     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
549     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
550     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
551
552     /*Encode common setup for MFC*/
553     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
554     mfc_context->post_deblocking_output.bo = NULL;
555
556     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
557     mfc_context->pre_deblocking_output.bo = NULL;
558
559     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
560     mfc_context->uncompressed_picture_source.bo = NULL;
561
562     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
563     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
564
565     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
566         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
567         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
568         mfc_context->direct_mv_buffers[i].bo = NULL;
569     }
570
571     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
572         if (mfc_context->reference_surfaces[i].bo != NULL)
573             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
574         mfc_context->reference_surfaces[i].bo = NULL;  
575     }
576
577     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
578     bo = dri_bo_alloc(i965->intel.bufmgr,
579                       "Buffer",
580                       width_in_mbs * 64,
581                       64);
582     assert(bo);
583     mfc_context->intra_row_store_scratch_buffer.bo = bo;
584
585     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
586     bo = dri_bo_alloc(i965->intel.bufmgr,
587                       "Buffer",
588                       width_in_mbs * height_in_mbs * 16,
589                       64);
590     assert(bo);
591     mfc_context->macroblock_status_buffer.bo = bo;
592
593     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
594     bo = dri_bo_alloc(i965->intel.bufmgr,
595                       "Buffer",
596                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
597                       64);
598     assert(bo);
599     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
600
601     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
602     bo = dri_bo_alloc(i965->intel.bufmgr,
603                       "Buffer",
604                       128 * width_in_mbs, /* 2 * widht_in_mbs * 64 */
605                       0x1000);
606     assert(bo);
607     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
608
609     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
610     mfc_context->mfc_batchbuffer_surface.bo = NULL;
611
612     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
613     mfc_context->aux_batchbuffer_surface.bo = NULL;
614
615     if (mfc_context->aux_batchbuffer)
616         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
617
618     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0);
619     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
620     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
621     mfc_context->aux_batchbuffer_surface.pitch = 16;
622     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
623     mfc_context->aux_batchbuffer_surface.size_block = 16;
624
625     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
626 }
627
628 static void gen6_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
629                                       struct encode_state *encode_state,
630                                       struct intel_encoder_context *encoder_context)
631 {
632     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
633
634     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
635     mfc_context->set_surface_state(ctx, encoder_context);
636     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
637     gen6_mfc_pipe_buf_addr_state(ctx, encoder_context);
638     gen6_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
639     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
640     mfc_context->avc_qm_state(ctx, encoder_context);
641     mfc_context->avc_fqm_state(ctx, encoder_context);
642     gen6_mfc_avc_directmode_state(ctx, encoder_context); 
643     gen6_mfc_avc_ref_idx_state(ctx, encoder_context);
644 }
645
646
647 static VAStatus gen6_mfc_run(VADriverContextP ctx, 
648                              struct encode_state *encode_state,
649                              struct intel_encoder_context *encoder_context)
650 {
651     struct intel_batchbuffer *batch = encoder_context->base.batch;
652
653     intel_batchbuffer_flush(batch);             //run the pipeline
654
655     return VA_STATUS_SUCCESS;
656 }
657
658 static VAStatus
659 gen6_mfc_stop(VADriverContextP ctx, 
660               struct encode_state *encode_state,
661               struct intel_encoder_context *encoder_context,
662               int *encoded_bits_size)
663 {
664     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
665     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
666     VACodedBufferSegment *coded_buffer_segment;
667     
668     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
669     assert(vaStatus == VA_STATUS_SUCCESS);
670     *encoded_bits_size = coded_buffer_segment->size * 8;
671     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
672
673     return VA_STATUS_SUCCESS;
674 }
675
676 #if __SOFTWARE__
677
678 static int
679 gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg,
680                               struct intel_encoder_context *encoder_context,
681                               unsigned char target_mb_size, unsigned char max_mb_size,
682                               struct intel_batchbuffer *batch)
683 {
684     int len_in_dwords = 11;
685
686     if (batch == NULL)
687         batch = encoder_context->base.batch;
688
689     BEGIN_BCS_BATCH(batch, len_in_dwords);
690
691     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
692     OUT_BCS_BATCH(batch, 0);
693     OUT_BCS_BATCH(batch, 0);
694     OUT_BCS_BATCH(batch, 
695                   (0 << 24) |           /* PackedMvNum, Debug*/
696                   (0 << 20) |           /* No motion vector */
697                   (1 << 19) |           /* CbpDcY */
698                   (1 << 18) |           /* CbpDcU */
699                   (1 << 17) |           /* CbpDcV */
700                   (msg[0] & 0xFFFF) );
701
702     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
703     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
704     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
705
706     /*Stuff for Intra MB*/
707     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
708     OUT_BCS_BATCH(batch, msg[2]);       
709     OUT_BCS_BATCH(batch, msg[3]&0xFC);          
710     
711     /*MaxSizeInWord and TargetSzieInWord*/
712     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
713                   (target_mb_size << 16) );
714
715     ADVANCE_BCS_BATCH(batch);
716
717     return len_in_dwords;
718 }
719
720 static int
721 gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
722                               unsigned int *msg, unsigned int offset,
723                               struct intel_encoder_context *encoder_context,
724                               unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
725                               struct intel_batchbuffer *batch)
726 {
727     int len_in_dwords = 11;
728
729     if (batch == NULL)
730         batch = encoder_context->base.batch;
731
732     BEGIN_BCS_BATCH(batch, len_in_dwords);
733
734     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
735
736     OUT_BCS_BATCH(batch, msg[2]);         /* 32 MV*/
737     OUT_BCS_BATCH(batch, offset);
738
739     OUT_BCS_BATCH(batch, msg[0]);
740
741     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
742     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
743 #if 0 
744     if ( slice_type == SLICE_TYPE_B) {
745         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
746     } else {
747         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
748     }
749 #else
750     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
751 #endif
752
753
754     /*Stuff for Inter MB*/
755     OUT_BCS_BATCH(batch, msg[1]);        
756     OUT_BCS_BATCH(batch, 0x0);    
757     OUT_BCS_BATCH(batch, 0x0);        
758
759     /*MaxSizeInWord and TargetSzieInWord*/
760     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
761                   (target_mb_size << 16) );
762
763     ADVANCE_BCS_BATCH(batch);
764
765     return len_in_dwords;
766 }
767
768 static void 
769 gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
770                                        struct encode_state *encode_state,
771                                        struct intel_encoder_context *encoder_context,
772                                        int slice_index,
773                                        struct intel_batchbuffer *slice_batch)
774 {
775     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
776     struct gen6_vme_context *vme_context = encoder_context->vme_context;
777     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
778     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
779     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
780     unsigned int *msg = NULL, offset = 0;
781     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
782     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
783     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
784     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
785     int i,x,y;
786     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
787     unsigned int rate_control_mode = encoder_context->rate_control_mode;
788     unsigned char *slice_header = NULL;
789     int slice_header_length_in_bits = 0;
790     unsigned int tail_data[] = { 0x0, 0x0 };
791     int slice_type = pSliceParameter->slice_type;
792
793
794     if (rate_control_mode == VA_RC_CBR) {
795         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
796         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
797     }
798
799     /* only support for 8-bit pixel bit-depth */
800     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
801     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
802     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
803     assert(qp >= 0 && qp < 52);
804
805     gen6_mfc_avc_slice_state(ctx, 
806                              pPicParameter,
807                              pSliceParameter,
808                              encode_state, encoder_context,
809                              (rate_control_mode == VA_RC_CBR), qp, slice_batch);
810
811     if ( slice_index == 0) 
812         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
813
814     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
815
816     // slice hander
817     mfc_context->insert_object(ctx, encoder_context,
818                                (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
819                                5,  /* first 5 bytes are start code + nal unit type */
820                                1, 0, 1, slice_batch);
821
822     dri_bo_map(vme_context->vme_output.bo , 1);
823     msg = (unsigned int *)vme_context->vme_output.bo->virtual;
824
825     if (is_intra) {
826         msg += pSliceParameter->macroblock_address * INTRA_VME_OUTPUT_IN_DWS;
827     } else {
828         msg += pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_DWS;
829         msg += 32; /* the first 32 DWs are MVs */
830         offset = pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_BYTES;
831     }
832    
833     for (i = pSliceParameter->macroblock_address; 
834          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
835         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
836         x = i % width_in_mbs;
837         y = i / width_in_mbs;
838
839         if (is_intra) {
840             assert(msg);
841             gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
842             msg += INTRA_VME_OUTPUT_IN_DWS;
843         } else {
844             if (msg[0] & INTRA_MB_FLAG_MASK) {
845                 gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
846             } else {
847                 gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
848             }
849
850             msg += INTER_VME_OUTPUT_IN_DWS;
851             offset += INTER_VME_OUTPUT_IN_BYTES;
852         }
853     }
854    
855     dri_bo_unmap(vme_context->vme_output.bo);
856
857     if ( last_slice ) {    
858         mfc_context->insert_object(ctx, encoder_context,
859                                    tail_data, 2, 8,
860                                    2, 1, 1, 0, slice_batch);
861     } else {
862         mfc_context->insert_object(ctx, encoder_context,
863                                    tail_data, 1, 8,
864                                    1, 1, 1, 0, slice_batch);
865     }
866
867     free(slice_header);
868
869 }
870
871 static dri_bo *
872 gen6_mfc_avc_software_batchbuffer(VADriverContextP ctx,
873                                   struct encode_state *encode_state,
874                                   struct intel_encoder_context *encoder_context)
875 {
876     struct i965_driver_data *i965 = i965_driver_data(ctx);
877     struct intel_batchbuffer *batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0);
878     dri_bo *batch_bo = batch->buffer;
879     int i;
880
881     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
882         gen6_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
883     }
884
885     intel_batchbuffer_align(batch, 8);
886     
887     BEGIN_BCS_BATCH(batch, 2);
888     OUT_BCS_BATCH(batch, 0);
889     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
890     ADVANCE_BCS_BATCH(batch);
891
892     dri_bo_reference(batch_bo);
893     intel_batchbuffer_free(batch);
894
895     return batch_bo;
896 }
897
898 #else
899
900 static void
901 gen6_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
902                                     struct encode_state *encode_state,
903                                     struct intel_encoder_context *encoder_context)
904
905 {
906     struct gen6_vme_context *vme_context = encoder_context->vme_context;
907     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
908
909     assert(vme_context->vme_output.bo);
910     mfc_context->buffer_suface_setup(ctx,
911                                      &mfc_context->gpe_context,
912                                      &vme_context->vme_output,
913                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
914                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
915     assert(mfc_context->aux_batchbuffer_surface.bo);
916     mfc_context->buffer_suface_setup(ctx,
917                                      &mfc_context->gpe_context,
918                                      &mfc_context->aux_batchbuffer_surface,
919                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
920                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
921 }
922
923 static void
924 gen6_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
925                                      struct encode_state *encode_state,
926                                      struct intel_encoder_context *encoder_context)
927
928 {
929     struct i965_driver_data *i965 = i965_driver_data(ctx);
930     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
931     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
932     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
933     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
934     mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
935     mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
936     mfc_context->mfc_batchbuffer_surface.pitch = 16;
937     mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, 
938                                                            "MFC batchbuffer",
939                                                            mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
940                                                            0x1000);
941     mfc_context->buffer_suface_setup(ctx,
942                                      &mfc_context->gpe_context,
943                                      &mfc_context->mfc_batchbuffer_surface,
944                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
945                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
946 }
947
948 static void
949 gen6_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
950                                     struct encode_state *encode_state,
951                                     struct intel_encoder_context *encoder_context)
952 {
953     gen6_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
954     gen6_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
955 }
956
957 static void
958 gen6_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
959                                 struct encode_state *encode_state,
960                                 struct intel_encoder_context *encoder_context)
961 {
962     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
963     struct gen6_interface_descriptor_data *desc;   
964     int i;
965     dri_bo *bo;
966
967     bo = mfc_context->gpe_context.idrt.bo;
968     dri_bo_map(bo, 1);
969     assert(bo->virtual);
970     desc = bo->virtual;
971
972     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
973         struct i965_kernel *kernel;
974
975         kernel = &mfc_context->gpe_context.kernels[i];
976         assert(sizeof(*desc) == 32);
977
978         /*Setup the descritor table*/
979         memset(desc, 0, sizeof(*desc));
980         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
981         desc->desc2.sampler_count = 0;
982         desc->desc2.sampler_state_pointer = 0;
983         desc->desc3.binding_table_entry_count = 2;
984         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
985         desc->desc4.constant_urb_entry_read_offset = 0;
986         desc->desc4.constant_urb_entry_read_length = 4;
987                 
988         /*kernel start*/
989         dri_bo_emit_reloc(bo,   
990                           I915_GEM_DOMAIN_INSTRUCTION, 0,
991                           0,
992                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
993                           kernel->bo);
994         desc++;
995     }
996
997     dri_bo_unmap(bo);
998 }
999
1000 static void
1001 gen6_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1002                                     struct encode_state *encode_state,
1003                                     struct intel_encoder_context *encoder_context)
1004 {
1005     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1006     
1007     (void)mfc_context;
1008 }
1009
1010 static void
1011 gen6_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1012                                          int index,
1013                                          int head_offset,
1014                                          int batchbuffer_offset,
1015                                          int head_size,
1016                                          int tail_size,
1017                                          int number_mb_cmds,
1018                                          int first_object,
1019                                          int last_object,
1020                                          int last_slice,
1021                                          int mb_x,
1022                                          int mb_y,
1023                                          int width_in_mbs,
1024                                          int qp)
1025 {
1026     BEGIN_BATCH(batch, 12);
1027     
1028     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1029     OUT_BATCH(batch, index);
1030     OUT_BATCH(batch, 0);
1031     OUT_BATCH(batch, 0);
1032     OUT_BATCH(batch, 0);
1033     OUT_BATCH(batch, 0);
1034    
1035     /*inline data */
1036     OUT_BATCH(batch, head_offset);
1037     OUT_BATCH(batch, batchbuffer_offset);
1038     OUT_BATCH(batch, 
1039               head_size << 16 |
1040               tail_size);
1041     OUT_BATCH(batch,
1042               number_mb_cmds << 16 |
1043               first_object << 2 |
1044               last_object << 1 |
1045               last_slice);
1046     OUT_BATCH(batch,
1047               mb_y << 8 |
1048               mb_x);
1049     OUT_BATCH(batch,
1050               qp << 16 |
1051               width_in_mbs);
1052
1053     ADVANCE_BATCH(batch);
1054 }
1055
1056 static void
1057 gen6_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1058                                        struct intel_encoder_context *encoder_context,
1059                                        VAEncSliceParameterBufferH264 *slice_param,
1060                                        int head_offset,
1061                                        unsigned short head_size,
1062                                        unsigned short tail_size,
1063                                        int batchbuffer_offset,
1064                                        int qp,
1065                                        int last_slice)
1066 {
1067     struct intel_batchbuffer *batch = encoder_context->base.batch;
1068     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1069     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1070     int total_mbs = slice_param->num_macroblocks;
1071     int number_mb_cmds = 128;
1072     int starting_mb = 0;
1073     int last_object = 0;
1074     int first_object = 1;
1075     int i;
1076     int mb_x, mb_y;
1077     int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1078
1079     for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1080         last_object = (total_mbs - starting_mb) == number_mb_cmds;
1081         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1082         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1083         assert(mb_x <= 255 && mb_y <= 255);
1084
1085         starting_mb += number_mb_cmds;
1086
1087         gen6_mfc_batchbuffer_emit_object_command(batch,
1088                                                  index,
1089                                                  head_offset,
1090                                                  batchbuffer_offset,
1091                                                  head_size,
1092                                                  tail_size,
1093                                                  number_mb_cmds,
1094                                                  first_object,
1095                                                  last_object,
1096                                                  last_slice,
1097                                                  mb_x,
1098                                                  mb_y,
1099                                                  width_in_mbs,
1100                                                  qp);
1101
1102         if (first_object) {
1103             head_offset += head_size;
1104             batchbuffer_offset += head_size;
1105         }
1106
1107         if (last_object) {
1108             head_offset += tail_size;
1109             batchbuffer_offset += tail_size;
1110         }
1111
1112         batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
1113
1114         first_object = 0;
1115     }
1116
1117     if (!last_object) {
1118         last_object = 1;
1119         number_mb_cmds = total_mbs % number_mb_cmds;
1120         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1121         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1122         assert(mb_x <= 255 && mb_y <= 255);
1123         starting_mb += number_mb_cmds;
1124
1125         gen6_mfc_batchbuffer_emit_object_command(batch,
1126                                                  index,
1127                                                  head_offset,
1128                                                  batchbuffer_offset,
1129                                                  head_size,
1130                                                  tail_size,
1131                                                  number_mb_cmds,
1132                                                  first_object,
1133                                                  last_object,
1134                                                  last_slice,
1135                                                  mb_x,
1136                                                  mb_y,
1137                                                  width_in_mbs,
1138                                                  qp);
1139     }
1140 }
1141                           
1142 /*
1143  * return size in Owords (16bytes)
1144  */         
1145 static int
1146 gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1147                                struct encode_state *encode_state,
1148                                struct intel_encoder_context *encoder_context,
1149                                int slice_index,
1150                                int batchbuffer_offset)
1151 {
1152     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1153     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1154     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1155     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1156     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1157     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1158     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1159     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1160     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1161     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1162     unsigned char *slice_header = NULL;
1163     int slice_header_length_in_bits = 0;
1164     unsigned int tail_data[] = { 0x0, 0x0 };
1165     long head_offset;
1166     int old_used = intel_batchbuffer_used_size(slice_batch), used;
1167     unsigned short head_size, tail_size;
1168     int slice_type = pSliceParameter->slice_type;
1169
1170     if (rate_control_mode == VA_RC_CBR) {
1171         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1172         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1173     }
1174
1175     /* only support for 8-bit pixel bit-depth */
1176     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1177     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1178     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1179     assert(qp >= 0 && qp < 52);
1180
1181     head_offset = old_used / 16;
1182     gen6_mfc_avc_slice_state(ctx,
1183                              pPicParameter,
1184                              pSliceParameter,
1185                              encode_state,
1186                              encoder_context,
1187                              (rate_control_mode == VA_RC_CBR),
1188                              qp,
1189                              slice_batch);
1190
1191     if (slice_index == 0)
1192         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1193
1194     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1195
1196     // slice hander
1197     mfc_context->insert_object(ctx,
1198                                encoder_context,
1199                                (unsigned int *)slice_header,
1200                                ALIGN(slice_header_length_in_bits, 32) >> 5,
1201                                slice_header_length_in_bits & 0x1f,
1202                                5,  /* first 5 bytes are start code + nal unit type */
1203                                1,
1204                                0,
1205                                1,
1206                                slice_batch);
1207     free(slice_header);
1208
1209     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1210     used = intel_batchbuffer_used_size(slice_batch);
1211     head_size = (used - old_used) / 16;
1212     old_used = used;
1213
1214     /* tail */
1215     if (last_slice) {    
1216         mfc_context->insert_object(ctx,
1217                                    encoder_context,
1218                                    tail_data,
1219                                    2,
1220                                    8,
1221                                    2,
1222                                    1,
1223                                    1,
1224                                    0,
1225                                    slice_batch);
1226     } else {
1227         mfc_context->insert_object(ctx,
1228                                    encoder_context,
1229                                    tail_data,
1230                                    1,
1231                                    8,
1232                                    1,
1233                                    1,
1234                                    1,
1235                                    0,
1236                                    slice_batch);
1237     }
1238
1239     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1240     used = intel_batchbuffer_used_size(slice_batch);
1241     tail_size = (used - old_used) / 16;
1242
1243    
1244     gen6_mfc_avc_batchbuffer_slice_command(ctx,
1245                                            encoder_context,
1246                                            pSliceParameter,
1247                                            head_offset,
1248                                            head_size,
1249                                            tail_size,
1250                                            batchbuffer_offset,
1251                                            qp,
1252                                            last_slice);
1253
1254     return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
1255 }
1256
1257 static void
1258 gen6_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1259                                   struct encode_state *encode_state,
1260                                   struct intel_encoder_context *encoder_context)
1261 {
1262     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1263     struct intel_batchbuffer *batch = encoder_context->base.batch;
1264     int i, size, offset = 0;
1265     intel_batchbuffer_start_atomic(batch, 0x4000); 
1266     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1267
1268     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1269         size = gen6_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1270         offset += size;
1271     }
1272
1273     intel_batchbuffer_end_atomic(batch);
1274     intel_batchbuffer_flush(batch);
1275 }
1276
1277 static void
1278 gen6_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
1279                                struct encode_state *encode_state,
1280                                struct intel_encoder_context *encoder_context)
1281 {
1282     gen6_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1283     gen6_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1284     gen6_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1285     gen6_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1286 }
1287
1288 static dri_bo *
1289 gen6_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1290                                   struct encode_state *encode_state,
1291                                   struct intel_encoder_context *encoder_context)
1292 {
1293     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1294
1295     gen6_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1296     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1297
1298     return mfc_context->mfc_batchbuffer_surface.bo;
1299 }
1300
1301 #endif
1302
1303
1304 static void
1305 gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
1306                                  struct encode_state *encode_state,
1307                                  struct intel_encoder_context *encoder_context)
1308 {
1309     struct intel_batchbuffer *batch = encoder_context->base.batch;
1310     dri_bo *slice_batch_bo;
1311
1312     if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1313         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1314         assert(0);
1315         return; 
1316     }
1317
1318 #if __SOFTWARE__
1319     slice_batch_bo = gen6_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1320 #else
1321     slice_batch_bo = gen6_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1322 #endif
1323
1324     // begin programing
1325     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1326     intel_batchbuffer_emit_mi_flush(batch);
1327     
1328     // picture level programing
1329     gen6_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1330
1331     BEGIN_BCS_BATCH(batch, 2);
1332     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1333     OUT_BCS_RELOC(batch,
1334                   slice_batch_bo,
1335                   I915_GEM_DOMAIN_COMMAND, 0, 
1336                   0);
1337     ADVANCE_BCS_BATCH(batch);
1338
1339     // end programing
1340     intel_batchbuffer_end_atomic(batch);
1341
1342     dri_bo_unreference(slice_batch_bo);
1343 }
1344
1345 static VAStatus
1346 gen6_mfc_avc_encode_picture(VADriverContextP ctx, 
1347                             struct encode_state *encode_state,
1348                             struct intel_encoder_context *encoder_context)
1349 {
1350     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1351     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1352     int current_frame_bits_size;
1353     int sts;
1354  
1355     for (;;) {
1356         gen6_mfc_init(ctx, encode_state, encoder_context);
1357         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1358         /*Programing bcs pipeline*/
1359         gen6_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);   //filling the pipeline
1360         gen6_mfc_run(ctx, encode_state, encoder_context);
1361         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1362             gen6_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1363             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1364             if (sts == BRC_NO_HRD_VIOLATION) {
1365                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1366                 break;
1367             }
1368             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1369                 if (!mfc_context->hrd.violation_noted) {
1370                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1371                     mfc_context->hrd.violation_noted = 1;
1372                 }
1373                 return VA_STATUS_SUCCESS;
1374             }
1375         } else {
1376             break;
1377         }
1378     }
1379
1380     return VA_STATUS_SUCCESS;
1381 }
1382
1383 /*
1384  * MPEG-2
1385  */
1386
1387 static void
1388 gen6_mfc_qm_state(VADriverContextP ctx,
1389                   int qm_type,
1390                   unsigned int *qm,
1391                   int qm_length,
1392                   struct intel_encoder_context *encoder_context)
1393 {
1394     struct intel_batchbuffer *batch = encoder_context->base.batch;
1395     unsigned int qm_buffer[16];
1396
1397     assert(qm_length <= 16);
1398     assert(sizeof(*qm) == 4);
1399     memcpy(qm_buffer, qm, qm_length * 4);
1400
1401     BEGIN_BCS_BATCH(batch, 18);
1402     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
1403     OUT_BCS_BATCH(batch, qm_type << 0);
1404     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
1405     ADVANCE_BCS_BATCH(batch);
1406 }
1407
1408 static void
1409 gen6_mfc_fqm_state(VADriverContextP ctx,
1410                    int fqm_type,
1411                    unsigned int *fqm,
1412                    int fqm_length,
1413                    struct intel_encoder_context *encoder_context)
1414 {
1415     struct intel_batchbuffer *batch = encoder_context->base.batch;
1416     unsigned int fqm_buffer[32];
1417
1418     assert(fqm_length <= 32);
1419     assert(sizeof(*fqm) == 4);
1420     memcpy(fqm_buffer, fqm, fqm_length * 4);
1421
1422     BEGIN_BCS_BATCH(batch, 34);
1423     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
1424     OUT_BCS_BATCH(batch, fqm_type << 0);
1425     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
1426     ADVANCE_BCS_BATCH(batch);
1427 }
1428
/*
 * Picture coding type written into MFX_MPEG2_PIC_STATE, indexed by the
 * picture_type field of the VA picture parameter buffer
 * (index 0 = I, 1 = P, 2 = B).
 */
static const int va_to_gen6_mpeg2_picture_type[3] = {
    /* I */ 1,
    /* P */ 2,
    /* B */ 3,
};
1435
1436 static void
1437 gen6_mfc_mpeg2_pic_state(VADriverContextP ctx,
1438                           struct intel_encoder_context *encoder_context,
1439                           struct encode_state *encode_state)
1440 {
1441     struct intel_batchbuffer *batch = encoder_context->base.batch;
1442     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1443     VAEncPictureParameterBufferMPEG2 *pic_param;
1444     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1445     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1446
1447     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1448     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1449
1450     BEGIN_BCS_BATCH(batch, 13);
1451     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1452     OUT_BCS_BATCH(batch,
1453                   (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1454                   (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1455                   (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1456                   (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1457                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1458                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1459                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1460                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1461                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1462                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1463                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1464                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1465     OUT_BCS_BATCH(batch,
1466                   0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1467                   va_to_gen6_mpeg2_picture_type[pic_param->picture_type] << 9 |
1468                   0);
1469     OUT_BCS_BATCH(batch,
1470                   1 << 31 |     /* slice concealment */
1471                   (height_in_mbs - 1) << 16 |
1472                   (width_in_mbs - 1));
1473     OUT_BCS_BATCH(batch, 0);
1474     OUT_BCS_BATCH(batch, 0);
1475     OUT_BCS_BATCH(batch,
1476                   0xFFF << 16 | /* InterMBMaxSize */
1477                   0xFFF << 0 |  /* IntraMBMaxSize */
1478                   0);
1479     OUT_BCS_BATCH(batch, 0);
1480     OUT_BCS_BATCH(batch, 0);
1481     OUT_BCS_BATCH(batch, 0);
1482     OUT_BCS_BATCH(batch, 0);
1483     OUT_BCS_BATCH(batch, 0);
1484     OUT_BCS_BATCH(batch, 0);
1485     ADVANCE_BCS_BATCH(batch);
1486 }
1487
1488 static void
1489 gen6_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1490 {
1491     unsigned char intra_qm[64] = {
1492          8, 16, 19, 22, 26, 27, 29, 34,
1493         16, 16, 22, 24, 27, 29, 34, 37,
1494         19, 22, 26, 27, 29, 34, 34, 38,
1495         22, 22, 26, 27, 29, 34, 37, 40,
1496         22, 26, 27, 29, 32, 35, 40, 48,
1497         26, 27, 29, 32, 35, 40, 48, 58,
1498         26, 27, 29, 34, 38, 46, 56, 69,
1499         27, 29, 35, 38, 46, 56, 69, 83
1500     };
1501
1502     unsigned char non_intra_qm[64] = {
1503         16, 16, 16, 16, 16, 16, 16, 16,
1504         16, 16, 16, 16, 16, 16, 16, 16,
1505         16, 16, 16, 16, 16, 16, 16, 16,
1506         16, 16, 16, 16, 16, 16, 16, 16,
1507         16, 16, 16, 16, 16, 16, 16, 16,
1508         16, 16, 16, 16, 16, 16, 16, 16,
1509         16, 16, 16, 16, 16, 16, 16, 16,
1510         16, 16, 16, 16, 16, 16, 16, 16
1511     };
1512
1513     gen6_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1514     gen6_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
1515 }
1516
1517 static void
1518 gen6_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1519 {
1520     unsigned short intra_fqm[64] = {
1521          65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1522          65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1523          65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1524          65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1525          65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1526          65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1527          65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1528          65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1529     };
1530
1531     unsigned short non_intra_fqm[64] = {
1532         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1533         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1534         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1535         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1536         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1537         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1538         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1539         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1540     };
1541
1542     gen6_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1543     gen6_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
1544 }
1545
1546 static void
1547 gen6_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1548                                  struct intel_encoder_context *encoder_context,
1549                                  int x, int y,
1550                                  int next_x, int next_y,
1551                                  int is_fisrt_slice_group,
1552                                  int is_last_slice_group,
1553                                  int intra_slice,
1554                                  int qp,
1555                                  struct intel_batchbuffer *batch)
1556 {
1557     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1558
1559     if (batch == NULL)
1560         batch = encoder_context->base.batch;
1561
1562     BEGIN_BCS_BATCH(batch, 8);
1563
1564     OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1565     OUT_BCS_BATCH(batch,
1566                   0 << 31 |                             /* MbRateCtrlFlag */
1567                   !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
1568                   1 << 17 |                             /* Insert Header before the first slice group data */
1569                   1 << 16 |                             /* SliceData PresentFlag: always 1 */
1570                   1 << 15 |                             /* TailPresentFlag: always 1 */
1571                   0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
1572                   !!intra_slice << 13 |                 /* IntraSlice */
1573                   !!intra_slice << 12 |                 /* IntraSliceFlag */
1574                   0);
1575     OUT_BCS_BATCH(batch,
1576                   next_y << 24 |
1577                   next_x << 16 |
1578                   y << 8 |
1579                   x << 0 |
1580                   0);
1581     OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
1582     /* bitstream pointer is only loaded once for the first slice of a frame when 
1583      * LoadSlicePointerFlag is 0
1584      */
1585     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1586     OUT_BCS_BATCH(batch, 0);    /* FIXME: */
1587     OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
1588     OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
1589
1590     ADVANCE_BCS_BATCH(batch);
1591 }
1592
1593 static int
1594 gen6_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1595                                  struct intel_encoder_context *encoder_context,
1596                                  int x, int y,
1597                                  int first_mb_in_slice,
1598                                  int last_mb_in_slice,
1599                                  int first_mb_in_slice_group,
1600                                  int last_mb_in_slice_group,
1601                                  int mb_type,
1602                                  int qp_scale_code,
1603                                  int coded_block_pattern,
1604                                  unsigned char target_size_in_word,
1605                                  unsigned char max_size_in_word,
1606                                  struct intel_batchbuffer *batch)
1607 {
1608     int len_in_dwords = 9;
1609
1610     if (batch == NULL)
1611         batch = encoder_context->base.batch;
1612
1613     BEGIN_BCS_BATCH(batch, len_in_dwords);
1614
1615     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1616     OUT_BCS_BATCH(batch,
1617                   0 << 24 |     /* PackedMvNum */
1618                   0 << 20 |     /* MvFormat */
1619                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1620                   0 << 15 |     /* TransformFlag: frame DCT */
1621                   0 << 14 |     /* FieldMbFlag */
1622                   1 << 13 |     /* IntraMbFlag */
1623                   mb_type << 8 |   /* MbType: Intra */
1624                   0 << 2 |      /* SkipMbFlag */
1625                   0 << 0 |      /* InterMbMode */
1626                   0);
1627     OUT_BCS_BATCH(batch, y << 16 | x);
1628     OUT_BCS_BATCH(batch,
1629                   max_size_in_word << 24 |
1630                   target_size_in_word << 16 |
1631                   coded_block_pattern << 6 |      /* CBP */
1632                   0);
1633     OUT_BCS_BATCH(batch,
1634                   last_mb_in_slice << 31 |
1635                   first_mb_in_slice << 30 |
1636                   0 << 27 |     /* EnableCoeffClamp */
1637                   last_mb_in_slice_group << 26 |
1638                   0 << 25 |     /* MbSkipConvDisable */
1639                   first_mb_in_slice_group << 24 |
1640                   0 << 16 |     /* MvFieldSelect */
1641                   qp_scale_code << 0 |
1642                   0);
1643     OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
1644     OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
1645     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1646     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1647
1648     ADVANCE_BCS_BATCH(batch);
1649
1650     return len_in_dwords;
1651 }
1652
1653 #define MV_OFFSET_IN_WORD       112
1654
/*
 * Motion vector limits used by mpeg2_motion_vector(), indexed by f_code.
 * Both bounds are expressed in half-pixel units; entry 0 is a placeholder.
 */
static struct _mv_ranges
{
    int low;    /* in the unit of 1/2 pixel */
    int high;   /* in the unit of 1/2 pixel */
} mv_ranges[] = {
    {     0,    0 },
    {   -16,   15 },
    {   -32,   31 },
    {   -64,   63 },
    {  -128,  127 },
    {  -256,  255 },
    {  -512,  511 },
    { -1024, 1023 },
    { -2048, 2047 },
    { -4096, 4095 }
};

/*
 * Sanitise one motion vector component.
 *
 * mv          - component in half-pixel units
 * pos         - macroblock index along the same axis
 * display_max - picture size along that axis, in pixels
 * f_code      - selects the allowed range from mv_ranges
 *
 * The vector is reset to 0 when it would move the 16-pixel macroblock
 * outside [0, display_max].  The result is then clamped to the range for
 * f_code when f_code is 1..9; any other f_code leaves it unclamped.
 */
static int
mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
{
    const int mb_begin = pos * 16 * 2;          /* half-pel position of the MB start */
    const int mb_end = (pos + 1) * 16 * 2;      /* half-pel position of the MB end */
    const int picture_end = display_max * 2;

    if (mv + mb_begin < 0 || mv + mb_end > picture_end)
        mv = 0;

    if (f_code <= 0 || f_code >= 10)
        return mv;

    if (mv < mv_ranges[f_code].low)
        mv = mv_ranges[f_code].low;

    if (mv > mv_ranges[f_code].high)
        mv = mv_ranges[f_code].high;

    return mv;
}
1689
1690 static int
1691 gen6_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
1692                                  struct encode_state *encode_state,
1693                                  struct intel_encoder_context *encoder_context,
1694                                  unsigned int *msg,
1695                                  int width_in_mbs, int height_in_mbs,
1696                                  int x, int y,
1697                                  int first_mb_in_slice,
1698                                  int last_mb_in_slice,
1699                                  int first_mb_in_slice_group,
1700                                  int last_mb_in_slice_group,
1701                                  int qp_scale_code,
1702                                  unsigned char target_size_in_word,
1703                                  unsigned char max_size_in_word,
1704                                  struct intel_batchbuffer *batch)
1705 {
1706     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1707     int len_in_dwords = 9;
1708     short *mvptr, mvx0, mvy0, mvx1, mvy1;
1709  
1710     if (batch == NULL)
1711         batch = encoder_context->base.batch;
1712
1713     mvptr = (short *)msg;
1714     mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
1715     mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
1716     mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
1717     mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
1718
1719     BEGIN_BCS_BATCH(batch, len_in_dwords);
1720
1721     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1722     OUT_BCS_BATCH(batch,
1723                   2 << 24 |     /* PackedMvNum */
1724                   7 << 20 |     /* MvFormat */
1725                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1726                   0 << 15 |     /* TransformFlag: frame DCT */
1727                   0 << 14 |     /* FieldMbFlag */
1728                   0 << 13 |     /* IntraMbFlag */
1729                   1 << 8 |      /* MbType: Frame-based */
1730                   0 << 2 |      /* SkipMbFlag */
1731                   0 << 0 |      /* InterMbMode */
1732                   0);
1733     OUT_BCS_BATCH(batch, y << 16 | x);
1734     OUT_BCS_BATCH(batch,
1735                   max_size_in_word << 24 |
1736                   target_size_in_word << 16 |
1737                   0x3f << 6 |   /* CBP */
1738                   0);
1739     OUT_BCS_BATCH(batch,
1740                   last_mb_in_slice << 31 |
1741                   first_mb_in_slice << 30 |
1742                   0 << 27 |     /* EnableCoeffClamp */
1743                   last_mb_in_slice_group << 26 |
1744                   0 << 25 |     /* MbSkipConvDisable */
1745                   first_mb_in_slice_group << 24 |
1746                   0 << 16 |     /* MvFieldSelect */
1747                   qp_scale_code << 0 |
1748                   0);
1749
1750     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
1751     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
1752     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1753     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1754
1755     ADVANCE_BCS_BATCH(batch);
1756
1757     return len_in_dwords;
1758 }
1759
1760 #define INTRA_RDO_OFFSET        4
1761 #define INTER_RDO_OFFSET        54
1762 #define INTER_MSG_OFFSET        52
1763 #define INTER_MV_OFFSET         224
1764 #define RDO_MASK                0xFFFF
1765
1766 static void
1767 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
1768                                            struct encode_state *encode_state,
1769                                            struct intel_encoder_context *encoder_context,
1770                                            struct intel_batchbuffer *slice_batch)
1771 {
1772     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1773     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
1774
1775     if (encode_state->packed_header_data[idx]) {
1776         VAEncPackedHeaderParameterBuffer *param = NULL;
1777         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1778         unsigned int length_in_bits;
1779
1780         assert(encode_state->packed_header_param[idx]);
1781         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
1782         length_in_bits = param->bit_length;
1783
1784         mfc_context->insert_object(ctx,
1785                                    encoder_context,
1786                                    header_data,
1787                                    ALIGN(length_in_bits, 32) >> 5,
1788                                    length_in_bits & 0x1f,
1789                                    5,   /* FIXME: check it */
1790                                    0,
1791                                    0,
1792                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
1793                                    slice_batch);
1794     }
1795
1796     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
1797
1798     if (encode_state->packed_header_data[idx]) {
1799         VAEncPackedHeaderParameterBuffer *param = NULL;
1800         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1801         unsigned int length_in_bits;
1802
1803         assert(encode_state->packed_header_param[idx]);
1804         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
1805         length_in_bits = param->bit_length;
1806
1807         mfc_context->insert_object(ctx,
1808                                    encoder_context,
1809                                    header_data,
1810                                    ALIGN(length_in_bits, 32) >> 5,
1811                                    length_in_bits & 0x1f,
1812                                    5,   /* FIXME: check it */
1813                                    0,
1814                                    0,
1815                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
1816                                    slice_batch);
1817     }
1818 }
1819
1820 static void 
1821 gen6_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
1822                                      struct encode_state *encode_state,
1823                                      struct intel_encoder_context *encoder_context,
1824                                      int slice_index,
1825                                      VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
1826                                      struct intel_batchbuffer *slice_batch)
1827 {
1828     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1829     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1830     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1831     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1832     unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
1833     unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
1834     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1835     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1836     int i, j;
1837     int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
1838     unsigned int *msg = NULL, offset = 0;
1839     unsigned char *msg_ptr = NULL;
1840
1841     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
1842     h_start_pos = slice_param->macroblock_address % width_in_mbs;
1843     v_start_pos = slice_param->macroblock_address / width_in_mbs;
1844     assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
1845
1846     dri_bo_map(vme_context->vme_output.bo , 0);
1847     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1848
1849     if (next_slice_group_param) {
1850         h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
1851         v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
1852     } else {
1853         h_next_start_pos = 0;
1854         v_next_start_pos = height_in_mbs;
1855     }
1856
1857     gen6_mfc_mpeg2_slicegroup_state(ctx,
1858                                      encoder_context,
1859                                      h_start_pos,
1860                                      v_start_pos,
1861                                      h_next_start_pos,
1862                                      v_next_start_pos,
1863                                      slice_index == 0,
1864                                      next_slice_group_param == NULL,
1865                                      slice_param->is_intra_slice,
1866                                      slice_param->quantiser_scale_code,
1867                                      slice_batch);
1868
1869     if (slice_index == 0) 
1870         intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1871
1872     /* Insert '00' to make sure the header is valid */
1873     mfc_context->insert_object(ctx,
1874                                encoder_context,
1875                                (unsigned int*)section_delimiter,
1876                                1,
1877                                8,   /* 8bits in the last DWORD */
1878                                1,   /* 1 byte */
1879                                1,
1880                                0,
1881                                0,
1882                                slice_batch);
1883
1884     for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
1885         /* PAK for each macroblocks */
1886         for (j = 0; j < slice_param->num_macroblocks; j++) {
1887             int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
1888             int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
1889             int first_mb_in_slice = (j == 0);
1890             int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
1891             int first_mb_in_slice_group = (i == 0 && j == 0);
1892             int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
1893                                           j == slice_param->num_macroblocks - 1);
1894
1895             if (slice_param->is_intra_slice) {
1896                 gen6_mfc_mpeg2_pak_object_intra(ctx,
1897                                                  encoder_context,
1898                                                  h_pos, v_pos,
1899                                                  first_mb_in_slice,
1900                                                  last_mb_in_slice,
1901                                                  first_mb_in_slice_group,
1902                                                  last_mb_in_slice_group,
1903                                                  0x1a,
1904                                                  slice_param->quantiser_scale_code,
1905                                                  0x3f,
1906                                                  0,
1907                                                  0xff,
1908                                                  slice_batch);
1909             } else {
1910                 msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
1911
1912                 if(msg[32] & INTRA_MB_FLAG_MASK) {
1913                      gen6_mfc_mpeg2_pak_object_intra(ctx,
1914                                                      encoder_context,
1915                                                      h_pos, v_pos,
1916                                                      first_mb_in_slice,
1917                                                      last_mb_in_slice,
1918                                                      first_mb_in_slice_group,
1919                                                      last_mb_in_slice_group,
1920                                                      0x1a,
1921                                                      slice_param->quantiser_scale_code,
1922                                                      0x3f,
1923                                                      0,
1924                                                      0xff,
1925                                                      slice_batch);
1926                  } else {
1927
1928                     gen6_mfc_mpeg2_pak_object_inter(ctx,
1929                                                     encode_state,
1930                                                     encoder_context,
1931                                                     msg,
1932                                                     width_in_mbs, height_in_mbs,
1933                                                     h_pos, v_pos,
1934                                                     first_mb_in_slice,
1935                                                     last_mb_in_slice,
1936                                                     first_mb_in_slice_group,
1937                                                     last_mb_in_slice_group,
1938                                                     slice_param->quantiser_scale_code,
1939                                                     0,
1940                                                     0xff,
1941                                                     slice_batch);
1942               }
1943            }
1944         }
1945
1946         slice_param++;
1947     }
1948
1949     dri_bo_unmap(vme_context->vme_output.bo);
1950
1951     /* tail data */
1952     if (next_slice_group_param == NULL) { /* end of a picture */
1953         mfc_context->insert_object(ctx,
1954                                    encoder_context,
1955                                    (unsigned int *)tail_delimiter,
1956                                    2,
1957                                    8,   /* 8bits in the last DWORD */
1958                                    5,   /* 5 bytes */
1959                                    1,
1960                                    1,
1961                                    0,
1962                                    slice_batch);
1963     } else {        /* end of a lsice group */
1964         mfc_context->insert_object(ctx,
1965                                    encoder_context,
1966                                    (unsigned int *)section_delimiter,
1967                                    1,
1968                                    8,   /* 8bits in the last DWORD */
1969                                    1,   /* 1 byte */
1970                                    1,
1971                                    1,
1972                                    0,
1973                                    slice_batch);
1974     }
1975 }
1976
1977 /* 
1978  * A batch buffer for all slices, including slice state, 
1979  * slice insert object and slice pak object commands
1980  *
1981  */
1982 static dri_bo *
1983 gen6_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
1984                                            struct encode_state *encode_state,
1985                                            struct intel_encoder_context *encoder_context)
1986 {
1987     struct i965_driver_data *i965 = i965_driver_data(ctx);
1988     struct intel_batchbuffer *batch;
1989     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1990     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
1991     dri_bo *batch_bo;
1992     int i;
1993     int buffer_size;
1994     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1995     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1996
1997     buffer_size = width_in_mbs * height_in_mbs * 64;
1998     batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
1999     batch_bo = batch->buffer;
2000
2001     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2002         if (i == encode_state->num_slice_params_ext - 1)
2003             next_slice_group_param = NULL;
2004         else
2005             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2006
2007         gen6_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2008     }
2009
2010     intel_batchbuffer_align(batch, 8);
2011     
2012     BEGIN_BCS_BATCH(batch, 2);
2013     OUT_BCS_BATCH(batch, 0);
2014     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2015     ADVANCE_BCS_BATCH(batch);
2016
2017     dri_bo_reference(batch_bo);
2018     intel_batchbuffer_free(batch);
2019
2020     return batch_bo;
2021 }
2022
2023 static void
2024 gen6_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2025                                             struct encode_state *encode_state,
2026                                             struct intel_encoder_context *encoder_context)
2027 {
2028     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2029
2030     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2031     mfc_context->set_surface_state(ctx, encoder_context);
2032     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2033     gen6_mfc_pipe_buf_addr_state(ctx, encoder_context);
2034     gen6_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2035     gen6_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2036     gen6_mfc_mpeg2_qm_state(ctx, encoder_context);
2037     gen6_mfc_mpeg2_fqm_state(ctx, encoder_context);
2038 }
2039
2040 static void
2041 gen6_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2042                                     struct encode_state *encode_state,
2043                                     struct intel_encoder_context *encoder_context)
2044 {
2045     struct intel_batchbuffer *batch = encoder_context->base.batch;
2046     dri_bo *slice_batch_bo;
2047
2048     slice_batch_bo = gen6_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2049
2050     // begin programing
2051     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
2052     intel_batchbuffer_emit_mi_flush(batch);
2053     
2054     // picture level programing
2055     gen6_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
2056
2057     BEGIN_BCS_BATCH(batch, 2);
2058     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
2059     OUT_BCS_RELOC(batch,
2060                   slice_batch_bo,
2061                   I915_GEM_DOMAIN_COMMAND, 0, 
2062                   0);
2063     ADVANCE_BCS_BATCH(batch);
2064
2065     // end programing
2066     intel_batchbuffer_end_atomic(batch);
2067
2068     dri_bo_unreference(slice_batch_bo);
2069 }
2070
2071 static VAStatus
2072 intel_mfc_mpeg2_prepare(VADriverContextP ctx,
2073                         struct encode_state *encode_state,
2074                         struct intel_encoder_context *encoder_context)
2075 {
2076     struct i965_driver_data *i965 = i965_driver_data(ctx);
2077     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2078     struct object_surface *obj_surface;
2079     struct object_buffer *obj_buffer;
2080     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
2081     struct i965_coded_buffer_segment *coded_buffer_segment;
2082     VAStatus vaStatus = VA_STATUS_SUCCESS;
2083     dri_bo *bo;
2084     int i;
2085
2086     /* reconstructed surface */
2087     obj_surface = SURFACE(pic_param->reconstructed_picture);
2088     assert(obj_surface);
2089     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2090     mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2091     dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2092     mfc_context->surface_state.width = obj_surface->orig_width;
2093     mfc_context->surface_state.height = obj_surface->orig_height;
2094     mfc_context->surface_state.w_pitch = obj_surface->width;
2095     mfc_context->surface_state.h_pitch = obj_surface->height;
2096
2097     /* forward reference */
2098     obj_surface = SURFACE(pic_param->forward_reference_picture);
2099
2100     if (obj_surface && obj_surface->bo) {
2101         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2102         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2103     } else
2104         mfc_context->reference_surfaces[0].bo = NULL;
2105
2106     /* backward reference */
2107     obj_surface = SURFACE(pic_param->backward_reference_picture);
2108
2109     if (obj_surface && obj_surface->bo) {
2110         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2111         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2112     } else {
2113         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2114
2115         if (mfc_context->reference_surfaces[1].bo)
2116             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2117     }
2118
2119     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2120         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2121
2122         if (mfc_context->reference_surfaces[i].bo)
2123             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2124     }
2125     
2126     /* input YUV surface */
2127     obj_surface = SURFACE(encoder_context->input_yuv_surface);
2128     assert(obj_surface && obj_surface->bo);
2129     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2130     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2131
2132     /* coded buffer */
2133     obj_buffer = BUFFER(pic_param->coded_buf);
2134     bo = obj_buffer->buffer_store->bo;
2135     assert(bo);
2136     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2137     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2138     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2139     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2140
2141     /* set the internal flag to 0 to indicate the coded size is unknown */
2142     dri_bo_map(bo, 1);
2143     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2144     coded_buffer_segment->mapped = 0;
2145     coded_buffer_segment->codec = CODED_MPEG2;
2146     dri_bo_unmap(bo);
2147
2148     return vaStatus;
2149 }
2150
2151 static VAStatus
2152 gen6_mfc_mpeg2_encode_picture(VADriverContextP ctx, 
2153                                struct encode_state *encode_state,
2154                                struct intel_encoder_context *encoder_context)
2155 {
2156     gen6_mfc_init(ctx, encode_state, encoder_context);
2157     intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2158     /*Programing bcs pipeline*/
2159     gen6_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2160     gen6_mfc_run(ctx, encode_state, encoder_context);
2161
2162     return VA_STATUS_SUCCESS;
2163 }
2164
2165
2166 VAStatus
2167 gen6_mfc_pipeline(VADriverContextP ctx,
2168                   VAProfile profile,
2169                   struct encode_state *encode_state,
2170                   struct intel_encoder_context *encoder_context)
2171 {
2172     VAStatus vaStatus;
2173
2174     switch (profile) {
2175     case VAProfileH264Baseline:
2176     case VAProfileH264Main:
2177     case VAProfileH264High:
2178         vaStatus = gen6_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
2179         break;
2180
2181     case VAProfileMPEG2Simple:
2182     case VAProfileMPEG2Main:
2183         vaStatus = gen6_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
2184         break;
2185
2186         /* FIXME: add for other profile */
2187     default:
2188         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2189         break;
2190     }
2191
2192     return vaStatus;
2193 }
2194
2195 void
2196 gen6_mfc_context_destroy(void *context)
2197 {
2198     struct gen6_mfc_context *mfc_context = context;
2199     int i;
2200
2201     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2202     mfc_context->post_deblocking_output.bo = NULL;
2203
2204     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2205     mfc_context->pre_deblocking_output.bo = NULL;
2206
2207     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2208     mfc_context->uncompressed_picture_source.bo = NULL;
2209
2210     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
2211     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2212
2213     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2214         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2215         mfc_context->direct_mv_buffers[i].bo = NULL;
2216     }
2217
2218     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2219     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
2220
2221     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2222     mfc_context->macroblock_status_buffer.bo = NULL;
2223
2224     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2225     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2226
2227     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2228     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2229
2230
2231     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2232         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2233         mfc_context->reference_surfaces[i].bo = NULL;  
2234     }
2235
2236     i965_gpe_context_destroy(&mfc_context->gpe_context);
2237
2238     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2239     mfc_context->mfc_batchbuffer_surface.bo = NULL;
2240
2241     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2242     mfc_context->aux_batchbuffer_surface.bo = NULL;
2243
2244     if (mfc_context->aux_batchbuffer)
2245         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2246
2247     mfc_context->aux_batchbuffer = NULL;
2248
2249     free(mfc_context);
2250 }
2251
2252 Bool gen6_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2253 {
2254     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
2255
2256     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2257
2258     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
2259     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
2260
2261     mfc_context->gpe_context.curbe.length = 32 * 4;
2262
2263     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2264     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
2265     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
2266     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
2267     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
2268
2269     i965_gpe_load_kernels(ctx,
2270                           &mfc_context->gpe_context,
2271                           gen6_mfc_kernels,
2272                           NUM_MFC_KERNEL);
2273
2274     mfc_context->pipe_mode_select = gen6_mfc_pipe_mode_select;
2275     mfc_context->set_surface_state = gen6_mfc_surface_state;
2276     mfc_context->ind_obj_base_addr_state = gen6_mfc_ind_obj_base_addr_state;
2277     mfc_context->avc_img_state = gen6_mfc_avc_img_state;
2278     mfc_context->avc_qm_state = gen6_mfc_avc_qm_state;
2279     mfc_context->avc_fqm_state = gen6_mfc_avc_fqm_state;
2280     mfc_context->insert_object = gen6_mfc_avc_insert_object;
2281     mfc_context->buffer_suface_setup = i965_gpe_buffer_suface_setup;
2282
2283     encoder_context->mfc_context = mfc_context;
2284     encoder_context->mfc_context_destroy = gen6_mfc_context_destroy;
2285     encoder_context->mfc_pipeline = gen6_mfc_pipeline;
2286     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
2287
2288     return True;
2289 }