Packing the Intra macroblock in P/B slice for MPEG2
[platform/upstream/libva-intel-driver.git] / src / gen6_mfc.c
1 /*
2  * Copyright © 2010-2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhou Chang <chang.zhou@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33 #include <math.h>
34
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40 #include "i965_encoder_utils.h"
41 #include "gen6_mfc.h"
42 #include "gen6_vme.h"
43 #include "intel_media.h"
44
45 static const uint32_t gen6_mfc_batchbuffer_avc_intra[][4] = {
46 #include "shaders/utils/mfc_batchbuffer_avc_intra.g6b"
47 };
48
49 static const uint32_t gen6_mfc_batchbuffer_avc_inter[][4] = {
50 #include "shaders/utils/mfc_batchbuffer_avc_inter.g6b"
51 };
52
53 static struct i965_kernel gen6_mfc_kernels[] = {
54     {
55         "MFC AVC INTRA BATCHBUFFER ",
56         MFC_BATCHBUFFER_AVC_INTRA,
57         gen6_mfc_batchbuffer_avc_intra,
58         sizeof(gen6_mfc_batchbuffer_avc_intra),
59         NULL
60     },
61
62     {
63         "MFC AVC INTER BATCHBUFFER ",
64         MFC_BATCHBUFFER_AVC_INTER,
65         gen6_mfc_batchbuffer_avc_inter,
66         sizeof(gen6_mfc_batchbuffer_avc_inter),
67         NULL
68     },
69 };
70
71 static void
72 gen6_mfc_pipe_mode_select(VADriverContextP ctx,
73                           int standard_select,
74                           struct intel_encoder_context *encoder_context)
75 {
76     struct intel_batchbuffer *batch = encoder_context->base.batch;
77     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
78
79     assert(standard_select == MFX_FORMAT_AVC);
80
81     BEGIN_BCS_BATCH(batch, 4);
82
83     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
84     OUT_BCS_BATCH(batch,
85                   (1 << 10) | /* disable Stream-Out , advanced QP/bitrate control need enable it*/
86                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
87                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
88                   (0 << 7)  | /* disable TLB prefectch */
89                   (0 << 5)  | /* not in stitch mode */
90                   (1 << 4)  | /* encoding mode */
91                   (2 << 0));  /* Standard Select: AVC */
92     OUT_BCS_BATCH(batch,
93                   (0 << 20) | /* round flag in PB slice */
94                   (0 << 19) | /* round flag in Intra8x8 */
95                   (0 << 7)  | /* expand NOA bus flag */
96                   (1 << 6)  | /* must be 1 */
97                   (0 << 5)  | /* disable clock gating for NOA */
98                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
99                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
100                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
101                   (0 << 1)  | /* AVC long field motion vector */
102                   (0 << 0));  /* always calculate AVC ILDB boundary strength */
103     OUT_BCS_BATCH(batch, 0);
104
105     ADVANCE_BCS_BATCH(batch);
106 }
107
108 static void
109 gen6_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
110 {
111     struct intel_batchbuffer *batch = encoder_context->base.batch;
112     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
113
114     BEGIN_BCS_BATCH(batch, 6);
115
116     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
117     OUT_BCS_BATCH(batch, 0);
118     OUT_BCS_BATCH(batch,
119                   ((mfc_context->surface_state.height - 1) << 19) |
120                   ((mfc_context->surface_state.width - 1) << 6));
121     OUT_BCS_BATCH(batch,
122                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
123                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
124                   (0 << 22) | /* surface object control state, FIXME??? */
125                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
126                   (0 << 2)  | /* must be 0 for interleave U/V */
127                   (1 << 1)  | /* must be y-tiled */
128                   (I965_TILEWALK_YMAJOR << 0));                         /* tile walk, TILEWALK_YMAJOR */
129     OUT_BCS_BATCH(batch,
130                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
131                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
132     OUT_BCS_BATCH(batch, 0);
133     ADVANCE_BCS_BATCH(batch);
134 }
135
136 static void
137 gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
138 {
139     struct intel_batchbuffer *batch = encoder_context->base.batch;
140     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
141     int i;
142
143     BEGIN_BCS_BATCH(batch, 24);
144
145     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
146
147     if (mfc_context->pre_deblocking_output.bo)
148         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
149                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
150                       0);
151     else
152         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
153
154     if (mfc_context->post_deblocking_output.bo)
155         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
156                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
157                       0);                                                                                       /* post output addr  */ 
158     else
159         OUT_BCS_BATCH(batch, 0);
160
161     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
162                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
163                   0);                                                                                   /* uncompressed data */
164     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
165                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
166                   0);                                                                                   /* StreamOut data*/
167     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
168                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
169                   0);   
170     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
171                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
172                   0);
173     /* 7..22 Reference pictures*/
174     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
175         if ( mfc_context->reference_surfaces[i].bo != NULL) {
176             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
177                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
178                           0);                   
179         } else {
180             OUT_BCS_BATCH(batch, 0);
181         }
182     }
183     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
184                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
185                   0);                                                                                   /* Macroblock status buffer*/
186
187     ADVANCE_BCS_BATCH(batch);
188 }
189
190 static void
191 gen6_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
192 {
193     struct intel_batchbuffer *batch = encoder_context->base.batch;
194     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
195     struct gen6_vme_context *vme_context = encoder_context->vme_context;
196
197     BEGIN_BCS_BATCH(batch, 11);
198
199     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
200     OUT_BCS_BATCH(batch, 0);
201     OUT_BCS_BATCH(batch, 0);
202     /* MFX Indirect MV Object Base Address */
203     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
204     OUT_BCS_BATCH(batch, 0);    
205     OUT_BCS_BATCH(batch, 0);
206     OUT_BCS_BATCH(batch, 0);
207     OUT_BCS_BATCH(batch, 0);
208     OUT_BCS_BATCH(batch, 0);
209     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
210     OUT_BCS_RELOC(batch,
211                   mfc_context->mfc_indirect_pak_bse_object.bo,
212                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
213                   0);
214     OUT_BCS_RELOC(batch,
215                   mfc_context->mfc_indirect_pak_bse_object.bo,
216                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
217                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
218
219     ADVANCE_BCS_BATCH(batch);
220 }
221
222 static void
223 gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
224 {
225     struct intel_batchbuffer *batch = encoder_context->base.batch;
226     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
227
228     BEGIN_BCS_BATCH(batch, 4);
229
230     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
231     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
232                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
233                   0);
234     OUT_BCS_BATCH(batch, 0);
235     OUT_BCS_BATCH(batch, 0);
236
237     ADVANCE_BCS_BATCH(batch);
238 }
239
240 static void
241 gen6_mfc_avc_img_state(VADriverContextP ctx,struct encode_state *encode_state,
242                        struct intel_encoder_context *encoder_context)
243 {
244     struct intel_batchbuffer *batch = encoder_context->base.batch;
245     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
246     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
247     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
248     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
249     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
250
251     BEGIN_BCS_BATCH(batch, 13);
252     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (13 - 2));
253     OUT_BCS_BATCH(batch, 
254                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
255     OUT_BCS_BATCH(batch, 
256                   (height_in_mbs << 16) | 
257                   (width_in_mbs << 0));
258     OUT_BCS_BATCH(batch, 
259                   (0 << 24) |     /*Second Chroma QP Offset*/
260                   (0 << 16) |     /*Chroma QP Offset*/
261                   (0 << 14) |   /*Max-bit conformance Intra flag*/
262                   (0 << 13) |   /*Max Macroblock size conformance Inter flag*/
263                   (1 << 12) |   /*Should always be written as "1" */
264                   (0 << 10) |   /*QM Preset FLag */
265                   (0 << 8)  |   /*Image Structure*/
266                   (0 << 0) );   /*Current Decoed Image Frame Store ID, reserved in Encode mode*/
267     OUT_BCS_BATCH(batch,
268                   (400 << 16) |   /*Mininum Frame size*/        
269                   (0 << 15) |   /*Disable reading of Macroblock Status Buffer*/
270                   (0 << 14) |   /*Load BitStream Pointer only once, 1 slic 1 frame*/
271                   (0 << 13) |   /*CABAC 0 word insertion test enable*/
272                   (1 << 12) |   /*MVUnpackedEnable,compliant to DXVA*/
273                   (1 << 10) |   /*Chroma Format IDC, 4:2:0*/
274                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
275                   (0 << 6)  |   /*Only valid for VLD decoding mode*/
276                   (0 << 5)  |   /*Constrained Intra Predition Flag, from PPS*/
277                   (pSequenceParameter->seq_fields.bits.direct_8x8_inference_flag << 4)  |   /*Direct 8x8 inference flag*/
278                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
279                   (1 << 2)  |   /*Frame MB only flag*/
280                   (0 << 1)  |   /*MBAFF mode is in active*/
281                   (0 << 0) );   /*Field picture flag*/
282     OUT_BCS_BATCH(batch, 
283                   (1<<16)   |   /*Frame Size Rate Control Flag*/  
284                   (1<<12)   |   
285                   (1<<9)    |   /*MB level Rate Control Enabling Flag*/
286                   (1 << 3)  |   /*FrameBitRateMinReportMask*/
287                   (1 << 2)  |   /*FrameBitRateMaxReportMask*/
288                   (1 << 1)  |   /*InterMBMaxSizeReportMask*/
289                   (1 << 0) );   /*IntraMBMaxSizeReportMask*/
290     OUT_BCS_BATCH(batch,                        /*Inter and Intra Conformance Max size limit*/
291                   (0x0600 << 16) |              /*InterMbMaxSz 192 Byte*/
292                   (0x0800) );                   /*IntraMbMaxSz 256 Byte*/
293     OUT_BCS_BATCH(batch, 0x00000000);   /*Reserved : MBZReserved*/
294     OUT_BCS_BATCH(batch, 0x01020304);   /*Slice QP Delta for bitrate control*/                  
295     OUT_BCS_BATCH(batch, 0xFEFDFCFB);           
296     OUT_BCS_BATCH(batch, 0x80601004);   /*MAX = 128KB, MIN = 64KB*/
297     OUT_BCS_BATCH(batch, 0x00800001);   
298     OUT_BCS_BATCH(batch, 0);
299
300     ADVANCE_BCS_BATCH(batch);
301 }
302
303 static void
304 gen6_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
305 {
306     struct intel_batchbuffer *batch = encoder_context->base.batch;
307     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
308
309     int i;
310
311     BEGIN_BCS_BATCH(batch, 69);
312
313     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
314
315     /* Reference frames and Current frames */
316     for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
317         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
318             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
319                           I915_GEM_DOMAIN_INSTRUCTION, 0,
320                           0);
321         } else {
322             OUT_BCS_BATCH(batch, 0);
323         }
324     }
325
326     /* POL list */
327     for(i = 0; i < 32; i++) {
328         OUT_BCS_BATCH(batch, i/2);
329     }
330     OUT_BCS_BATCH(batch, 0);
331     OUT_BCS_BATCH(batch, 0);
332
333     ADVANCE_BCS_BATCH(batch);
334 }
335
336 static void
337 gen6_mfc_avc_slice_state(VADriverContextP ctx,
338                          VAEncPictureParameterBufferH264 *pic_param,
339                          VAEncSliceParameterBufferH264 *slice_param,
340                          struct encode_state *encode_state,
341                          struct intel_encoder_context *encoder_context,
342                          int rate_control_enable,
343                          int qp,
344                          struct intel_batchbuffer *batch)
345 {
346     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
347     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
348     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
349     int beginmb = slice_param->macroblock_address;
350     int endmb = beginmb + slice_param->num_macroblocks;
351     int beginx = beginmb % width_in_mbs;
352     int beginy = beginmb / width_in_mbs;
353     int nextx =  endmb % width_in_mbs;
354     int nexty = endmb / width_in_mbs;
355     int slice_type = slice_param->slice_type;
356     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
357     int bit_rate_control_target, maxQpN, maxQpP;
358     unsigned char correct[6], grow, shrink;
359     int i;
360     int weighted_pred_idc = 0;
361     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
362     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
363
364     if (batch == NULL)
365         batch = encoder_context->base.batch;
366
367     bit_rate_control_target = slice_type;
368     if (slice_type == SLICE_TYPE_SP)
369         bit_rate_control_target = SLICE_TYPE_P;
370     else if (slice_type == SLICE_TYPE_SI)
371         bit_rate_control_target = SLICE_TYPE_I;
372
373     if (slice_type == SLICE_TYPE_P) {
374         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
375     } else if (slice_type == SLICE_TYPE_B) {
376         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
377
378         if (weighted_pred_idc == 2) {
379             /* 8.4.3 - Derivation process for prediction weights (8-279) */
380             luma_log2_weight_denom = 5;
381             chroma_log2_weight_denom = 5;
382         }
383     }
384
385     maxQpN = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpNegModifier;
386     maxQpP = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpPosModifier;
387
388     for (i = 0; i < 6; i++)
389         correct[i] = mfc_context->bit_rate_control_context[bit_rate_control_target].Correct[i];
390
391     grow = mfc_context->bit_rate_control_context[bit_rate_control_target].GrowInit + 
392         (mfc_context->bit_rate_control_context[bit_rate_control_target].GrowResistance << 4);
393     shrink = mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkInit + 
394         (mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkResistance << 4);
395
396     BEGIN_BCS_BATCH(batch, 11);;
397
398     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
399     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
400
401     if (slice_type == SLICE_TYPE_I) {
402         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
403     } else {
404         OUT_BCS_BATCH(batch,
405                       (1 << 16) |                       /*1 reference frame*/
406                       (chroma_log2_weight_denom << 8) |
407                       (luma_log2_weight_denom << 0));
408     }
409
410     OUT_BCS_BATCH(batch, 
411                   (weighted_pred_idc << 30) |
412                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
413                   (slice_param->disable_deblocking_filter_idc << 27) |
414                   (slice_param->cabac_init_idc << 24) |
415                   (qp<<16) |                    /*Slice Quantization Parameter*/
416                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
417                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
418     OUT_BCS_BATCH(batch,
419                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
420                   (beginx << 16) |
421                   slice_param->macroblock_address );
422     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
423     OUT_BCS_BATCH(batch, 
424                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
425                   (1 << 30) |           /*ResetRateControlCounter*/
426                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
427                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
428                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
429                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
430                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
431                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
432                   (last_slice << 19) |     /*IsLastSlice*/
433                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
434                   (1 << 17) |       /*HeaderPresentFlag*/       
435                   (1 << 16) |       /*SliceData PresentFlag*/
436                   (1 << 15) |       /*TailPresentFlag*/
437                   (1 << 13) |       /*RBSP NAL TYPE*/   
438                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
439     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
440     OUT_BCS_BATCH(batch,
441                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
442                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
443                   (shrink << 8)  |
444                   (grow << 0));   
445     OUT_BCS_BATCH(batch,
446                   (correct[5] << 20) |
447                   (correct[4] << 16) |
448                   (correct[3] << 12) |
449                   (correct[2] << 8) |
450                   (correct[1] << 4) |
451                   (correct[0] << 0));
452     OUT_BCS_BATCH(batch, 0);
453
454     ADVANCE_BCS_BATCH(batch);
455 }
456 static void gen6_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
457 {
458     struct intel_batchbuffer *batch = encoder_context->base.batch;
459     int i;
460
461     BEGIN_BCS_BATCH(batch, 58);
462
463     OUT_BCS_BATCH(batch, MFX_AVC_QM_STATE | 56);
464     OUT_BCS_BATCH(batch, 0xFF ) ; 
465     for( i = 0; i < 56; i++) {
466         OUT_BCS_BATCH(batch, 0x10101010); 
467     }   
468
469     ADVANCE_BCS_BATCH(batch);
470 }
471
472 static void gen6_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
473 {
474     struct intel_batchbuffer *batch = encoder_context->base.batch;
475     int i;
476
477     BEGIN_BCS_BATCH(batch, 113);
478     OUT_BCS_BATCH(batch, MFC_AVC_FQM_STATE | (113 - 2));
479
480     for(i = 0; i < 112;i++) {
481         OUT_BCS_BATCH(batch, 0x10001000);
482     }   
483
484     ADVANCE_BCS_BATCH(batch);   
485 }
486
487 static void
488 gen6_mfc_avc_ref_idx_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
489 {
490     struct intel_batchbuffer *batch = encoder_context->base.batch;
491     int i;
492
493     BEGIN_BCS_BATCH(batch, 10);
494     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
495     OUT_BCS_BATCH(batch, 0);                  //Select L0
496     OUT_BCS_BATCH(batch, 0x80808020);         //Only 1 reference
497     for(i = 0; i < 7; i++) {
498         OUT_BCS_BATCH(batch, 0x80808080);
499     }   
500     ADVANCE_BCS_BATCH(batch);
501
502     BEGIN_BCS_BATCH(batch, 10);
503     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
504     OUT_BCS_BATCH(batch, 1);                  //Select L1
505     OUT_BCS_BATCH(batch, 0x80808022);         //Only 1 reference
506     for(i = 0; i < 7; i++) {
507         OUT_BCS_BATCH(batch, 0x80808080);
508     }   
509     ADVANCE_BCS_BATCH(batch);
510 }
511         
512 static void
513 gen6_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
514                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
515                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
516                            struct intel_batchbuffer *batch)
517 {
518     if (batch == NULL)
519         batch = encoder_context->base.batch;
520
521     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
522
523     OUT_BCS_BATCH(batch, MFC_AVC_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
524
525     OUT_BCS_BATCH(batch,
526                   (0 << 16) |   /* always start at offset 0 */
527                   (data_bits_in_last_dw << 8) |
528                   (skip_emul_byte_count << 4) |
529                   (!!emulation_flag << 3) |
530                   ((!!is_last_header) << 2) |
531                   ((!!is_end_of_slice) << 1) |
532                   (0 << 0));    /* FIXME: ??? */
533
534     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
535     ADVANCE_BCS_BATCH(batch);
536 }
537
538 static void gen6_mfc_init(VADriverContextP ctx, 
539                             struct encode_state *encode_state,
540                             struct intel_encoder_context *encoder_context)
541 {
542     struct i965_driver_data *i965 = i965_driver_data(ctx);
543     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
544     dri_bo *bo;
545     int i;
546     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
547     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
548     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
549
550     /*Encode common setup for MFC*/
551     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
552     mfc_context->post_deblocking_output.bo = NULL;
553
554     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
555     mfc_context->pre_deblocking_output.bo = NULL;
556
557     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
558     mfc_context->uncompressed_picture_source.bo = NULL;
559
560     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
561     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
562
563     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
564         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
565         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
566         mfc_context->direct_mv_buffers[i].bo = NULL;
567     }
568
569     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
570         if (mfc_context->reference_surfaces[i].bo != NULL)
571             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
572         mfc_context->reference_surfaces[i].bo = NULL;  
573     }
574
575     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
576     bo = dri_bo_alloc(i965->intel.bufmgr,
577                       "Buffer",
578                       width_in_mbs * 64,
579                       64);
580     assert(bo);
581     mfc_context->intra_row_store_scratch_buffer.bo = bo;
582
583     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
584     bo = dri_bo_alloc(i965->intel.bufmgr,
585                       "Buffer",
586                       width_in_mbs * height_in_mbs * 16,
587                       64);
588     assert(bo);
589     mfc_context->macroblock_status_buffer.bo = bo;
590
591     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
592     bo = dri_bo_alloc(i965->intel.bufmgr,
593                       "Buffer",
594                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
595                       64);
596     assert(bo);
597     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
598
599     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
600     bo = dri_bo_alloc(i965->intel.bufmgr,
601                       "Buffer",
602                       128 * width_in_mbs, /* 2 * widht_in_mbs * 64 */
603                       0x1000);
604     assert(bo);
605     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
606
607     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
608     mfc_context->mfc_batchbuffer_surface.bo = NULL;
609
610     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
611     mfc_context->aux_batchbuffer_surface.bo = NULL;
612
613     if (mfc_context->aux_batchbuffer)
614         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
615
616     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0);
617     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
618     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
619     mfc_context->aux_batchbuffer_surface.pitch = 16;
620     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
621     mfc_context->aux_batchbuffer_surface.size_block = 16;
622
623     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
624 }
625
626 static void gen6_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
627                                       struct encode_state *encode_state,
628                                       struct intel_encoder_context *encoder_context)
629 {
630     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
631
632     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
633     mfc_context->set_surface_state(ctx, encoder_context);
634     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
635     gen6_mfc_pipe_buf_addr_state(ctx, encoder_context);
636     gen6_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
637     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
638     mfc_context->avc_qm_state(ctx, encoder_context);
639     mfc_context->avc_fqm_state(ctx, encoder_context);
640     gen6_mfc_avc_directmode_state(ctx, encoder_context); 
641     gen6_mfc_avc_ref_idx_state(ctx, encoder_context);
642 }
643
644
645 static VAStatus gen6_mfc_run(VADriverContextP ctx, 
646                              struct encode_state *encode_state,
647                              struct intel_encoder_context *encoder_context)
648 {
649     struct intel_batchbuffer *batch = encoder_context->base.batch;
650
651     intel_batchbuffer_flush(batch);             //run the pipeline
652
653     return VA_STATUS_SUCCESS;
654 }
655
656 static VAStatus
657 gen6_mfc_stop(VADriverContextP ctx, 
658               struct encode_state *encode_state,
659               struct intel_encoder_context *encoder_context,
660               int *encoded_bits_size)
661 {
662     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
663     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
664     VACodedBufferSegment *coded_buffer_segment;
665     
666     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
667     assert(vaStatus == VA_STATUS_SUCCESS);
668     *encoded_bits_size = coded_buffer_segment->size * 8;
669     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
670
671     return VA_STATUS_SUCCESS;
672 }
673
674 #if __SOFTWARE__
675
676 static int
677 gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg,
678                               struct intel_encoder_context *encoder_context,
679                               unsigned char target_mb_size, unsigned char max_mb_size,
680                               struct intel_batchbuffer *batch)
681 {
682     int len_in_dwords = 11;
683
684     if (batch == NULL)
685         batch = encoder_context->base.batch;
686
687     BEGIN_BCS_BATCH(batch, len_in_dwords);
688
689     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
690     OUT_BCS_BATCH(batch, 0);
691     OUT_BCS_BATCH(batch, 0);
692     OUT_BCS_BATCH(batch, 
693                   (0 << 24) |           /* PackedMvNum, Debug*/
694                   (0 << 20) |           /* No motion vector */
695                   (1 << 19) |           /* CbpDcY */
696                   (1 << 18) |           /* CbpDcU */
697                   (1 << 17) |           /* CbpDcV */
698                   (msg[0] & 0xFFFF) );
699
700     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
701     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
702     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
703
704     /*Stuff for Intra MB*/
705     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
706     OUT_BCS_BATCH(batch, msg[2]);       
707     OUT_BCS_BATCH(batch, msg[3]&0xFC);          
708     
709     /*MaxSizeInWord and TargetSzieInWord*/
710     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
711                   (target_mb_size << 16) );
712
713     ADVANCE_BCS_BATCH(batch);
714
715     return len_in_dwords;
716 }
717
718 static int
719 gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
720                               unsigned int *msg, unsigned int offset,
721                               struct intel_encoder_context *encoder_context,
722                               unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
723                               struct intel_batchbuffer *batch)
724 {
725     int len_in_dwords = 11;
726
727     if (batch == NULL)
728         batch = encoder_context->base.batch;
729
730     BEGIN_BCS_BATCH(batch, len_in_dwords);
731
732     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
733
734     OUT_BCS_BATCH(batch, msg[2]);         /* 32 MV*/
735     OUT_BCS_BATCH(batch, offset);
736
737     OUT_BCS_BATCH(batch, msg[0]);
738
739     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
740     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
741 #if 0 
742     if ( slice_type == SLICE_TYPE_B) {
743         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
744     } else {
745         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
746     }
747 #else
748     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
749 #endif
750
751
752     /*Stuff for Inter MB*/
753     OUT_BCS_BATCH(batch, msg[1]);        
754     OUT_BCS_BATCH(batch, 0x0);    
755     OUT_BCS_BATCH(batch, 0x0);        
756
757     /*MaxSizeInWord and TargetSzieInWord*/
758     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
759                   (target_mb_size << 16) );
760
761     ADVANCE_BCS_BATCH(batch);
762
763     return len_in_dwords;
764 }
765
766 static void 
767 gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
768                                        struct encode_state *encode_state,
769                                        struct intel_encoder_context *encoder_context,
770                                        int slice_index,
771                                        struct intel_batchbuffer *slice_batch)
772 {
773     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
774     struct gen6_vme_context *vme_context = encoder_context->vme_context;
775     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
776     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
777     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
778     unsigned int *msg = NULL, offset = 0;
779     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
780     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
781     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
782     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
783     int i,x,y;
784     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
785     unsigned int rate_control_mode = encoder_context->rate_control_mode;
786     unsigned char *slice_header = NULL;
787     int slice_header_length_in_bits = 0;
788     unsigned int tail_data[] = { 0x0, 0x0 };
789     int slice_type = pSliceParameter->slice_type;
790
791
792     if (rate_control_mode == VA_RC_CBR) {
793         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
794         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
795     }
796
797     /* only support for 8-bit pixel bit-depth */
798     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
799     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
800     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
801     assert(qp >= 0 && qp < 52);
802
803     gen6_mfc_avc_slice_state(ctx, 
804                              pPicParameter,
805                              pSliceParameter,
806                              encode_state, encoder_context,
807                              (rate_control_mode == VA_RC_CBR), qp, slice_batch);
808
809     if ( slice_index == 0) 
810         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
811
812     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
813
814     // slice hander
815     mfc_context->insert_object(ctx, encoder_context,
816                                (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
817                                5,  /* first 5 bytes are start code + nal unit type */
818                                1, 0, 1, slice_batch);
819
820     dri_bo_map(vme_context->vme_output.bo , 1);
821     msg = (unsigned int *)vme_context->vme_output.bo->virtual;
822
823     if (is_intra) {
824         msg += pSliceParameter->macroblock_address * INTRA_VME_OUTPUT_IN_DWS;
825     } else {
826         msg += pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_DWS;
827         msg += 32; /* the first 32 DWs are MVs */
828         offset = pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_BYTES;
829     }
830    
831     for (i = pSliceParameter->macroblock_address; 
832          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
833         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
834         x = i % width_in_mbs;
835         y = i / width_in_mbs;
836
837         if (is_intra) {
838             assert(msg);
839             gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
840             msg += INTRA_VME_OUTPUT_IN_DWS;
841         } else {
842             if (msg[0] & INTRA_MB_FLAG_MASK) {
843                 gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
844             } else {
845                 gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
846             }
847
848             msg += INTER_VME_OUTPUT_IN_DWS;
849             offset += INTER_VME_OUTPUT_IN_BYTES;
850         }
851     }
852    
853     dri_bo_unmap(vme_context->vme_output.bo);
854
855     if ( last_slice ) {    
856         mfc_context->insert_object(ctx, encoder_context,
857                                    tail_data, 2, 8,
858                                    2, 1, 1, 0, slice_batch);
859     } else {
860         mfc_context->insert_object(ctx, encoder_context,
861                                    tail_data, 1, 8,
862                                    1, 1, 1, 0, slice_batch);
863     }
864
865     free(slice_header);
866
867 }
868
869 static dri_bo *
870 gen6_mfc_avc_software_batchbuffer(VADriverContextP ctx,
871                                   struct encode_state *encode_state,
872                                   struct intel_encoder_context *encoder_context)
873 {
874     struct i965_driver_data *i965 = i965_driver_data(ctx);
875     struct intel_batchbuffer *batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0);
876     dri_bo *batch_bo = batch->buffer;
877     int i;
878
879     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
880         gen6_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
881     }
882
883     intel_batchbuffer_align(batch, 8);
884     
885     BEGIN_BCS_BATCH(batch, 2);
886     OUT_BCS_BATCH(batch, 0);
887     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
888     ADVANCE_BCS_BATCH(batch);
889
890     dri_bo_reference(batch_bo);
891     intel_batchbuffer_free(batch);
892
893     return batch_bo;
894 }
895
896 #else
897
898 static void
899 gen6_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
900                                     struct encode_state *encode_state,
901                                     struct intel_encoder_context *encoder_context)
902
903 {
904     struct gen6_vme_context *vme_context = encoder_context->vme_context;
905     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
906
907     assert(vme_context->vme_output.bo);
908     mfc_context->buffer_suface_setup(ctx,
909                                      &mfc_context->gpe_context,
910                                      &vme_context->vme_output,
911                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
912                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
913     assert(mfc_context->aux_batchbuffer_surface.bo);
914     mfc_context->buffer_suface_setup(ctx,
915                                      &mfc_context->gpe_context,
916                                      &mfc_context->aux_batchbuffer_surface,
917                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
918                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
919 }
920
921 static void
922 gen6_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
923                                      struct encode_state *encode_state,
924                                      struct intel_encoder_context *encoder_context)
925
926 {
927     struct i965_driver_data *i965 = i965_driver_data(ctx);
928     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
929     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
930     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
931     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
932     mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
933     mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
934     mfc_context->mfc_batchbuffer_surface.pitch = 16;
935     mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, 
936                                                            "MFC batchbuffer",
937                                                            mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
938                                                            0x1000);
939     mfc_context->buffer_suface_setup(ctx,
940                                      &mfc_context->gpe_context,
941                                      &mfc_context->mfc_batchbuffer_surface,
942                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
943                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
944 }
945
946 static void
947 gen6_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
948                                     struct encode_state *encode_state,
949                                     struct intel_encoder_context *encoder_context)
950 {
951     gen6_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
952     gen6_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
953 }
954
955 static void
956 gen6_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
957                                 struct encode_state *encode_state,
958                                 struct intel_encoder_context *encoder_context)
959 {
960     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
961     struct gen6_interface_descriptor_data *desc;   
962     int i;
963     dri_bo *bo;
964
965     bo = mfc_context->gpe_context.idrt.bo;
966     dri_bo_map(bo, 1);
967     assert(bo->virtual);
968     desc = bo->virtual;
969
970     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
971         struct i965_kernel *kernel;
972
973         kernel = &mfc_context->gpe_context.kernels[i];
974         assert(sizeof(*desc) == 32);
975
976         /*Setup the descritor table*/
977         memset(desc, 0, sizeof(*desc));
978         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
979         desc->desc2.sampler_count = 0;
980         desc->desc2.sampler_state_pointer = 0;
981         desc->desc3.binding_table_entry_count = 2;
982         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
983         desc->desc4.constant_urb_entry_read_offset = 0;
984         desc->desc4.constant_urb_entry_read_length = 4;
985                 
986         /*kernel start*/
987         dri_bo_emit_reloc(bo,   
988                           I915_GEM_DOMAIN_INSTRUCTION, 0,
989                           0,
990                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
991                           kernel->bo);
992         desc++;
993     }
994
995     dri_bo_unmap(bo);
996 }
997
998 static void
999 gen6_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1000                                     struct encode_state *encode_state,
1001                                     struct intel_encoder_context *encoder_context)
1002 {
1003     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1004     
1005     (void)mfc_context;
1006 }
1007
1008 static void
1009 gen6_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1010                                          int index,
1011                                          int head_offset,
1012                                          int batchbuffer_offset,
1013                                          int head_size,
1014                                          int tail_size,
1015                                          int number_mb_cmds,
1016                                          int first_object,
1017                                          int last_object,
1018                                          int last_slice,
1019                                          int mb_x,
1020                                          int mb_y,
1021                                          int width_in_mbs,
1022                                          int qp)
1023 {
1024     BEGIN_BATCH(batch, 12);
1025     
1026     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1027     OUT_BATCH(batch, index);
1028     OUT_BATCH(batch, 0);
1029     OUT_BATCH(batch, 0);
1030     OUT_BATCH(batch, 0);
1031     OUT_BATCH(batch, 0);
1032    
1033     /*inline data */
1034     OUT_BATCH(batch, head_offset);
1035     OUT_BATCH(batch, batchbuffer_offset);
1036     OUT_BATCH(batch, 
1037               head_size << 16 |
1038               tail_size);
1039     OUT_BATCH(batch,
1040               number_mb_cmds << 16 |
1041               first_object << 2 |
1042               last_object << 1 |
1043               last_slice);
1044     OUT_BATCH(batch,
1045               mb_y << 8 |
1046               mb_x);
1047     OUT_BATCH(batch,
1048               qp << 16 |
1049               width_in_mbs);
1050
1051     ADVANCE_BATCH(batch);
1052 }
1053
1054 static void
1055 gen6_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1056                                        struct intel_encoder_context *encoder_context,
1057                                        VAEncSliceParameterBufferH264 *slice_param,
1058                                        int head_offset,
1059                                        unsigned short head_size,
1060                                        unsigned short tail_size,
1061                                        int batchbuffer_offset,
1062                                        int qp,
1063                                        int last_slice)
1064 {
1065     struct intel_batchbuffer *batch = encoder_context->base.batch;
1066     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1067     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1068     int total_mbs = slice_param->num_macroblocks;
1069     int number_mb_cmds = 128;
1070     int starting_mb = 0;
1071     int last_object = 0;
1072     int first_object = 1;
1073     int i;
1074     int mb_x, mb_y;
1075     int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1076
1077     for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1078         last_object = (total_mbs - starting_mb) == number_mb_cmds;
1079         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1080         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1081         assert(mb_x <= 255 && mb_y <= 255);
1082
1083         starting_mb += number_mb_cmds;
1084
1085         gen6_mfc_batchbuffer_emit_object_command(batch,
1086                                                  index,
1087                                                  head_offset,
1088                                                  batchbuffer_offset,
1089                                                  head_size,
1090                                                  tail_size,
1091                                                  number_mb_cmds,
1092                                                  first_object,
1093                                                  last_object,
1094                                                  last_slice,
1095                                                  mb_x,
1096                                                  mb_y,
1097                                                  width_in_mbs,
1098                                                  qp);
1099
1100         if (first_object) {
1101             head_offset += head_size;
1102             batchbuffer_offset += head_size;
1103         }
1104
1105         if (last_object) {
1106             head_offset += tail_size;
1107             batchbuffer_offset += tail_size;
1108         }
1109
1110         batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
1111
1112         first_object = 0;
1113     }
1114
1115     if (!last_object) {
1116         last_object = 1;
1117         number_mb_cmds = total_mbs % number_mb_cmds;
1118         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1119         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1120         assert(mb_x <= 255 && mb_y <= 255);
1121         starting_mb += number_mb_cmds;
1122
1123         gen6_mfc_batchbuffer_emit_object_command(batch,
1124                                                  index,
1125                                                  head_offset,
1126                                                  batchbuffer_offset,
1127                                                  head_size,
1128                                                  tail_size,
1129                                                  number_mb_cmds,
1130                                                  first_object,
1131                                                  last_object,
1132                                                  last_slice,
1133                                                  mb_x,
1134                                                  mb_y,
1135                                                  width_in_mbs,
1136                                                  qp);
1137     }
1138 }
1139                           
1140 /*
1141  * return size in Owords (16bytes)
1142  */         
1143 static int
1144 gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1145                                struct encode_state *encode_state,
1146                                struct intel_encoder_context *encoder_context,
1147                                int slice_index,
1148                                int batchbuffer_offset)
1149 {
1150     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1151     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1152     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1153     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1154     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1155     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1156     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1157     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1158     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1159     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1160     unsigned char *slice_header = NULL;
1161     int slice_header_length_in_bits = 0;
1162     unsigned int tail_data[] = { 0x0, 0x0 };
1163     long head_offset;
1164     int old_used = intel_batchbuffer_used_size(slice_batch), used;
1165     unsigned short head_size, tail_size;
1166     int slice_type = pSliceParameter->slice_type;
1167
1168     if (rate_control_mode == VA_RC_CBR) {
1169         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1170         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1171     }
1172
1173     /* only support for 8-bit pixel bit-depth */
1174     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1175     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1176     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1177     assert(qp >= 0 && qp < 52);
1178
1179     head_offset = old_used / 16;
1180     gen6_mfc_avc_slice_state(ctx,
1181                              pPicParameter,
1182                              pSliceParameter,
1183                              encode_state,
1184                              encoder_context,
1185                              (rate_control_mode == VA_RC_CBR),
1186                              qp,
1187                              slice_batch);
1188
1189     if (slice_index == 0)
1190         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1191
1192     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1193
1194     // slice hander
1195     mfc_context->insert_object(ctx,
1196                                encoder_context,
1197                                (unsigned int *)slice_header,
1198                                ALIGN(slice_header_length_in_bits, 32) >> 5,
1199                                slice_header_length_in_bits & 0x1f,
1200                                5,  /* first 5 bytes are start code + nal unit type */
1201                                1,
1202                                0,
1203                                1,
1204                                slice_batch);
1205     free(slice_header);
1206
1207     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1208     used = intel_batchbuffer_used_size(slice_batch);
1209     head_size = (used - old_used) / 16;
1210     old_used = used;
1211
1212     /* tail */
1213     if (last_slice) {    
1214         mfc_context->insert_object(ctx,
1215                                    encoder_context,
1216                                    tail_data,
1217                                    2,
1218                                    8,
1219                                    2,
1220                                    1,
1221                                    1,
1222                                    0,
1223                                    slice_batch);
1224     } else {
1225         mfc_context->insert_object(ctx,
1226                                    encoder_context,
1227                                    tail_data,
1228                                    1,
1229                                    8,
1230                                    1,
1231                                    1,
1232                                    1,
1233                                    0,
1234                                    slice_batch);
1235     }
1236
1237     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1238     used = intel_batchbuffer_used_size(slice_batch);
1239     tail_size = (used - old_used) / 16;
1240
1241    
1242     gen6_mfc_avc_batchbuffer_slice_command(ctx,
1243                                            encoder_context,
1244                                            pSliceParameter,
1245                                            head_offset,
1246                                            head_size,
1247                                            tail_size,
1248                                            batchbuffer_offset,
1249                                            qp,
1250                                            last_slice);
1251
1252     return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
1253 }
1254
1255 static void
1256 gen6_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1257                                   struct encode_state *encode_state,
1258                                   struct intel_encoder_context *encoder_context)
1259 {
1260     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1261     struct intel_batchbuffer *batch = encoder_context->base.batch;
1262     int i, size, offset = 0;
1263     intel_batchbuffer_start_atomic(batch, 0x4000); 
1264     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1265
1266     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1267         size = gen6_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1268         offset += size;
1269     }
1270
1271     intel_batchbuffer_end_atomic(batch);
1272     intel_batchbuffer_flush(batch);
1273 }
1274
1275 static void
1276 gen6_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
1277                                struct encode_state *encode_state,
1278                                struct intel_encoder_context *encoder_context)
1279 {
1280     gen6_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1281     gen6_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1282     gen6_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1283     gen6_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1284 }
1285
1286 static dri_bo *
1287 gen6_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1288                                   struct encode_state *encode_state,
1289                                   struct intel_encoder_context *encoder_context)
1290 {
1291     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1292
1293     gen6_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1294     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1295
1296     return mfc_context->mfc_batchbuffer_surface.bo;
1297 }
1298
1299 #endif
1300
1301
1302 static void
1303 gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
1304                                  struct encode_state *encode_state,
1305                                  struct intel_encoder_context *encoder_context)
1306 {
1307     struct intel_batchbuffer *batch = encoder_context->base.batch;
1308     dri_bo *slice_batch_bo;
1309
1310     if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1311         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1312         assert(0);
1313         return; 
1314     }
1315
1316 #if __SOFTWARE__
1317     slice_batch_bo = gen6_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1318 #else
1319     slice_batch_bo = gen6_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1320 #endif
1321
1322     // begin programing
1323     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1324     intel_batchbuffer_emit_mi_flush(batch);
1325     
1326     // picture level programing
1327     gen6_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1328
1329     BEGIN_BCS_BATCH(batch, 2);
1330     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1331     OUT_BCS_RELOC(batch,
1332                   slice_batch_bo,
1333                   I915_GEM_DOMAIN_COMMAND, 0, 
1334                   0);
1335     ADVANCE_BCS_BATCH(batch);
1336
1337     // end programing
1338     intel_batchbuffer_end_atomic(batch);
1339
1340     dri_bo_unreference(slice_batch_bo);
1341 }
1342
1343 static VAStatus
1344 gen6_mfc_avc_encode_picture(VADriverContextP ctx, 
1345                             struct encode_state *encode_state,
1346                             struct intel_encoder_context *encoder_context)
1347 {
1348     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1349     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1350     int current_frame_bits_size;
1351     int sts;
1352  
1353     for (;;) {
1354         gen6_mfc_init(ctx, encode_state, encoder_context);
1355         intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1356         /*Programing bcs pipeline*/
1357         gen6_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);   //filling the pipeline
1358         gen6_mfc_run(ctx, encode_state, encoder_context);
1359         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1360             gen6_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1361             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1362             if (sts == BRC_NO_HRD_VIOLATION) {
1363                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1364                 break;
1365             }
1366             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1367                 if (!mfc_context->hrd.violation_noted) {
1368                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1369                     mfc_context->hrd.violation_noted = 1;
1370                 }
1371                 return VA_STATUS_SUCCESS;
1372             }
1373         } else {
1374             break;
1375         }
1376     }
1377
1378     return VA_STATUS_SUCCESS;
1379 }
1380
1381 /*
1382  * MPEG-2
1383  */
1384
1385 static void
1386 gen6_mfc_qm_state(VADriverContextP ctx,
1387                   int qm_type,
1388                   unsigned int *qm,
1389                   int qm_length,
1390                   struct intel_encoder_context *encoder_context)
1391 {
1392     struct intel_batchbuffer *batch = encoder_context->base.batch;
1393     unsigned int qm_buffer[16];
1394
1395     assert(qm_length <= 16);
1396     assert(sizeof(*qm) == 4);
1397     memcpy(qm_buffer, qm, qm_length * 4);
1398
1399     BEGIN_BCS_BATCH(batch, 18);
1400     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
1401     OUT_BCS_BATCH(batch, qm_type << 0);
1402     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
1403     ADVANCE_BCS_BATCH(batch);
1404 }
1405
1406 static void
1407 gen6_mfc_fqm_state(VADriverContextP ctx,
1408                    int fqm_type,
1409                    unsigned int *fqm,
1410                    int fqm_length,
1411                    struct intel_encoder_context *encoder_context)
1412 {
1413     struct intel_batchbuffer *batch = encoder_context->base.batch;
1414     unsigned int fqm_buffer[32];
1415
1416     assert(fqm_length <= 32);
1417     assert(sizeof(*fqm) == 4);
1418     memcpy(fqm_buffer, fqm, fqm_length * 4);
1419
1420     BEGIN_BCS_BATCH(batch, 34);
1421     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
1422     OUT_BCS_BATCH(batch, fqm_type << 0);
1423     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
1424     ADVANCE_BCS_BATCH(batch);
1425 }
1426
/*
 * Picture-type code placed in the MPEG-2 picture state, looked up by
 * the picture_type value of the picture parameter buffer.
 */
static const int va_to_gen6_mpeg2_picture_type[3] = {
    [0] = 1,    /* I */
    [1] = 2,    /* P */
    [2] = 3     /* B */
};
1433
1434 static void
1435 gen6_mfc_mpeg2_pic_state(VADriverContextP ctx,
1436                           struct intel_encoder_context *encoder_context,
1437                           struct encode_state *encode_state)
1438 {
1439     struct intel_batchbuffer *batch = encoder_context->base.batch;
1440     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1441     VAEncPictureParameterBufferMPEG2 *pic_param;
1442     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1443     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1444
1445     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
1446     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1447
1448     BEGIN_BCS_BATCH(batch, 13);
1449     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1450     OUT_BCS_BATCH(batch,
1451                   (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
1452                   (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
1453                   (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
1454                   (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
1455                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1456                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1457                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1458                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1459                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1460                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1461                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1462                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1463     OUT_BCS_BATCH(batch,
1464                   0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
1465                   va_to_gen6_mpeg2_picture_type[pic_param->picture_type] << 9 |
1466                   0);
1467     OUT_BCS_BATCH(batch,
1468                   1 << 31 |     /* slice concealment */
1469                   (height_in_mbs - 1) << 16 |
1470                   (width_in_mbs - 1));
1471     OUT_BCS_BATCH(batch, 0);
1472     OUT_BCS_BATCH(batch, 0);
1473     OUT_BCS_BATCH(batch,
1474                   0xFFF << 16 | /* InterMBMaxSize */
1475                   0xFFF << 0 |  /* IntraMBMaxSize */
1476                   0);
1477     OUT_BCS_BATCH(batch, 0);
1478     OUT_BCS_BATCH(batch, 0);
1479     OUT_BCS_BATCH(batch, 0);
1480     OUT_BCS_BATCH(batch, 0);
1481     OUT_BCS_BATCH(batch, 0);
1482     OUT_BCS_BATCH(batch, 0);
1483     ADVANCE_BCS_BATCH(batch);
1484 }
1485
1486 static void
1487 gen6_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1488 {
1489     unsigned char intra_qm[64] = {
1490          8, 16, 19, 22, 26, 27, 29, 34,
1491         16, 16, 22, 24, 27, 29, 34, 37,
1492         19, 22, 26, 27, 29, 34, 34, 38,
1493         22, 22, 26, 27, 29, 34, 37, 40,
1494         22, 26, 27, 29, 32, 35, 40, 48,
1495         26, 27, 29, 32, 35, 40, 48, 58,
1496         26, 27, 29, 34, 38, 46, 56, 69,
1497         27, 29, 35, 38, 46, 56, 69, 83
1498     };
1499
1500     unsigned char non_intra_qm[64] = {
1501         16, 16, 16, 16, 16, 16, 16, 16,
1502         16, 16, 16, 16, 16, 16, 16, 16,
1503         16, 16, 16, 16, 16, 16, 16, 16,
1504         16, 16, 16, 16, 16, 16, 16, 16,
1505         16, 16, 16, 16, 16, 16, 16, 16,
1506         16, 16, 16, 16, 16, 16, 16, 16,
1507         16, 16, 16, 16, 16, 16, 16, 16,
1508         16, 16, 16, 16, 16, 16, 16, 16
1509     };
1510
1511     gen6_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
1512     gen6_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
1513 }
1514
1515 static void
1516 gen6_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1517 {
1518     unsigned short intra_fqm[64] = {
1519          65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
1520          65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
1521          65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
1522          65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
1523          65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
1524          65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
1525          65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
1526          65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
1527     };
1528
1529     unsigned short non_intra_fqm[64] = {
1530         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1531         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1532         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1533         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1534         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1535         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1536         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1537         0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
1538     };
1539
1540     gen6_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
1541     gen6_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
1542 }
1543
1544 static void
1545 gen6_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
1546                                  struct intel_encoder_context *encoder_context,
1547                                  int x, int y,
1548                                  int next_x, int next_y,
1549                                  int is_fisrt_slice_group,
1550                                  int is_last_slice_group,
1551                                  int intra_slice,
1552                                  int qp,
1553                                  struct intel_batchbuffer *batch)
1554 {
1555     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1556
1557     if (batch == NULL)
1558         batch = encoder_context->base.batch;
1559
1560     BEGIN_BCS_BATCH(batch, 8);
1561
1562     OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
1563     OUT_BCS_BATCH(batch,
1564                   0 << 31 |                             /* MbRateCtrlFlag */
1565                   !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
1566                   1 << 17 |                             /* Insert Header before the first slice group data */
1567                   1 << 16 |                             /* SliceData PresentFlag: always 1 */
1568                   1 << 15 |                             /* TailPresentFlag: always 1 */
1569                   0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
1570                   !!intra_slice << 13 |                 /* IntraSlice */
1571                   !!intra_slice << 12 |                 /* IntraSliceFlag */
1572                   0);
1573     OUT_BCS_BATCH(batch,
1574                   next_y << 24 |
1575                   next_x << 16 |
1576                   y << 8 |
1577                   x << 0 |
1578                   0);
1579     OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
1580     /* bitstream pointer is only loaded once for the first slice of a frame when 
1581      * LoadSlicePointerFlag is 0
1582      */
1583     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1584     OUT_BCS_BATCH(batch, 0);    /* FIXME: */
1585     OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
1586     OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
1587
1588     ADVANCE_BCS_BATCH(batch);
1589 }
1590
1591 static int
1592 gen6_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
1593                                  struct intel_encoder_context *encoder_context,
1594                                  int x, int y,
1595                                  int first_mb_in_slice,
1596                                  int last_mb_in_slice,
1597                                  int first_mb_in_slice_group,
1598                                  int last_mb_in_slice_group,
1599                                  int mb_type,
1600                                  int qp_scale_code,
1601                                  int coded_block_pattern,
1602                                  unsigned char target_size_in_word,
1603                                  unsigned char max_size_in_word,
1604                                  struct intel_batchbuffer *batch)
1605 {
1606     int len_in_dwords = 9;
1607
1608     if (batch == NULL)
1609         batch = encoder_context->base.batch;
1610
1611     BEGIN_BCS_BATCH(batch, len_in_dwords);
1612
1613     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1614     OUT_BCS_BATCH(batch,
1615                   0 << 24 |     /* PackedMvNum */
1616                   0 << 20 |     /* MvFormat */
1617                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1618                   0 << 15 |     /* TransformFlag: frame DCT */
1619                   0 << 14 |     /* FieldMbFlag */
1620                   1 << 13 |     /* IntraMbFlag */
1621                   mb_type << 8 |   /* MbType: Intra */
1622                   0 << 2 |      /* SkipMbFlag */
1623                   0 << 0 |      /* InterMbMode */
1624                   0);
1625     OUT_BCS_BATCH(batch, y << 16 | x);
1626     OUT_BCS_BATCH(batch,
1627                   max_size_in_word << 24 |
1628                   target_size_in_word << 16 |
1629                   coded_block_pattern << 6 |      /* CBP */
1630                   0);
1631     OUT_BCS_BATCH(batch,
1632                   last_mb_in_slice << 31 |
1633                   first_mb_in_slice << 30 |
1634                   0 << 27 |     /* EnableCoeffClamp */
1635                   last_mb_in_slice_group << 26 |
1636                   0 << 25 |     /* MbSkipConvDisable */
1637                   first_mb_in_slice_group << 24 |
1638                   0 << 16 |     /* MvFieldSelect */
1639                   qp_scale_code << 0 |
1640                   0);
1641     OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
1642     OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
1643     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1644     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1645
1646     ADVANCE_BCS_BATCH(batch);
1647
1648     return len_in_dwords;
1649 }
1650
1651 #define MV_OFFSET_IN_WORD       112
1652
/* Motion-vector limits, indexed by f_code (entries 1..9 are used). */
struct _mv_ranges
{
    int low;    /* in the unit of 1/2 pixel */
    int high;   /* in the unit of 1/2 pixel */
};

static struct _mv_ranges mv_ranges[] = {
    {     0,    0 },    /* f_code 0: not looked up */
    {   -16,   15 },
    {   -32,   31 },
    {   -64,   63 },
    {  -128,  127 },
    {  -256,  255 },
    {  -512,  511 },
    { -1024, 1023 },
    { -2048, 2047 },
    { -4096, 4095 }
};

/*
 * Sanitise one motion-vector component.
 *
 * mv:          vector component, in half pixels
 * pos:         macroblock index along the same axis
 * display_max: picture size along that axis, in pixels
 * f_code:      selects the limits in mv_ranges; values outside 1..9
 *              leave the vector unclamped
 *
 * A vector that would move the 16-pixel macroblock outside
 * [0, display_max] is replaced by 0; the result is then clamped to the
 * f_code limits.  Returns the sanitised component.
 */
static int
mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
{
    const int leaves_picture = (mv + pos * 16 * 2 < 0) ||
                               (mv + (pos + 1) * 16 * 2 > display_max * 2);
    const struct _mv_ranges *limit;

    if (leaves_picture)
        mv = 0;

    if (f_code <= 0 || f_code >= 10)
        return mv;

    limit = &mv_ranges[f_code];
    mv = (mv < limit->low) ? limit->low : mv;
    mv = (mv > limit->high) ? limit->high : mv;

    return mv;
}
1687
1688 static int
1689 gen6_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
1690                                  struct encode_state *encode_state,
1691                                  struct intel_encoder_context *encoder_context,
1692                                  unsigned int *msg,
1693                                  int width_in_mbs, int height_in_mbs,
1694                                  int x, int y,
1695                                  int first_mb_in_slice,
1696                                  int last_mb_in_slice,
1697                                  int first_mb_in_slice_group,
1698                                  int last_mb_in_slice_group,
1699                                  int qp_scale_code,
1700                                  unsigned char target_size_in_word,
1701                                  unsigned char max_size_in_word,
1702                                  struct intel_batchbuffer *batch)
1703 {
1704     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
1705     int len_in_dwords = 9;
1706     short *mvptr, mvx0, mvy0, mvx1, mvy1;
1707  
1708     if (batch == NULL)
1709         batch = encoder_context->base.batch;
1710
1711     mvptr = (short *)msg;
1712     mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
1713     mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
1714     mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
1715     mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
1716
1717     BEGIN_BCS_BATCH(batch, len_in_dwords);
1718
1719     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
1720     OUT_BCS_BATCH(batch,
1721                   2 << 24 |     /* PackedMvNum */
1722                   7 << 20 |     /* MvFormat */
1723                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
1724                   0 << 15 |     /* TransformFlag: frame DCT */
1725                   0 << 14 |     /* FieldMbFlag */
1726                   0 << 13 |     /* IntraMbFlag */
1727                   1 << 8 |      /* MbType: Frame-based */
1728                   0 << 2 |      /* SkipMbFlag */
1729                   0 << 0 |      /* InterMbMode */
1730                   0);
1731     OUT_BCS_BATCH(batch, y << 16 | x);
1732     OUT_BCS_BATCH(batch,
1733                   max_size_in_word << 24 |
1734                   target_size_in_word << 16 |
1735                   0x3f << 6 |   /* CBP */
1736                   0);
1737     OUT_BCS_BATCH(batch,
1738                   last_mb_in_slice << 31 |
1739                   first_mb_in_slice << 30 |
1740                   0 << 27 |     /* EnableCoeffClamp */
1741                   last_mb_in_slice_group << 26 |
1742                   0 << 25 |     /* MbSkipConvDisable */
1743                   first_mb_in_slice_group << 24 |
1744                   0 << 16 |     /* MvFieldSelect */
1745                   qp_scale_code << 0 |
1746                   0);
1747
1748     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
1749     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
1750     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
1751     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
1752
1753     ADVANCE_BCS_BATCH(batch);
1754
1755     return len_in_dwords;
1756 }
1757
1758 #define INTRA_RDO_OFFSET        4
1759 #define INTER_RDO_OFFSET        54
1760 #define INTER_MSG_OFFSET        52
1761 #define INTER_MV_OFFSET         224
1762 #define RDO_MASK                0xFFFF
1763
1764 static void
1765 intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
1766                                            struct encode_state *encode_state,
1767                                            struct intel_encoder_context *encoder_context,
1768                                            struct intel_batchbuffer *slice_batch)
1769 {
1770     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1771     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
1772
1773     if (encode_state->packed_header_data[idx]) {
1774         VAEncPackedHeaderParameterBuffer *param = NULL;
1775         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1776         unsigned int length_in_bits;
1777
1778         assert(encode_state->packed_header_param[idx]);
1779         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
1780         length_in_bits = param->bit_length;
1781
1782         mfc_context->insert_object(ctx,
1783                                    encoder_context,
1784                                    header_data,
1785                                    ALIGN(length_in_bits, 32) >> 5,
1786                                    length_in_bits & 0x1f,
1787                                    5,   /* FIXME: check it */
1788                                    0,
1789                                    0,
1790                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
1791                                    slice_batch);
1792     }
1793
1794     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
1795
1796     if (encode_state->packed_header_data[idx]) {
1797         VAEncPackedHeaderParameterBuffer *param = NULL;
1798         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1799         unsigned int length_in_bits;
1800
1801         assert(encode_state->packed_header_param[idx]);
1802         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
1803         length_in_bits = param->bit_length;
1804
1805         mfc_context->insert_object(ctx,
1806                                    encoder_context,
1807                                    header_data,
1808                                    ALIGN(length_in_bits, 32) >> 5,
1809                                    length_in_bits & 0x1f,
1810                                    5,   /* FIXME: check it */
1811                                    0,
1812                                    0,
1813                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
1814                                    slice_batch);
1815     }
1816 }
1817
1818 static void 
1819 gen6_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
1820                                      struct encode_state *encode_state,
1821                                      struct intel_encoder_context *encoder_context,
1822                                      int slice_index,
1823                                      VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
1824                                      struct intel_batchbuffer *slice_batch)
1825 {
1826     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1827     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1828     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1829     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1830     unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
1831     unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
1832     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1833     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1834     int i, j;
1835     int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
1836     unsigned int *msg = NULL, offset = 0;
1837     unsigned char *msg_ptr = NULL;
1838
1839     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
1840     h_start_pos = slice_param->macroblock_address % width_in_mbs;
1841     v_start_pos = slice_param->macroblock_address / width_in_mbs;
1842     assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);
1843
1844     dri_bo_map(vme_context->vme_output.bo , 0);
1845     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1846
1847     if (next_slice_group_param) {
1848         h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
1849         v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
1850     } else {
1851         h_next_start_pos = 0;
1852         v_next_start_pos = height_in_mbs;
1853     }
1854
1855     gen6_mfc_mpeg2_slicegroup_state(ctx,
1856                                      encoder_context,
1857                                      h_start_pos,
1858                                      v_start_pos,
1859                                      h_next_start_pos,
1860                                      v_next_start_pos,
1861                                      slice_index == 0,
1862                                      next_slice_group_param == NULL,
1863                                      slice_param->is_intra_slice,
1864                                      slice_param->quantiser_scale_code,
1865                                      slice_batch);
1866
1867     if (slice_index == 0) 
1868         intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1869
1870     /* Insert '00' to make sure the header is valid */
1871     mfc_context->insert_object(ctx,
1872                                encoder_context,
1873                                (unsigned int*)section_delimiter,
1874                                1,
1875                                8,   /* 8bits in the last DWORD */
1876                                1,   /* 1 byte */
1877                                1,
1878                                0,
1879                                0,
1880                                slice_batch);
1881
1882     for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
1883         /* PAK for each macroblocks */
1884         for (j = 0; j < slice_param->num_macroblocks; j++) {
1885             int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
1886             int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
1887             int first_mb_in_slice = (j == 0);
1888             int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
1889             int first_mb_in_slice_group = (i == 0 && j == 0);
1890             int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
1891                                           j == slice_param->num_macroblocks - 1);
1892
1893             if (slice_param->is_intra_slice) {
1894                 gen6_mfc_mpeg2_pak_object_intra(ctx,
1895                                                  encoder_context,
1896                                                  h_pos, v_pos,
1897                                                  first_mb_in_slice,
1898                                                  last_mb_in_slice,
1899                                                  first_mb_in_slice_group,
1900                                                  last_mb_in_slice_group,
1901                                                  0x1a,
1902                                                  slice_param->quantiser_scale_code,
1903                                                  0x3f,
1904                                                  0,
1905                                                  0xff,
1906                                                  slice_batch);
1907             } else {
1908                 msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);
1909
1910                 if(msg[32] & INTRA_MB_FLAG_MASK) {
1911                      gen6_mfc_mpeg2_pak_object_intra(ctx,
1912                                                      encoder_context,
1913                                                      h_pos, v_pos,
1914                                                      first_mb_in_slice,
1915                                                      last_mb_in_slice,
1916                                                      first_mb_in_slice_group,
1917                                                      last_mb_in_slice_group,
1918                                                      0x1a,
1919                                                      slice_param->quantiser_scale_code,
1920                                                      0x3f,
1921                                                      0,
1922                                                      0xff,
1923                                                      slice_batch);
1924                  } else {
1925
1926                     gen6_mfc_mpeg2_pak_object_inter(ctx,
1927                                                     encode_state,
1928                                                     encoder_context,
1929                                                     msg,
1930                                                     width_in_mbs, height_in_mbs,
1931                                                     h_pos, v_pos,
1932                                                     first_mb_in_slice,
1933                                                     last_mb_in_slice,
1934                                                     first_mb_in_slice_group,
1935                                                     last_mb_in_slice_group,
1936                                                     slice_param->quantiser_scale_code,
1937                                                     0,
1938                                                     0xff,
1939                                                     slice_batch);
1940               }
1941            }
1942         }
1943
1944         slice_param++;
1945     }
1946
1947     dri_bo_unmap(vme_context->vme_output.bo);
1948
1949     /* tail data */
1950     if (next_slice_group_param == NULL) { /* end of a picture */
1951         mfc_context->insert_object(ctx,
1952                                    encoder_context,
1953                                    (unsigned int *)tail_delimiter,
1954                                    2,
1955                                    8,   /* 8bits in the last DWORD */
1956                                    5,   /* 5 bytes */
1957                                    1,
1958                                    1,
1959                                    0,
1960                                    slice_batch);
1961     } else {        /* end of a lsice group */
1962         mfc_context->insert_object(ctx,
1963                                    encoder_context,
1964                                    (unsigned int *)section_delimiter,
1965                                    1,
1966                                    8,   /* 8bits in the last DWORD */
1967                                    1,   /* 1 byte */
1968                                    1,
1969                                    1,
1970                                    0,
1971                                    slice_batch);
1972     }
1973 }
1974
1975 /* 
1976  * A batch buffer for all slices, including slice state, 
1977  * slice insert object and slice pak object commands
1978  *
1979  */
1980 static dri_bo *
1981 gen6_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
1982                                            struct encode_state *encode_state,
1983                                            struct intel_encoder_context *encoder_context)
1984 {
1985     struct i965_driver_data *i965 = i965_driver_data(ctx);
1986     struct intel_batchbuffer *batch;
1987     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
1988     VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
1989     dri_bo *batch_bo;
1990     int i;
1991     int buffer_size;
1992     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1993     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1994
1995     buffer_size = width_in_mbs * height_in_mbs * 64;
1996     batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
1997     batch_bo = batch->buffer;
1998
1999     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
2000         if (i == encode_state->num_slice_params_ext - 1)
2001             next_slice_group_param = NULL;
2002         else
2003             next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;
2004
2005         gen6_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
2006     }
2007
2008     intel_batchbuffer_align(batch, 8);
2009     
2010     BEGIN_BCS_BATCH(batch, 2);
2011     OUT_BCS_BATCH(batch, 0);
2012     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
2013     ADVANCE_BCS_BATCH(batch);
2014
2015     dri_bo_reference(batch_bo);
2016     intel_batchbuffer_free(batch);
2017
2018     return batch_bo;
2019 }
2020
2021 static void
2022 gen6_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
2023                                             struct encode_state *encode_state,
2024                                             struct intel_encoder_context *encoder_context)
2025 {
2026     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2027
2028     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
2029     mfc_context->set_surface_state(ctx, encoder_context);
2030     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
2031     gen6_mfc_pipe_buf_addr_state(ctx, encoder_context);
2032     gen6_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
2033     gen6_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
2034     gen6_mfc_mpeg2_qm_state(ctx, encoder_context);
2035     gen6_mfc_mpeg2_fqm_state(ctx, encoder_context);
2036 }
2037
2038 static void
2039 gen6_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
2040                                     struct encode_state *encode_state,
2041                                     struct intel_encoder_context *encoder_context)
2042 {
2043     struct intel_batchbuffer *batch = encoder_context->base.batch;
2044     dri_bo *slice_batch_bo;
2045
2046     slice_batch_bo = gen6_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);
2047
2048     // begin programing
2049     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
2050     intel_batchbuffer_emit_mi_flush(batch);
2051     
2052     // picture level programing
2053     gen6_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);
2054
2055     BEGIN_BCS_BATCH(batch, 2);
2056     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
2057     OUT_BCS_RELOC(batch,
2058                   slice_batch_bo,
2059                   I915_GEM_DOMAIN_COMMAND, 0, 
2060                   0);
2061     ADVANCE_BCS_BATCH(batch);
2062
2063     // end programing
2064     intel_batchbuffer_end_atomic(batch);
2065
2066     dri_bo_unreference(slice_batch_bo);
2067 }
2068
2069 static VAStatus
2070 intel_mfc_mpeg2_prepare(VADriverContextP ctx,
2071                         struct encode_state *encode_state,
2072                         struct intel_encoder_context *encoder_context)
2073 {
2074     struct i965_driver_data *i965 = i965_driver_data(ctx);
2075     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2076     struct object_surface *obj_surface;
2077     struct object_buffer *obj_buffer;
2078     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
2079     struct i965_coded_buffer_segment *coded_buffer_segment;
2080     VAStatus vaStatus = VA_STATUS_SUCCESS;
2081     dri_bo *bo;
2082     int i;
2083
2084     /* reconstructed surface */
2085     obj_surface = SURFACE(pic_param->reconstructed_picture);
2086     assert(obj_surface);
2087     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2088     mfc_context->pre_deblocking_output.bo = obj_surface->bo;
2089     dri_bo_reference(mfc_context->pre_deblocking_output.bo);
2090     mfc_context->surface_state.width = obj_surface->orig_width;
2091     mfc_context->surface_state.height = obj_surface->orig_height;
2092     mfc_context->surface_state.w_pitch = obj_surface->width;
2093     mfc_context->surface_state.h_pitch = obj_surface->height;
2094
2095     /* forward reference */
2096     obj_surface = SURFACE(pic_param->forward_reference_picture);
2097
2098     if (obj_surface && obj_surface->bo) {
2099         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
2100         dri_bo_reference(mfc_context->reference_surfaces[0].bo);
2101     } else
2102         mfc_context->reference_surfaces[0].bo = NULL;
2103
2104     /* backward reference */
2105     obj_surface = SURFACE(pic_param->backward_reference_picture);
2106
2107     if (obj_surface && obj_surface->bo) {
2108         mfc_context->reference_surfaces[1].bo = obj_surface->bo;
2109         dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2110     } else {
2111         mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
2112
2113         if (mfc_context->reference_surfaces[1].bo)
2114             dri_bo_reference(mfc_context->reference_surfaces[1].bo);
2115     }
2116
2117     for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
2118         mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
2119
2120         if (mfc_context->reference_surfaces[i].bo)
2121             dri_bo_reference(mfc_context->reference_surfaces[i].bo);
2122     }
2123     
2124     /* input YUV surface */
2125     obj_surface = SURFACE(encoder_context->input_yuv_surface);
2126     assert(obj_surface && obj_surface->bo);
2127     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
2128     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2129
2130     /* coded buffer */
2131     obj_buffer = BUFFER(pic_param->coded_buf);
2132     bo = obj_buffer->buffer_store->bo;
2133     assert(bo);
2134     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
2135     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2136     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2137     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
2138
2139     /* set the internal flag to 0 to indicate the coded size is unknown */
2140     dri_bo_map(bo, 1);
2141     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
2142     coded_buffer_segment->mapped = 0;
2143     coded_buffer_segment->codec = CODED_MPEG2;
2144     dri_bo_unmap(bo);
2145
2146     return vaStatus;
2147 }
2148
2149 static VAStatus
2150 gen6_mfc_mpeg2_encode_picture(VADriverContextP ctx, 
2151                                struct encode_state *encode_state,
2152                                struct intel_encoder_context *encoder_context)
2153 {
2154     gen6_mfc_init(ctx, encode_state, encoder_context);
2155     intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
2156     /*Programing bcs pipeline*/
2157     gen6_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
2158     gen6_mfc_run(ctx, encode_state, encoder_context);
2159
2160     return VA_STATUS_SUCCESS;
2161 }
2162
2163
2164 VAStatus
2165 gen6_mfc_pipeline(VADriverContextP ctx,
2166                   VAProfile profile,
2167                   struct encode_state *encode_state,
2168                   struct intel_encoder_context *encoder_context)
2169 {
2170     VAStatus vaStatus;
2171
2172     switch (profile) {
2173     case VAProfileH264Baseline:
2174     case VAProfileH264Main:
2175     case VAProfileH264High:
2176         vaStatus = gen6_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
2177         break;
2178
2179     case VAProfileMPEG2Simple:
2180     case VAProfileMPEG2Main:
2181         vaStatus = gen6_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
2182         break;
2183
2184         /* FIXME: add for other profile */
2185     default:
2186         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2187         break;
2188     }
2189
2190     return vaStatus;
2191 }
2192
2193 void
2194 gen6_mfc_context_destroy(void *context)
2195 {
2196     struct gen6_mfc_context *mfc_context = context;
2197     int i;
2198
2199     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2200     mfc_context->post_deblocking_output.bo = NULL;
2201
2202     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2203     mfc_context->pre_deblocking_output.bo = NULL;
2204
2205     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2206     mfc_context->uncompressed_picture_source.bo = NULL;
2207
2208     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
2209     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2210
2211     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2212         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2213         mfc_context->direct_mv_buffers[i].bo = NULL;
2214     }
2215
2216     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2217     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
2218
2219     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2220     mfc_context->macroblock_status_buffer.bo = NULL;
2221
2222     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2223     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2224
2225     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2226     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2227
2228
2229     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2230         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2231         mfc_context->reference_surfaces[i].bo = NULL;  
2232     }
2233
2234     i965_gpe_context_destroy(&mfc_context->gpe_context);
2235
2236     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2237     mfc_context->mfc_batchbuffer_surface.bo = NULL;
2238
2239     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2240     mfc_context->aux_batchbuffer_surface.bo = NULL;
2241
2242     if (mfc_context->aux_batchbuffer)
2243         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2244
2245     mfc_context->aux_batchbuffer = NULL;
2246
2247     free(mfc_context);
2248 }
2249
2250 Bool gen6_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2251 {
2252     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
2253
2254     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2255
2256     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
2257     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
2258
2259     mfc_context->gpe_context.curbe.length = 32 * 4;
2260
2261     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2262     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
2263     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
2264     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
2265     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
2266
2267     i965_gpe_load_kernels(ctx,
2268                           &mfc_context->gpe_context,
2269                           gen6_mfc_kernels,
2270                           NUM_MFC_KERNEL);
2271
2272     mfc_context->pipe_mode_select = gen6_mfc_pipe_mode_select;
2273     mfc_context->set_surface_state = gen6_mfc_surface_state;
2274     mfc_context->ind_obj_base_addr_state = gen6_mfc_ind_obj_base_addr_state;
2275     mfc_context->avc_img_state = gen6_mfc_avc_img_state;
2276     mfc_context->avc_qm_state = gen6_mfc_avc_qm_state;
2277     mfc_context->avc_fqm_state = gen6_mfc_avc_fqm_state;
2278     mfc_context->insert_object = gen6_mfc_avc_insert_object;
2279     mfc_context->buffer_suface_setup = i965_gpe_buffer_suface_setup;
2280
2281     encoder_context->mfc_context = mfc_context;
2282     encoder_context->mfc_context_destroy = gen6_mfc_context_destroy;
2283     encoder_context->mfc_pipeline = gen6_mfc_pipeline;
2284     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
2285
2286     return True;
2287 }