i965_drv_video: generate slice header in driver
[platform/upstream/libva-intel-driver.git] / src / gen6_mfc.c
1 /*
2  * Copyright © 2010-2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhou Chang <chang.zhou@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "assert.h"
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40 #include "i965_encoder_utils.h"
41
42 static void
43 gen6_mfc_pipe_mode_select(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
44 {
45     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
46
47     BEGIN_BCS_BATCH(batch, 4);
48
49     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
50     OUT_BCS_BATCH(batch,
51                   (0 << 10) | /* disable Stream-Out , advanced QP/bitrate control need enable it*/
52                   (1 << 9)  | /* Post Deblocking Output */
53                   (0 << 8)  | /* Pre Deblocking Output */
54                   (0 << 7)  | /* disable TLB prefectch */
55                   (0 << 5)  | /* not in stitch mode */
56                   (1 << 4)  | /* encoding mode */
57                   (2 << 0));  /* Standard Select: AVC */
58     OUT_BCS_BATCH(batch,
59                   (0 << 20) | /* round flag in PB slice */
60                   (0 << 19) | /* round flag in Intra8x8 */
61                   (0 << 7)  | /* expand NOA bus flag */
62                   (1 << 6)  | /* must be 1 */
63                   (0 << 5)  | /* disable clock gating for NOA */
64                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
65                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
66                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
67                   (0 << 1)  | /* AVC long field motion vector */
68                   (0 << 0));  /* always calculate AVC ILDB boundary strength */
69     OUT_BCS_BATCH(batch, 0);
70
71     ADVANCE_BCS_BATCH(batch);
72 }
73
74 static void
75 gen7_mfc_pipe_mode_select(VADriverContextP ctx,
76                           int standard_select,
77                           struct gen6_encoder_context *gen6_encoder_context)
78 {
79     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
80
81     assert(standard_select == MFX_FORMAT_MPEG2 ||
82            standard_select == MFX_FORMAT_AVC);
83
84     BEGIN_BCS_BATCH(batch, 5);
85     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
86     OUT_BCS_BATCH(batch,
87                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
88                   (MFD_MODE_VLD << 15) | /* VLD mode */
89                   (0 << 10) | /* disable Stream-Out */
90                   (1 << 9)  | /* Post Deblocking Output */
91                   (0 << 8)  | /* Pre Deblocking Output */
92                   (0 << 5)  | /* not in stitch mode */
93                   (1 << 4)  | /* encoding mode */
94                   (standard_select << 0));  /* standard select: avc or mpeg2 */
95     OUT_BCS_BATCH(batch,
96                   (0 << 7)  | /* expand NOA bus flag */
97                   (0 << 6)  | /* disable slice-level clock gating */
98                   (0 << 5)  | /* disable clock gating for NOA */
99                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
100                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
101                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
102                   (0 << 1)  |
103                   (0 << 0));
104     OUT_BCS_BATCH(batch, 0);
105     OUT_BCS_BATCH(batch, 0);
106
107     ADVANCE_BCS_BATCH(batch);
108 }
109
110 static void
111 gen6_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
112 {
113     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
114     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
115
116     BEGIN_BCS_BATCH(batch, 6);
117
118     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
119     OUT_BCS_BATCH(batch, 0);
120     OUT_BCS_BATCH(batch,
121                   ((mfc_context->surface_state.height - 1) << 19) |
122                   ((mfc_context->surface_state.width - 1) << 6));
123     OUT_BCS_BATCH(batch,
124                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
125                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
126                   (0 << 22) | /* surface object control state, FIXME??? */
127                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
128                   (0 << 2)  | /* must be 0 for interleave U/V */
129                   (1 << 1)  | /* must be y-tiled */
130                   (I965_TILEWALK_YMAJOR << 0));                         /* tile walk, TILEWALK_YMAJOR */
131     OUT_BCS_BATCH(batch,
132                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
133                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
134     OUT_BCS_BATCH(batch, 0);
135     ADVANCE_BCS_BATCH(batch);
136 }
137
138 static void
139 gen7_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
140 {
141     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
142     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
143
144     BEGIN_BCS_BATCH(batch, 6);
145
146     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
147     OUT_BCS_BATCH(batch, 0);
148     OUT_BCS_BATCH(batch,
149                   ((mfc_context->surface_state.height - 1) << 18) |
150                   ((mfc_context->surface_state.width - 1) << 4));
151     OUT_BCS_BATCH(batch,
152                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
153                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
154                   (0 << 22) | /* surface object control state, FIXME??? */
155                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
156                   (0 << 2)  | /* must be 0 for interleave U/V */
157                   (1 << 1)  | /* must be tiled */
158                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
159     OUT_BCS_BATCH(batch,
160                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
161                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
162     OUT_BCS_BATCH(batch, 0);
163     ADVANCE_BCS_BATCH(batch);
164 }
165
166 static void
167 gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
168 {
169     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
170     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
171     int i;
172
173     BEGIN_BCS_BATCH(batch, 24);
174
175     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
176
177     OUT_BCS_BATCH(batch, 0);                                                                                    /* pre output addr   */
178
179     OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
180                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
181                   0);                                                                                   /* post output addr  */ 
182
183     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
184                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
185                   0);                                                                                   /* uncompressed data */
186     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
187                                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
188                                   0);                                                                                   /* StreamOut data*/
189     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
190                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
191                   0);   
192     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
193                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
194                   0);
195     /* 7..22 Reference pictures*/
196     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
197         if ( mfc_context->reference_surfaces[i].bo != NULL) {
198             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
199                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
200                           0);                   
201         } else {
202             OUT_BCS_BATCH(batch, 0);
203         }
204     }
205     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
206                                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
207                                   0);                                                                                   /* Macroblock status buffer*/
208
209     ADVANCE_BCS_BATCH(batch);
210 }
211
212 static void
213 gen6_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
214 {
215     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
216     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
217
218     BEGIN_BCS_BATCH(batch, 11);
219
220     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
221     OUT_BCS_BATCH(batch, 0);
222     OUT_BCS_BATCH(batch, 0);
223     /* MFX Indirect MV Object Base Address */
224     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
225     OUT_BCS_BATCH(batch, 0);    
226     OUT_BCS_BATCH(batch, 0);
227     OUT_BCS_BATCH(batch, 0);
228     OUT_BCS_BATCH(batch, 0);
229     OUT_BCS_BATCH(batch, 0);
230     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
231     OUT_BCS_BATCH(batch, 0);
232     OUT_BCS_BATCH(batch, 0);
233
234     ADVANCE_BCS_BATCH(batch);
235 }
236
237 static void
238 gen7_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
239 {
240     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
241     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
242
243     BEGIN_BCS_BATCH(batch, 11);
244
245     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
246     OUT_BCS_BATCH(batch, 0);
247     OUT_BCS_BATCH(batch, 0);
248     /* MFX Indirect MV Object Base Address */
249     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
250     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
251     OUT_BCS_BATCH(batch, 0);
252     OUT_BCS_BATCH(batch, 0);
253     OUT_BCS_BATCH(batch, 0);
254     OUT_BCS_BATCH(batch, 0);
255     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
256     OUT_BCS_BATCH(batch, 0);
257     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
258
259     ADVANCE_BCS_BATCH(batch);
260 }
261
262 static void
263 gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
264 {
265     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
266     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
267
268     BEGIN_BCS_BATCH(batch, 4);
269
270     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
271     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
272                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
273                   0);
274     OUT_BCS_BATCH(batch, 0);
275     OUT_BCS_BATCH(batch, 0);
276
277     ADVANCE_BCS_BATCH(batch);
278 }
279
280 static void
281 gen6_mfc_avc_img_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
282 {
283     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
284     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
285
286     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
287     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
288
289     BEGIN_BCS_BATCH(batch, 13);
290     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (13 - 2));
291     OUT_BCS_BATCH(batch, 
292                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
293     OUT_BCS_BATCH(batch, 
294                   (height_in_mbs << 16) | 
295                   (width_in_mbs << 0));
296     OUT_BCS_BATCH(batch, 
297                   (0 << 24) |     /*Second Chroma QP Offset*/
298                   (0 << 16) |     /*Chroma QP Offset*/
299                   (0 << 14) |   /*Max-bit conformance Intra flag*/
300                   (0 << 13) |   /*Max Macroblock size conformance Inter flag*/
301                   (1 << 12) |   /*Should always be written as "1" */
302                   (0 << 10) |   /*QM Preset FLag */
303                   (0 << 8)  |   /*Image Structure*/
304                   (0 << 0) );   /*Current Decoed Image Frame Store ID, reserved in Encode mode*/
305     OUT_BCS_BATCH(batch,
306                   (400 << 16) |   /*Mininum Frame size*/        
307                   (0 << 15) |   /*Disable reading of Macroblock Status Buffer*/
308                   (0 << 14) |   /*Load BitStream Pointer only once, 1 slic 1 frame*/
309                   (0 << 13) |   /*CABAC 0 word insertion test enable*/
310                   (1 << 12) |   /*MVUnpackedEnable,compliant to DXVA*/
311                   (1 << 10) |   /*Chroma Format IDC, 4:2:0*/
312                   (1 << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
313                   (0 << 6)  |   /*Only valid for VLD decoding mode*/
314                   (0 << 5)  |   /*Constrained Intra Predition Flag, from PPS*/
315                   (0 << 4)  |   /*Direct 8x8 inference flag*/
316                   (0 << 3)  |   /*Only 8x8 IDCT Transform Mode Flag*/
317                   (1 << 2)  |   /*Frame MB only flag*/
318                   (0 << 1)  |   /*MBAFF mode is in active*/
319                   (0 << 0) );   /*Field picture flag*/
320     OUT_BCS_BATCH(batch, 
321                   (1<<16)   |   /*Frame Size Rate Control Flag*/  
322                   (1<<12)   |   
323                   (1<<9)    |   /*MB level Rate Control Enabling Flag*/
324                   (1 << 3)  |   /*FrameBitRateMinReportMask*/
325                   (1 << 2)  |   /*FrameBitRateMaxReportMask*/
326                   (1 << 1)  |   /*InterMBMaxSizeReportMask*/
327                   (1 << 0) );   /*IntraMBMaxSizeReportMask*/
328     OUT_BCS_BATCH(batch,                        /*Inter and Intra Conformance Max size limit*/
329                   (0x0600 << 16) |              /*InterMbMaxSz 192 Byte*/
330                   (0x0800) );                   /*IntraMbMaxSz 256 Byte*/
331     OUT_BCS_BATCH(batch, 0x00000000);   /*Reserved : MBZReserved*/
332     OUT_BCS_BATCH(batch, 0x01020304);   /*Slice QP Delta for bitrate control*/                  
333     OUT_BCS_BATCH(batch, 0xFEFDFCFB);           
334     OUT_BCS_BATCH(batch, 0x80601004);   /*MAX = 128KB, MIN = 64KB*/
335     OUT_BCS_BATCH(batch, 0x00800001);   
336     OUT_BCS_BATCH(batch, 0);
337
338     ADVANCE_BCS_BATCH(batch);
339 }
340
341 static void
342 gen7_mfc_avc_img_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
343 {
344     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
345     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
346
347     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
348     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
349
350     BEGIN_BCS_BATCH(batch, 16);
351     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
352     OUT_BCS_BATCH(batch,
353                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
354     OUT_BCS_BATCH(batch, 
355                   ((height_in_mbs - 1) << 16) | 
356                   ((width_in_mbs - 1) << 0));
357     OUT_BCS_BATCH(batch, 
358                   (0 << 24) |   /* Second Chroma QP Offset */
359                   (0 << 16) |   /* Chroma QP Offset */
360                   (0 << 14) |   /* Max-bit conformance Intra flag */
361                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
362                   (0 << 12) |   /* FIXME: Weighted_Pred_Flag */
363                   (0 << 10) |   /* FIXME: Weighted_BiPred_Idc */
364                   (0 << 8)  |   /* FIXME: Image Structure */
365                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
366     OUT_BCS_BATCH(batch,
367                   (0 << 16) |   /* Mininum Frame size */
368                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
369                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
370                   (0 << 13) |   /* CABAC 0 word insertion test enable */
371                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
372                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
373                   (0 << 9)  |   /* FIXME: MbMvFormatFlag */
374                   (1 << 7)  |   /* 0:CAVLC encoding mode,1:CABAC */
375                   (0 << 6)  |   /* Only valid for VLD decoding mode */
376                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
377                   (0 << 4)  |   /* Direct 8x8 inference flag */
378                   (0 << 3)  |   /* Only 8x8 IDCT Transform Mode Flag */
379                   (1 << 2)  |   /* Frame MB only flag */
380                   (0 << 1)  |   /* MBAFF mode is in active */
381                   (0 << 0));    /* Field picture flag */
382     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
383     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
384                   (0xBB8 << 16) |       /* InterMbMaxSz */
385                   (0xEE8) );            /* IntraMbMaxSz */
386     OUT_BCS_BATCH(batch, 0);            /* Reserved */
387     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
388     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */        
389     OUT_BCS_BATCH(batch, 0x8C000000);
390     OUT_BCS_BATCH(batch, 0x00010000);
391     OUT_BCS_BATCH(batch, 0);
392     OUT_BCS_BATCH(batch, 0);
393     OUT_BCS_BATCH(batch, 0);
394     OUT_BCS_BATCH(batch, 0);
395
396     ADVANCE_BCS_BATCH(batch);
397 }
398
399 static void gen6_mfc_avc_directmode_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
400 {
401     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
402     int i;
403
404     BEGIN_BCS_BATCH(batch, 69);
405
406     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
407     //TODO: reference DMV
408     for(i = 0; i < 16; i++){
409         OUT_BCS_BATCH(batch, 0);
410         OUT_BCS_BATCH(batch, 0);
411     }
412
413     //TODO: current DMV just for test
414 #if 0
415     OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[0].bo,
416                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
417                   0);
418 #else
419     //drm_intel_bo_pin(mfc_context->direct_mv_buffers[0].bo, 0x1000);
420     //OUT_BCS_BATCH(batch, mfc_context->direct_mv_buffers[0].bo->offset);
421     OUT_BCS_BATCH(batch, 0);
422 #endif
423
424
425     OUT_BCS_BATCH(batch, 0);
426
427     //TODO: POL list
428     for(i = 0; i < 34; i++) {
429         OUT_BCS_BATCH(batch, 0);
430     }
431
432     ADVANCE_BCS_BATCH(batch);
433 }
434
435 static void gen6_mfc_avc_slice_state(VADriverContextP ctx,
436                                      int slice_type,
437                                      struct gen6_encoder_context *gen6_encoder_context,
438                                      int rate_control_enable,
439                                      int qp)
440 {
441     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
442     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
443
444     BEGIN_BCS_BATCH(batch, 11);;
445
446     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
447
448         OUT_BCS_BATCH(batch, slice_type);                       /*Slice Type: I:P:B Slice*/
449
450     if ( slice_type == SLICE_TYPE_I )
451         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
452     else 
453         OUT_BCS_BATCH(batch, 0x00010000);       /*1 reference frame*/
454
455     OUT_BCS_BATCH(batch, (0<<24) |                /*Enable deblocking operation*/
456                   (qp<<16) |                    /*Slice Quantization Parameter*/
457                   0x0202 );
458     OUT_BCS_BATCH(batch, 0);                    /*First MB X&Y , the postion of current slice*/
459     OUT_BCS_BATCH(batch, ( ((mfc_context->surface_state.height+15)/16) << 16) );
460
461     OUT_BCS_BATCH(batch, 
462                   (rate_control_enable<<31) |           /*in CBR mode RateControlCounterEnable = enable*/
463                   (1<<30) |             /*ResetRateControlCounter*/
464                   (0<<28) |             /*RC Triggle Mode = Always Rate Control*/
465                   (8<<24) |     /*RC Stable Tolerance, middle level*/
466                   (rate_control_enable<<23) |     /*RC Panic Enable*/                  
467                   (0<<22) |     /*QP mode, don't modfiy CBP*/
468                   (1<<19) |     /*IsLastSlice*/
469                   (0<<18) |     /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
470                   (1<<17) |         /*HeaderPresentFlag*/       
471                   (1<<16) |         /*SliceData PresentFlag*/
472                   (0<<15) |         /*TailPresentFlag*/
473                   (1<<13) |         /*RBSP NAL TYPE*/   
474                   (0<<12) );    /*CabacZeroWordInsertionEnable*/
475         
476     OUT_BCS_RELOC(batch, mfc_context->mfc_indirect_pak_bse_object.bo,
477                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
478                   mfc_context->mfc_indirect_pak_bse_object.offset);
479
480     OUT_BCS_BATCH(batch, (24<<24) |     /*Target QP - 24 is lowest QP*/ 
481                          (20<<16) |     /*Target QP + 20 is highest QP*/
482                          (8<<12)  |
483                          (8<<8)   |
484                          (8<<4)   |
485                          (8<<0));
486     OUT_BCS_BATCH(batch, 0x08888888);   
487     OUT_BCS_BATCH(batch, 0);
488
489     ADVANCE_BCS_BATCH(batch);
490 }
491 static void gen6_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
492 {
493     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
494     int i;
495
496     BEGIN_BCS_BATCH(batch, 58);
497
498     OUT_BCS_BATCH(batch, MFX_AVC_QM_STATE | 56);
499     OUT_BCS_BATCH(batch, 0xFF ) ; 
500     for( i = 0; i < 56; i++) {
501         OUT_BCS_BATCH(batch, 0x10101010); 
502     }   
503
504     ADVANCE_BCS_BATCH(batch);
505 }
506
507 static void gen6_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
508 {
509     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
510     int i;
511
512     BEGIN_BCS_BATCH(batch, 113);
513     OUT_BCS_BATCH(batch, MFC_AVC_FQM_STATE | (113 - 2));
514
515     for(i = 0; i < 112;i++) {
516         OUT_BCS_BATCH(batch, 0x10001000);
517     }   
518
519     ADVANCE_BCS_BATCH(batch);   
520 }
521
522 static void
523 gen7_mfc_qm_state(VADriverContextP ctx,
524                   int qm_type,
525                   unsigned int *qm,
526                   int qm_length,
527                   struct gen6_encoder_context *gen6_encoder_context)
528 {
529     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
530     unsigned int qm_buffer[16];
531
532     assert(qm_length <= 16);
533     assert(sizeof(*qm) == 4);
534     memcpy(qm_buffer, qm, qm_length * 4);
535
536     BEGIN_BCS_BATCH(batch, 18);
537     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
538     OUT_BCS_BATCH(batch, qm_type << 0);
539     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
540     ADVANCE_BCS_BATCH(batch);
541 }
542
543 static void gen7_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
544 {
545     unsigned int qm[16] = {
546         0x10101010, 0x10101010, 0x10101010, 0x10101010,
547         0x10101010, 0x10101010, 0x10101010, 0x10101010,
548         0x10101010, 0x10101010, 0x10101010, 0x10101010,
549         0x10101010, 0x10101010, 0x10101010, 0x10101010
550     };
551
552     gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, gen6_encoder_context);
553     gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, gen6_encoder_context);
554     gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, gen6_encoder_context);
555     gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, gen6_encoder_context);
556 }
557
558 static void
559 gen7_mfc_fqm_state(VADriverContextP ctx,
560                    int fqm_type,
561                    unsigned int *fqm,
562                    int fqm_length,
563                    struct gen6_encoder_context *gen6_encoder_context)
564 {
565     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
566     unsigned int fqm_buffer[32];
567
568     assert(fqm_length <= 32);
569     assert(sizeof(*fqm) == 4);
570     memcpy(fqm_buffer, fqm, fqm_length * 4);
571
572     BEGIN_BCS_BATCH(batch, 34);
573     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
574     OUT_BCS_BATCH(batch, fqm_type << 0);
575     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
576     ADVANCE_BCS_BATCH(batch);
577 }
578
579 static void gen7_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
580 {
581     unsigned int qm[32] = {
582         0x10001000, 0x10001000, 0x10001000, 0x10001000,
583         0x10001000, 0x10001000, 0x10001000, 0x10001000,
584         0x10001000, 0x10001000, 0x10001000, 0x10001000,
585         0x10001000, 0x10001000, 0x10001000, 0x10001000,
586         0x10001000, 0x10001000, 0x10001000, 0x10001000,
587         0x10001000, 0x10001000, 0x10001000, 0x10001000,
588         0x10001000, 0x10001000, 0x10001000, 0x10001000,
589         0x10001000, 0x10001000, 0x10001000, 0x10001000
590     };
591
592     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, gen6_encoder_context);
593     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, gen6_encoder_context);
594     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, gen6_encoder_context);
595     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, gen6_encoder_context);
596 }
597
598 static void gen6_mfc_avc_ref_idx_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
599 {
600     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
601     int i;
602
603         BEGIN_BCS_BATCH(batch, 10);
604         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
605         OUT_BCS_BATCH(batch, 0);                  //Select L0
606         OUT_BCS_BATCH(batch, 0x80808020);         //Only 1 reference
607         for(i = 0; i < 7; i++) {
608                 OUT_BCS_BATCH(batch, 0x80808080);
609         }   
610         ADVANCE_BCS_BATCH(batch);
611
612         BEGIN_BCS_BATCH(batch, 10);
613         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
614         OUT_BCS_BATCH(batch, 1);                  //Select L1
615         OUT_BCS_BATCH(batch, 0x80808022);         //Only 1 reference
616         for(i = 0; i < 7; i++) {
617                 OUT_BCS_BATCH(batch, 0x80808080);
618         }   
619         ADVANCE_BCS_BATCH(batch);
620 }
621         
622 static void
623 gen6_mfc_avc_insert_object(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context,
624                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
625                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice)
626 {
627     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
628
629     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
630
631     OUT_BCS_BATCH(batch, MFC_AVC_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
632     OUT_BCS_BATCH(batch,
633                   (0 << 16) |   /* always start at offset 0 */
634                   (data_bits_in_last_dw << 8) |
635                   (skip_emul_byte_count << 4) |
636                   (1 << 3) |    /* FIXME: ??? */
637                   ((!!is_last_header) << 2) |
638                   ((!!is_end_of_slice) << 1) |
639                   (1 << 0));    /* FIXME: ??? */
640
641     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
642     ADVANCE_BCS_BATCH(batch);
643 }
644
645 static int
646 gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg,
647                               struct gen6_encoder_context *gen6_encoder_context,
648                               int intra_mb_size_in_bits)
649 {
650     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
651     int len_in_dwords = 11;
652     unsigned char target_mb_size = intra_mb_size_in_bits / 16;     //In Words
653     unsigned char max_mb_size = target_mb_size * 2 > 255? 255: target_mb_size * 2 ;
654
655     BEGIN_BCS_BATCH(batch, len_in_dwords);
656
657     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
658     OUT_BCS_BATCH(batch, 0);
659     OUT_BCS_BATCH(batch, 0);
660     OUT_BCS_BATCH(batch, 
661                   (0 << 24) |           /* PackedMvNum, Debug*/
662                   (0 << 20) |           /* No motion vector */
663                   (1 << 19) |           /* CbpDcY */
664                   (1 << 18) |           /* CbpDcU */
665                   (1 << 17) |           /* CbpDcV */
666                   (msg[0] & 0xFFFF) );
667
668     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);          /* Code Block Pattern for Y*/
669     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
670     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
671
672     /*Stuff for Intra MB*/
673     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
674     OUT_BCS_BATCH(batch, msg[2]);       
675     OUT_BCS_BATCH(batch, msg[3]&0xFC);          
676     
677     /*MaxSizeInWord and TargetSzieInWord*/
678     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
679                          (target_mb_size << 16) );
680
681     ADVANCE_BCS_BATCH(batch);
682
683     return len_in_dwords;
684 }
685
686 static int gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp, unsigned int offset,
687                                          struct gen6_encoder_context *gen6_encoder_context,
688                                          int inter_mb_size_in_bits)
689 {
690     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
691     int len_in_dwords = 11;
692     unsigned char target_mb_size = inter_mb_size_in_bits / 16;     //In Words
693     unsigned char max_mb_size = target_mb_size * 16 > 255? 255: target_mb_size * 16 ;
694
695     BEGIN_BCS_BATCH(batch, len_in_dwords);
696
697     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
698
699     OUT_BCS_BATCH(batch, 32);         /* 32 MV*/
700     OUT_BCS_BATCH(batch, offset);
701
702     OUT_BCS_BATCH(batch, 
703                   (1 << 24) |     /* PackedMvNum, Debug*/
704                   (4 << 20) |     /* 8 MV, SNB don't use it*/
705                   (1 << 19) |     /* CbpDcY */
706                   (1 << 18) |     /* CbpDcU */
707                   (1 << 17) |     /* CbpDcV */
708                   (0 << 15) |     /* Transform8x8Flag = 0*/
709                   (0 << 14) |     /* Frame based*/
710                   (0 << 13) |     /* Inter MB */
711                   (1 << 8)  |     /* MbType = P_L0_16x16 */   
712                   (0 << 7)  |     /* MBZ for frame */
713                   (0 << 6)  |     /* MBZ */
714                   (2 << 4)  |     /* MBZ for inter*/
715                   (0 << 3)  |     /* MBZ */
716                   (0 << 2)  |     /* SkipMbFlag */
717                   (0 << 0));      /* InterMbMode */
718
719     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
720     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */    
721     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
722
723     /*Stuff for Inter MB*/
724     OUT_BCS_BATCH(batch, 0x0);        
725     OUT_BCS_BATCH(batch, 0x0);    
726     OUT_BCS_BATCH(batch, 0x0);        
727
728     /*MaxSizeInWord and TargetSzieInWord*/
729     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
730                          (target_mb_size << 16) );
731
732     ADVANCE_BCS_BATCH(batch);
733
734     return len_in_dwords;
735 }
736
737 static void gen6_mfc_init(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
738 {
739     struct i965_driver_data *i965 = i965_driver_data(ctx);
740     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
741     dri_bo *bo;
742     int i;
743
744     /*Encode common setup for MFC*/
745     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
746     mfc_context->post_deblocking_output.bo = NULL;
747
748     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
749     mfc_context->pre_deblocking_output.bo = NULL;
750
751     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
752     mfc_context->uncompressed_picture_source.bo = NULL;
753
754     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
755     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
756
757     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
758         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
759         mfc_context->direct_mv_buffers[i].bo = NULL;
760     }
761
762     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
763         if (mfc_context->reference_surfaces[i].bo != NULL)
764             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
765         mfc_context->reference_surfaces[i].bo = NULL;  
766     }
767
768     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
769     bo = dri_bo_alloc(i965->intel.bufmgr,
770                       "Buffer",
771                       128 * 64,
772                       64);
773     assert(bo);
774     mfc_context->intra_row_store_scratch_buffer.bo = bo;
775
776     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
777     bo = dri_bo_alloc(i965->intel.bufmgr,
778                       "Buffer",
779                       4*9600,
780                       64);
781     assert(bo);
782     mfc_context->macroblock_status_buffer.bo = bo;
783
784     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
785     bo = dri_bo_alloc(i965->intel.bufmgr,
786                       "Buffer",
787                       49152,  /* 6 * 128 * 64 */
788                       64);
789     assert(bo);
790     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
791
792     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
793     bo = dri_bo_alloc(i965->intel.bufmgr,
794                       "Buffer",
795                       12288, /* 1.5 * 128 * 64 */
796                       0x1000);
797     assert(bo);
798     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
799 }
800
801 void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
802                                       struct encode_state *encode_state,
803                                       struct gen6_encoder_context *gen6_encoder_context)
804 {
805     struct i965_driver_data *i965 = i965_driver_data(ctx);
806     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
807     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
808     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
809     VAEncSequenceParameterBufferH264Ext *pSequenceParameter = (VAEncSequenceParameterBufferH264Ext *)encode_state->seq_param_ext->buffer;
810     VAEncPictureParameterBufferH264Ext *pPicParameter = (VAEncPictureParameterBufferH264Ext *)encode_state->pic_param_ext->buffer;
811     VAEncSliceParameterBufferH264Ext *pSliceParameter = (VAEncSliceParameterBufferH264Ext *)encode_state->slice_params_ext[0]->buffer; /* FIXME: multi slices */
812     VAEncH264DecRefPicMarkingBuffer *pDecRefPicMarking = NULL;
813     unsigned int *msg = NULL, offset = 0;
814     int emit_new_state = 1, object_len_in_bytes;
815     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
816     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
817     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
818     int x,y;
819     int rate_control_mode = pSequenceParameter->rate_control_method; 
820     float fps =  pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ;
821     int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / fps / width_in_mbs / height_in_mbs;
822     int intra_mb_size = inter_mb_size * 5.0;
823     int qp = pPicParameter->pic_init_qp;
824     unsigned char *slice_header = NULL;
825     int slice_header_length_in_bits = 0;
826
827     if (encode_state->dec_ref_pic_marking)
828         pDecRefPicMarking = (VAEncH264DecRefPicMarkingBuffer *)encode_state->dec_ref_pic_marking->buffer;
829
830     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, pDecRefPicMarking, &slice_header);
831
832     if ( rate_control_mode != 2) {
833         qp = 26;
834         if ( intra_mb_size > 384*8)         //ONE MB raw data is 384 bytes
835             intra_mb_size = 384*8;
836         if ( inter_mb_size > 256*8)
837             intra_mb_size = 256*8;
838     }
839
840     intel_batchbuffer_start_atomic_bcs(batch, 0x1000); 
841     
842     if (is_intra) {
843         dri_bo_map(vme_context->vme_output.bo , 1);
844         msg = (unsigned int *)vme_context->vme_output.bo->virtual;
845     }
846
847     for (y = 0; y < height_in_mbs; y++) {
848         for (x = 0; x < width_in_mbs; x++) { 
849             int last_mb = (y == (height_in_mbs-1)) && ( x == (width_in_mbs-1) );
850             
851             if (emit_new_state) {
852                 intel_batchbuffer_emit_mi_flush(batch);
853                 
854                 if (IS_GEN7(i965->intel.device_id)) {
855                     gen7_mfc_pipe_mode_select(ctx, MFX_FORMAT_AVC, gen6_encoder_context);
856                     gen7_mfc_surface_state(ctx, gen6_encoder_context);
857                     gen7_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context);
858                 } else {
859                     gen6_mfc_pipe_mode_select(ctx, gen6_encoder_context);
860                     gen6_mfc_surface_state(ctx, gen6_encoder_context);
861                     gen6_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context);
862                 }
863
864                 gen6_mfc_pipe_buf_addr_state(ctx, gen6_encoder_context);
865                 gen6_mfc_bsp_buf_base_addr_state(ctx, gen6_encoder_context);
866
867                 if (IS_GEN7(i965->intel.device_id)) {
868                     gen7_mfc_avc_img_state(ctx, gen6_encoder_context);
869                     gen7_mfc_avc_qm_state(ctx, gen6_encoder_context);
870                     gen7_mfc_avc_fqm_state(ctx, gen6_encoder_context);
871                 } else {
872                     gen6_mfc_avc_img_state(ctx, gen6_encoder_context);
873                     gen6_mfc_avc_qm_state(ctx, gen6_encoder_context);
874                     gen6_mfc_avc_fqm_state(ctx, gen6_encoder_context);
875                 }
876
877                 gen6_mfc_avc_ref_idx_state(ctx, gen6_encoder_context);
878                 gen6_mfc_avc_slice_state(ctx, pSliceParameter->slice_type, gen6_encoder_context, rate_control_mode == 0, qp);
879                 gen6_mfc_avc_insert_object(ctx, gen6_encoder_context,
880                                            (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
881                                            5, 1, 0); /* first 5 bytes are start code + nal unit type */
882                 emit_new_state = 0;
883             }
884
885             if (is_intra) {
886                 assert(msg);
887                 object_len_in_bytes = gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, gen6_encoder_context, intra_mb_size);
888                 msg += 4;
889             } else {
890                 object_len_in_bytes = gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, offset, gen6_encoder_context, inter_mb_size);
891                 offset += 64;
892             }
893
894             if (intel_batchbuffer_check_free_space(batch, object_len_in_bytes) == 0) {
895                 intel_batchbuffer_end_atomic(batch);
896                 intel_batchbuffer_flush(batch);
897                 emit_new_state = 1;
898                 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
899             }
900         }
901     }
902
903     if (is_intra)
904         dri_bo_unmap(vme_context->vme_output.bo);
905
906     free(slice_header);
907
908     intel_batchbuffer_end_atomic(batch);
909 }
910
911 static VAStatus gen6_mfc_avc_prepare(VADriverContextP ctx, 
912                                      struct encode_state *encode_state,
913                                      struct gen6_encoder_context *gen6_encoder_context)
914 {
915     struct i965_driver_data *i965 = i965_driver_data(ctx);
916     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
917     struct object_surface *obj_surface; 
918     struct object_buffer *obj_buffer;
919     dri_bo *bo;
920     VAEncPictureParameterBufferH264Ext *pPicParameter = (VAEncPictureParameterBufferH264Ext *)encode_state->pic_param_ext->buffer;
921     VAStatus vaStatus = VA_STATUS_SUCCESS;
922         int i;
923
924     /*Setup all the input&output object*/
925     obj_surface = SURFACE(pPicParameter->CurrPic.picture_id);
926     assert(obj_surface);
927     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));
928
929     mfc_context->post_deblocking_output.bo = obj_surface->bo;
930     dri_bo_reference(mfc_context->post_deblocking_output.bo);
931
932     mfc_context->surface_state.width = obj_surface->orig_width;
933     mfc_context->surface_state.height = obj_surface->orig_height;
934     mfc_context->surface_state.w_pitch = obj_surface->width;
935     mfc_context->surface_state.h_pitch = obj_surface->height;
936
937     for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
938                 if ( pPicParameter->ReferenceFrames[i].picture_id != VA_INVALID_ID ) { 
939                         obj_surface = SURFACE(pPicParameter->ReferenceFrames[i].picture_id);
940                         assert(obj_surface);
941                         if (obj_surface->bo != NULL) {
942                                 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
943                                 dri_bo_reference(obj_surface->bo);
944                         }
945                 } else {
946                         break;
947                 }
948         }
949         
950     obj_surface = SURFACE(encode_state->current_render_target);
951     assert(obj_surface && obj_surface->bo);
952     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
953     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
954
955     obj_buffer = BUFFER (pPicParameter->CodedBuf); /* FIXME: fix this later */
956     bo = obj_buffer->buffer_store->bo;
957     assert(bo);
958     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
959     mfc_context->mfc_indirect_pak_bse_object.offset = ALIGN(sizeof(VACodedBufferSegment), 64);
960     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
961
962     /*Programing bcs pipeline*/
963     gen6_mfc_avc_pipeline_programing(ctx, encode_state, gen6_encoder_context);  //filling the pipeline
964         
965     return vaStatus;
966 }
967
968 static VAStatus gen6_mfc_run(VADriverContextP ctx, 
969                              struct encode_state *encode_state,
970                              struct gen6_encoder_context *gen6_encoder_context)
971 {
972     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
973
974     intel_batchbuffer_flush(batch);             //run the pipeline
975
976     return VA_STATUS_SUCCESS;
977 }
978
979 static VAStatus gen6_mfc_stop(VADriverContextP ctx, 
980                               struct encode_state *encode_state,
981                               struct gen6_encoder_context *gen6_encoder_context)
982 {
983 #if 0
984     struct i965_driver_data *i965 = i965_driver_data(ctx);
985     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
986         
987     VAEncPictureParameterBufferH264Ext *pPicParameter = (VAEncPictureParameterBufferH264Ext *)encode_state->pic_param_ext->buffer;
988         
989     struct object_surface *obj_surface = SURFACE(pPicParameter->reconstructed_picture);
990     //struct object_surface *obj_surface = SURFACE(pPicParameter->reference_picture[0]);
991     //struct object_surface *obj_surface = SURFACE(encode_state->current_render_target);
992     my_debug(obj_surface);
993
994 #endif
995
996     return VA_STATUS_SUCCESS;
997 }
998
999 static VAStatus
1000 gen6_mfc_avc_encode_picture(VADriverContextP ctx, 
1001                             struct encode_state *encode_state,
1002                             struct gen6_encoder_context *gen6_encoder_context)
1003 {
1004     gen6_mfc_init(ctx, gen6_encoder_context);
1005     gen6_mfc_avc_prepare(ctx, encode_state, gen6_encoder_context);
1006     gen6_mfc_run(ctx, encode_state, gen6_encoder_context);
1007     gen6_mfc_stop(ctx, encode_state, gen6_encoder_context);
1008
1009     return VA_STATUS_SUCCESS;
1010 }
1011
1012 VAStatus
1013 gen6_mfc_pipeline(VADriverContextP ctx,
1014                   VAProfile profile,
1015                   struct encode_state *encode_state,
1016                   struct gen6_encoder_context *gen6_encoder_context)
1017 {
1018     VAStatus vaStatus;
1019
1020     switch (profile) {
1021     case VAProfileH264Baseline:
1022         vaStatus = gen6_mfc_avc_encode_picture(ctx, encode_state, gen6_encoder_context);
1023         break;
1024
1025         /* FIXME: add for other profile */
1026     default:
1027         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
1028         break;
1029     }
1030
1031     return vaStatus;
1032 }
1033
1034 Bool gen6_mfc_context_init(VADriverContextP ctx, struct gen6_mfc_context *mfc_context)
1035 {
1036     return True;
1037 }
1038
1039 Bool gen6_mfc_context_destroy(struct gen6_mfc_context *mfc_context)
1040 {
1041     int i;
1042
1043     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
1044     mfc_context->post_deblocking_output.bo = NULL;
1045
1046     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
1047     mfc_context->pre_deblocking_output.bo = NULL;
1048
1049     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
1050     mfc_context->uncompressed_picture_source.bo = NULL;
1051
1052     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
1053     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
1054
1055     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
1056         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
1057         mfc_context->direct_mv_buffers[i].bo = NULL;
1058     }
1059
1060     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
1061     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
1062
1063         dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
1064         mfc_context->macroblock_status_buffer.bo = NULL;
1065
1066     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
1067     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1068
1069     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
1070     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1071
1072     return True;
1073 }