Enable HW bit rate control in MFC, MB level QP auto adjustment.
[platform/upstream/libva-intel-driver.git] / src / gen6_mfc.c
1 /*
2  * Copyright © 2010-2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhou Chang <chang.zhou@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "assert.h"
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40
41 static void
42 gen6_mfc_pipe_mode_select(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
43 {
44     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
45
46     BEGIN_BCS_BATCH(batch, 4);
47
48     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
49     OUT_BCS_BATCH(batch,
50                   (0 << 10) | /* disable Stream-Out , advanced QP/bitrate control need enable it*/
51                   (1 << 9)  | /* Post Deblocking Output */
52                   (0 << 8)  | /* Pre Deblocking Output */
53                   (0 << 7)  | /* disable TLB prefectch */
54                   (0 << 5)  | /* not in stitch mode */
55                   (1 << 4)  | /* encoding mode */
56                   (2 << 0));  /* Standard Select: AVC */
57     OUT_BCS_BATCH(batch,
58                   (0 << 20) | /* round flag in PB slice */
59                   (0 << 19) | /* round flag in Intra8x8 */
60                   (0 << 7)  | /* expand NOA bus flag */
61                   (1 << 6)  | /* must be 1 */
62                   (0 << 5)  | /* disable clock gating for NOA */
63                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
64                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
65                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
66                   (0 << 1)  | /* AVC long field motion vector */
67                   (0 << 0));  /* always calculate AVC ILDB boundary strength */
68     OUT_BCS_BATCH(batch, 0);
69
70     ADVANCE_BCS_BATCH(batch);
71 }
72
73 static void
74 gen7_mfc_pipe_mode_select(VADriverContextP ctx,
75                           int standard_select,
76                           struct gen6_encoder_context *gen6_encoder_context)
77 {
78     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
79
80     assert(standard_select == MFX_FORMAT_MPEG2 ||
81            standard_select == MFX_FORMAT_AVC);
82
83     BEGIN_BCS_BATCH(batch, 5);
84     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
85     OUT_BCS_BATCH(batch,
86                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
87                   (MFD_MODE_VLD << 15) | /* VLD mode */
88                   (0 << 10) | /* disable Stream-Out */
89                   (1 << 9)  | /* Post Deblocking Output */
90                   (0 << 8)  | /* Pre Deblocking Output */
91                   (0 << 5)  | /* not in stitch mode */
92                   (1 << 4)  | /* encoding mode */
93                   (standard_select << 0));  /* standard select: avc or mpeg2 */
94     OUT_BCS_BATCH(batch,
95                   (0 << 7)  | /* expand NOA bus flag */
96                   (0 << 6)  | /* disable slice-level clock gating */
97                   (0 << 5)  | /* disable clock gating for NOA */
98                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
99                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
100                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
101                   (0 << 1)  |
102                   (0 << 0));
103     OUT_BCS_BATCH(batch, 0);
104     OUT_BCS_BATCH(batch, 0);
105
106     ADVANCE_BCS_BATCH(batch);
107 }
108
109 static void
110 gen6_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
111 {
112     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
113     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
114
115     BEGIN_BCS_BATCH(batch, 6);
116
117     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
118     OUT_BCS_BATCH(batch, 0);
119     OUT_BCS_BATCH(batch,
120                   ((mfc_context->surface_state.height - 1) << 19) |
121                   ((mfc_context->surface_state.width - 1) << 6));
122     OUT_BCS_BATCH(batch,
123                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
124                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
125                   (0 << 22) | /* surface object control state, FIXME??? */
126                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
127                   (0 << 2)  | /* must be 0 for interleave U/V */
128                   (1 << 1)  | /* must be y-tiled */
129                   (I965_TILEWALK_YMAJOR << 0));                         /* tile walk, TILEWALK_YMAJOR */
130     OUT_BCS_BATCH(batch,
131                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
132                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
133     OUT_BCS_BATCH(batch, 0);
134     ADVANCE_BCS_BATCH(batch);
135 }
136
137 static void
138 gen7_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
139 {
140     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
141     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
142
143     BEGIN_BCS_BATCH(batch, 6);
144
145     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
146     OUT_BCS_BATCH(batch, 0);
147     OUT_BCS_BATCH(batch,
148                   ((mfc_context->surface_state.height - 1) << 18) |
149                   ((mfc_context->surface_state.width - 1) << 4));
150     OUT_BCS_BATCH(batch,
151                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
152                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
153                   (0 << 22) | /* surface object control state, FIXME??? */
154                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
155                   (0 << 2)  | /* must be 0 for interleave U/V */
156                   (1 << 1)  | /* must be tiled */
157                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
158     OUT_BCS_BATCH(batch,
159                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
160                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
161     OUT_BCS_BATCH(batch, 0);
162     ADVANCE_BCS_BATCH(batch);
163 }
164
165 static void
166 gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
167 {
168     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
169     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
170     int i;
171
172     BEGIN_BCS_BATCH(batch, 24);
173
174     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
175
176     OUT_BCS_BATCH(batch, 0);                                                                                    /* pre output addr   */
177
178     OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
179                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
180                   0);                                                                                   /* post output addr  */ 
181
182     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
183                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
184                   0);                                                                                   /* uncompressed data */
185     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
186                                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
187                                   0);                                                                                   /* StreamOut data*/
188     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
189                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
190                   0);   
191     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
192                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
193                   0);
194     /* 7..22 Reference pictures*/
195     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
196         if ( mfc_context->reference_surfaces[i].bo != NULL) {
197             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
198                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
199                           0);                   
200         } else {
201             OUT_BCS_BATCH(batch, 0);
202         }
203     }
204     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
205                                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
206                                   0);                                                                                   /* Macroblock status buffer*/
207
208     ADVANCE_BCS_BATCH(batch);
209 }
210
211 static void
212 gen6_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
213 {
214     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
215     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
216
217     BEGIN_BCS_BATCH(batch, 11);
218
219     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
220     OUT_BCS_BATCH(batch, 0);
221     OUT_BCS_BATCH(batch, 0);
222     /* MFX Indirect MV Object Base Address */
223     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
224     OUT_BCS_BATCH(batch, 0);    
225     OUT_BCS_BATCH(batch, 0);
226     OUT_BCS_BATCH(batch, 0);
227     OUT_BCS_BATCH(batch, 0);
228     OUT_BCS_BATCH(batch, 0);
229     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
230     OUT_BCS_BATCH(batch, 0);
231     OUT_BCS_BATCH(batch, 0);
232
233     ADVANCE_BCS_BATCH(batch);
234 }
235
236 static void
237 gen7_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
238 {
239     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
240     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
241
242     BEGIN_BCS_BATCH(batch, 11);
243
244     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
245     OUT_BCS_BATCH(batch, 0);
246     OUT_BCS_BATCH(batch, 0);
247     /* MFX Indirect MV Object Base Address */
248     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
249     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
250     OUT_BCS_BATCH(batch, 0);
251     OUT_BCS_BATCH(batch, 0);
252     OUT_BCS_BATCH(batch, 0);
253     OUT_BCS_BATCH(batch, 0);
254     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
255     OUT_BCS_BATCH(batch, 0);
256     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
257
258     ADVANCE_BCS_BATCH(batch);
259 }
260
261 static void
262 gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
263 {
264     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
265     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
266
267     BEGIN_BCS_BATCH(batch, 4);
268
269     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
270     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
271                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
272                   0);
273     OUT_BCS_BATCH(batch, 0);
274     OUT_BCS_BATCH(batch, 0);
275
276     ADVANCE_BCS_BATCH(batch);
277 }
278
279 static void
280 gen6_mfc_avc_img_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
281 {
282     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
283     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
284
285     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
286     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
287
288     BEGIN_BCS_BATCH(batch, 13);
289     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (13 - 2));
290     OUT_BCS_BATCH(batch, 
291                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
292     OUT_BCS_BATCH(batch, 
293                   (height_in_mbs << 16) | 
294                   (width_in_mbs << 0));
295     OUT_BCS_BATCH(batch, 
296                   (0 << 24) |     /*Second Chroma QP Offset*/
297                   (0 << 16) |     /*Chroma QP Offset*/
298                   (0 << 14) |   /*Max-bit conformance Intra flag*/
299                   (0 << 13) |   /*Max Macroblock size conformance Inter flag*/
300                   (1 << 12) |   /*Should always be written as "1" */
301                   (0 << 10) |   /*QM Preset FLag */
302                   (0 << 8)  |   /*Image Structure*/
303                   (0 << 0) );   /*Current Decoed Image Frame Store ID, reserved in Encode mode*/
304     OUT_BCS_BATCH(batch,
305                   (400 << 16) |   /*Mininum Frame size*/        
306                   (0 << 15) |   /*Disable reading of Macroblock Status Buffer*/
307                   (0 << 14) |   /*Load BitStream Pointer only once, 1 slic 1 frame*/
308                   (0 << 13) |   /*CABAC 0 word insertion test enable*/
309                   (1 << 12) |   /*MVUnpackedEnable,compliant to DXVA*/
310                   (1 << 10) |   /*Chroma Format IDC, 4:2:0*/
311                   (1 << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
312                   (0 << 6)  |   /*Only valid for VLD decoding mode*/
313                   (0 << 5)  |   /*Constrained Intra Predition Flag, from PPS*/
314                   (0 << 4)  |   /*Direct 8x8 inference flag*/
315                   (0 << 3)  |   /*Only 8x8 IDCT Transform Mode Flag*/
316                   (1 << 2)  |   /*Frame MB only flag*/
317                   (0 << 1)  |   /*MBAFF mode is in active*/
318                   (0 << 0) );   /*Field picture flag*/
319     OUT_BCS_BATCH(batch, 
320                   (1<<16)   |   /*Frame Size Rate Control Flag*/  
321                   (1<<12)   |   
322                   (1<<9)    |   /*MB level Rate Control Enabling Flag*/
323                   (1 << 3)  |   /*FrameBitRateMinReportMask*/
324                   (1 << 2)  |   /*FrameBitRateMaxReportMask*/
325                   (1 << 1)  |   /*InterMBMaxSizeReportMask*/
326                   (1 << 0) );   /*IntraMBMaxSizeReportMask*/
327     OUT_BCS_BATCH(batch,                        /*Inter and Intra Conformance Max size limit*/
328                   (0x0600 << 16) |              /*InterMbMaxSz 192 Byte*/
329                   (0x0800) );                   /*IntraMbMaxSz 256 Byte*/
330     OUT_BCS_BATCH(batch, 0x00000000);   /*Reserved : MBZReserved*/
331     OUT_BCS_BATCH(batch, 0x01020304);   /*Slice QP Delta for bitrate control*/                  
332     OUT_BCS_BATCH(batch, 0xFEFDFCFB);           
333     OUT_BCS_BATCH(batch, 0x80601004);   /*MAX = 128KB, MIN = 64KB*/
334     OUT_BCS_BATCH(batch, 0x00800001);   
335     OUT_BCS_BATCH(batch, 0);
336
337     ADVANCE_BCS_BATCH(batch);
338 }
339
340 static void
341 gen7_mfc_avc_img_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
342 {
343     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
344     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
345
346     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
347     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
348
349     BEGIN_BCS_BATCH(batch, 16);
350     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
351     OUT_BCS_BATCH(batch,
352                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
353     OUT_BCS_BATCH(batch, 
354                   ((height_in_mbs - 1) << 16) | 
355                   ((width_in_mbs - 1) << 0));
356     OUT_BCS_BATCH(batch, 
357                   (0 << 24) |   /* Second Chroma QP Offset */
358                   (0 << 16) |   /* Chroma QP Offset */
359                   (0 << 14) |   /* Max-bit conformance Intra flag */
360                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
361                   (0 << 12) |   /* FIXME: Weighted_Pred_Flag */
362                   (0 << 10) |   /* FIXME: Weighted_BiPred_Idc */
363                   (0 << 8)  |   /* FIXME: Image Structure */
364                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
365     OUT_BCS_BATCH(batch,
366                   (0 << 16) |   /* Mininum Frame size */
367                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
368                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
369                   (0 << 13) |   /* CABAC 0 word insertion test enable */
370                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
371                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
372                   (0 << 9)  |   /* FIXME: MbMvFormatFlag */
373                   (1 << 7)  |   /* 0:CAVLC encoding mode,1:CABAC */
374                   (0 << 6)  |   /* Only valid for VLD decoding mode */
375                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
376                   (0 << 4)  |   /* Direct 8x8 inference flag */
377                   (0 << 3)  |   /* Only 8x8 IDCT Transform Mode Flag */
378                   (1 << 2)  |   /* Frame MB only flag */
379                   (0 << 1)  |   /* MBAFF mode is in active */
380                   (0 << 0));    /* Field picture flag */
381     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
382     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
383                   (0xBB8 << 16) |       /* InterMbMaxSz */
384                   (0xEE8) );            /* IntraMbMaxSz */
385     OUT_BCS_BATCH(batch, 0);            /* Reserved */
386     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
387     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */        
388     OUT_BCS_BATCH(batch, 0x8C000000);
389     OUT_BCS_BATCH(batch, 0x00010000);
390     OUT_BCS_BATCH(batch, 0);
391     OUT_BCS_BATCH(batch, 0);
392     OUT_BCS_BATCH(batch, 0);
393     OUT_BCS_BATCH(batch, 0);
394
395     ADVANCE_BCS_BATCH(batch);
396 }
397
398 static void gen6_mfc_avc_directmode_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
399 {
400     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
401     int i;
402
403     BEGIN_BCS_BATCH(batch, 69);
404
405     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
406     //TODO: reference DMV
407     for(i = 0; i < 16; i++){
408         OUT_BCS_BATCH(batch, 0);
409         OUT_BCS_BATCH(batch, 0);
410     }
411
412     //TODO: current DMV just for test
413 #if 0
414     OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[0].bo,
415                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
416                   0);
417 #else
418     //drm_intel_bo_pin(mfc_context->direct_mv_buffers[0].bo, 0x1000);
419     //OUT_BCS_BATCH(batch, mfc_context->direct_mv_buffers[0].bo->offset);
420     OUT_BCS_BATCH(batch, 0);
421 #endif
422
423
424     OUT_BCS_BATCH(batch, 0);
425
426     //TODO: POL list
427     for(i = 0; i < 34; i++) {
428         OUT_BCS_BATCH(batch, 0);
429     }
430
431     ADVANCE_BCS_BATCH(batch);
432 }
433
434 static void gen6_mfc_avc_slice_state(VADriverContextP ctx,
435                                      int slice_type,
436                                      struct gen6_encoder_context *gen6_encoder_context,
437                                      int rate_control_enable)
438 {
439     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
440     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
441
442     BEGIN_BCS_BATCH(batch, 11);;
443
444     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
445
446         OUT_BCS_BATCH(batch, slice_type);                       /*Slice Type: I:P:B Slice*/
447
448     if ( slice_type == SLICE_TYPE_I )
449         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
450     else 
451         OUT_BCS_BATCH(batch, 0x00010000);       /*1 reference frame*/
452
453     OUT_BCS_BATCH(batch, (0<<24) |                /*Enable deblocking operation*/
454                   (26<<16) |                    /*Slice Quantization Parameter*/
455                   0x0202 );
456     OUT_BCS_BATCH(batch, 0);                    /*First MB X&Y , the postion of current slice*/
457     OUT_BCS_BATCH(batch, ( ((mfc_context->surface_state.height+15)/16) << 16) );
458
459     OUT_BCS_BATCH(batch, 
460                   (rate_control_enable<<31) |           /*in CBR mode RateControlCounterEnable = enable*/
461                   (1<<30) |             /*ResetRateControlCounter*/
462                   (0<<28) |             /*RC Triggle Mode = Always Rate Control*/
463                   (8<<24) |     /*RC Stable Tolerance, middle level*/
464                   (rate_control_enable<<23) |     /*RC Panic Enable*/                  
465                   (0<<22) |     /*QP mode, don't modfiy CBP*/
466                   (1<<19) |     /*IsLastSlice*/
467                   (0<<18) |     /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
468                   (0<<17) |         /*HeaderPresentFlag*/       
469                   (1<<16) |         /*SliceData PresentFlag*/
470                   (0<<15) |         /*TailPresentFlag*/
471                   (1<<13) |         /*RBSP NAL TYPE*/   
472                   (0<<12) );    /*CabacZeroWordInsertionEnable*/
473         
474     OUT_BCS_RELOC(batch, mfc_context->mfc_indirect_pak_bse_object.bo,
475                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
476                   mfc_context->mfc_indirect_pak_bse_object.offset);
477
478     OUT_BCS_BATCH(batch, (16<<24) |     /*Target QP + 16 is lowest QP*/ 
479                          (16<<16) |     /*Target QP + 16 is highest QP*/
480                          (8<<12)  |
481                          (8<<8)   |
482                          (8<<4)   |
483                          (8<<0));
484     OUT_BCS_BATCH(batch, 0x08888888);   
485     OUT_BCS_BATCH(batch, 0);
486
487     ADVANCE_BCS_BATCH(batch);
488 }
489 static void gen6_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
490 {
491     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
492     int i;
493
494     BEGIN_BCS_BATCH(batch, 58);
495
496     OUT_BCS_BATCH(batch, MFX_AVC_QM_STATE | 56);
497     OUT_BCS_BATCH(batch, 0xFF ) ; 
498     for( i = 0; i < 56; i++) {
499         OUT_BCS_BATCH(batch, 0x10101010); 
500     }   
501
502     ADVANCE_BCS_BATCH(batch);
503 }
504
505 static void gen6_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
506 {
507     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
508     int i;
509
510     BEGIN_BCS_BATCH(batch, 113);
511     OUT_BCS_BATCH(batch, MFC_AVC_FQM_STATE | (113 - 2));
512
513     for(i = 0; i < 112;i++) {
514         OUT_BCS_BATCH(batch, 0x10001000);
515     }   
516
517     ADVANCE_BCS_BATCH(batch);   
518 }
519
520 static void
521 gen7_mfc_qm_state(VADriverContextP ctx,
522                   int qm_type,
523                   unsigned int *qm,
524                   int qm_length,
525                   struct gen6_encoder_context *gen6_encoder_context)
526 {
527     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
528     unsigned int qm_buffer[16];
529
530     assert(qm_length <= 16);
531     assert(sizeof(*qm) == 4);
532     memcpy(qm_buffer, qm, qm_length * 4);
533
534     BEGIN_BCS_BATCH(batch, 18);
535     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
536     OUT_BCS_BATCH(batch, qm_type << 0);
537     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
538     ADVANCE_BCS_BATCH(batch);
539 }
540
541 static void gen7_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
542 {
543     unsigned int qm[16] = {
544         0x10101010, 0x10101010, 0x10101010, 0x10101010,
545         0x10101010, 0x10101010, 0x10101010, 0x10101010,
546         0x10101010, 0x10101010, 0x10101010, 0x10101010,
547         0x10101010, 0x10101010, 0x10101010, 0x10101010
548     };
549
550     gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, gen6_encoder_context);
551     gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, gen6_encoder_context);
552     gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, gen6_encoder_context);
553     gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, gen6_encoder_context);
554 }
555
556 static void
557 gen7_mfc_fqm_state(VADriverContextP ctx,
558                    int fqm_type,
559                    unsigned int *fqm,
560                    int fqm_length,
561                    struct gen6_encoder_context *gen6_encoder_context)
562 {
563     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
564     unsigned int fqm_buffer[32];
565
566     assert(fqm_length <= 32);
567     assert(sizeof(*fqm) == 4);
568     memcpy(fqm_buffer, fqm, fqm_length * 4);
569
570     BEGIN_BCS_BATCH(batch, 34);
571     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
572     OUT_BCS_BATCH(batch, fqm_type << 0);
573     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
574     ADVANCE_BCS_BATCH(batch);
575 }
576
577 static void gen7_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
578 {
579     unsigned int qm[32] = {
580         0x10001000, 0x10001000, 0x10001000, 0x10001000,
581         0x10001000, 0x10001000, 0x10001000, 0x10001000,
582         0x10001000, 0x10001000, 0x10001000, 0x10001000,
583         0x10001000, 0x10001000, 0x10001000, 0x10001000,
584         0x10001000, 0x10001000, 0x10001000, 0x10001000,
585         0x10001000, 0x10001000, 0x10001000, 0x10001000,
586         0x10001000, 0x10001000, 0x10001000, 0x10001000,
587         0x10001000, 0x10001000, 0x10001000, 0x10001000
588     };
589
590     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, gen6_encoder_context);
591     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, gen6_encoder_context);
592     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, gen6_encoder_context);
593     gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, gen6_encoder_context);
594 }
595
596 static void gen6_mfc_avc_ref_idx_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
597 {
598     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
599     int i;
600
601         BEGIN_BCS_BATCH(batch, 10);
602         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
603         OUT_BCS_BATCH(batch, 0);                  //Select L0
604         OUT_BCS_BATCH(batch, 0x80808020);         //Only 1 reference
605         for(i = 0; i < 7; i++) {
606                 OUT_BCS_BATCH(batch, 0x80808080);
607         }   
608         ADVANCE_BCS_BATCH(batch);
609
610         BEGIN_BCS_BATCH(batch, 10);
611         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
612         OUT_BCS_BATCH(batch, 1);                  //Select L1
613         OUT_BCS_BATCH(batch, 0x80808022);         //Only 1 reference
614         for(i = 0; i < 7; i++) {
615                 OUT_BCS_BATCH(batch, 0x80808080);
616         }   
617         ADVANCE_BCS_BATCH(batch);
618 }
619         
620 static void
621 gen6_mfc_avc_insert_object(VADriverContextP ctx, int flush_data, struct gen6_encoder_context *gen6_encoder_context)
622 {
623     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
624
625     BEGIN_BCS_BATCH(batch, 4);
626
627     OUT_BCS_BATCH(batch, MFC_AVC_INSERT_OBJECT | (4 -2 ) );
628     OUT_BCS_BATCH(batch, (32<<8) | 
629                   (1 << 3) |
630                   (1 << 2) |
631                   (flush_data << 1) |
632                   (1<<0) );
633     OUT_BCS_BATCH(batch, 0x00000003);
634     OUT_BCS_BATCH(batch, 0xABCD1234);
635
636     ADVANCE_BCS_BATCH(batch);
637 }
638
639 static int
640 gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg,
641                               struct gen6_encoder_context *gen6_encoder_context,
642                               int intra_mb_size_in_bits)
643 {
644     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
645     int len_in_dwords = 11;
646     unsigned char target_mb_size = intra_mb_size_in_bits / 16;     //In Words
647     unsigned char max_mb_size = target_mb_size * 2;                //In Words
648
649     if ( max_mb_size > 128)
650         max_mb_size = 128;
651     if ( target_mb_size > 96)
652         target_mb_size = 96;
653
654     BEGIN_BCS_BATCH(batch, len_in_dwords);
655
656     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
657     OUT_BCS_BATCH(batch, 0);
658     OUT_BCS_BATCH(batch, 0);
659     OUT_BCS_BATCH(batch, 
660                   (0 << 24) |           /* PackedMvNum, Debug*/
661                   (0 << 20) |           /* No motion vector */
662                   (1 << 19) |           /* CbpDcY */
663                   (1 << 18) |           /* CbpDcU */
664                   (1 << 17) |           /* CbpDcV */
665                   (msg[0] & 0xFFFF) );
666
667     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);          /* Code Block Pattern for Y*/
668     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
669     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
670
671     /*Stuff for Intra MB*/
672     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
673     OUT_BCS_BATCH(batch, msg[2]);       
674     OUT_BCS_BATCH(batch, msg[3]&0xFC);          
675     
676     /*MaxSizeInWord and TargetSzieInWord*/
677     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
678                          (target_mb_size << 16) );
679
680     ADVANCE_BCS_BATCH(batch);
681
682     return len_in_dwords;
683 }
684
685 static int gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp, unsigned int offset,
686                                          struct gen6_encoder_context *gen6_encoder_context,
687                                          int inter_mb_size_in_bits)
688 {
689     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
690     int len_in_dwords = 11;
691     unsigned char target_mb_size = inter_mb_size_in_bits / 16;     //In Words
692     unsigned char max_mb_size = target_mb_size * 2;                //In Words
693
694     if ( max_mb_size > 128)
695         max_mb_size = 128;
696     if ( target_mb_size > 96)
697         target_mb_size = 96;
698
699     BEGIN_BCS_BATCH(batch, len_in_dwords);
700
701     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
702
703     OUT_BCS_BATCH(batch, 32);         /* 32 MV*/
704     OUT_BCS_BATCH(batch, offset);
705
706     OUT_BCS_BATCH(batch, 
707                   (1 << 24) |     /* PackedMvNum, Debug*/
708                   (4 << 20) |     /* 8 MV, SNB don't use it*/
709                   (1 << 19) |     /* CbpDcY */
710                   (1 << 18) |     /* CbpDcU */
711                   (1 << 17) |     /* CbpDcV */
712                   (0 << 15) |     /* Transform8x8Flag = 0*/
713                   (0 << 14) |     /* Frame based*/
714                   (0 << 13) |     /* Inter MB */
715                   (1 << 8)  |     /* MbType = P_L0_16x16 */   
716                   (0 << 7)  |     /* MBZ for frame */
717                   (0 << 6)  |     /* MBZ */
718                   (2 << 4)  |     /* MBZ for inter*/
719                   (0 << 3)  |     /* MBZ */
720                   (0 << 2)  |     /* SkipMbFlag */
721                   (0 << 0));      /* InterMbMode */
722
723     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
724     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */    
725     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | 26);      /* Last MB */
726
727     /*Stuff for Inter MB*/
728     OUT_BCS_BATCH(batch, 0x0);        
729     OUT_BCS_BATCH(batch, 0x0);    
730     OUT_BCS_BATCH(batch, 0x0);        
731
732     /*MaxSizeInWord and TargetSzieInWord*/
733     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
734                          (target_mb_size << 16) );
735
736     ADVANCE_BCS_BATCH(batch);
737
738     return len_in_dwords;
739 }
740
741 static void gen6_mfc_init(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
742 {
743     struct i965_driver_data *i965 = i965_driver_data(ctx);
744     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
745     dri_bo *bo;
746     int i;
747
748     /*Encode common setup for MFC*/
749     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
750     mfc_context->post_deblocking_output.bo = NULL;
751
752     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
753     mfc_context->pre_deblocking_output.bo = NULL;
754
755     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
756     mfc_context->uncompressed_picture_source.bo = NULL;
757
758     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
759     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
760
761     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
762         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
763         mfc_context->direct_mv_buffers[i].bo = NULL;
764     }
765
766     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
767         if (mfc_context->reference_surfaces[i].bo != NULL)
768             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
769         mfc_context->reference_surfaces[i].bo = NULL;  
770     }
771
772     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
773     bo = dri_bo_alloc(i965->intel.bufmgr,
774                       "Buffer",
775                       128 * 64,
776                       64);
777     assert(bo);
778     mfc_context->intra_row_store_scratch_buffer.bo = bo;
779
780     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
781     bo = dri_bo_alloc(i965->intel.bufmgr,
782                       "Buffer",
783                       4*9600,
784                       64);
785     assert(bo);
786     mfc_context->macroblock_status_buffer.bo = bo;
787
788     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
789     bo = dri_bo_alloc(i965->intel.bufmgr,
790                       "Buffer",
791                       49152,  /* 6 * 128 * 64 */
792                       64);
793     assert(bo);
794     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
795
796     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
797     bo = dri_bo_alloc(i965->intel.bufmgr,
798                       "Buffer",
799                       12288, /* 1.5 * 128 * 64 */
800                       0x1000);
801     assert(bo);
802     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
803 }
804
805 void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
806                                       struct encode_state *encode_state,
807                                       struct gen6_encoder_context *gen6_encoder_context)
808 {
809     struct i965_driver_data *i965 = i965_driver_data(ctx);
810     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
811     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
812     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
813     VAEncSequenceParameterBufferH264Ext *pSequenceParameter = (VAEncSequenceParameterBufferH264Ext *)encode_state->seq_param->buffer;
814     VAEncPictureParameterBufferH264Ext *pPicParameter = (VAEncPictureParameterBufferH264Ext *)encode_state->pic_param->buffer;
815     VAEncSliceParameterBufferH264Ext *pSliceParameter = (VAEncSliceParameterBufferH264Ext *)encode_state->slice_params[0]->buffer; /* FIXME: multi slices */
816     
817     unsigned int *msg = NULL, offset = 0;
818     int emit_new_state = 1, object_len_in_bytes;
819     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
820     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
821     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
822     int x,y;
823     int rate_control_mode = pSequenceParameter->rate_control_method; 
824     float fps =  pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ;
825
826     int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / fps / width_in_mbs / height_in_mbs;
827     int intra_mb_size = inter_mb_size * 5.0;
828     int qp = pPicParameter->pic_init_qp;
829
830     if ( rate_control_mode != 2)
831         qp = 26;
832
833     intel_batchbuffer_start_atomic_bcs(batch, 0x1000); 
834     
835     if (is_intra) {
836         dri_bo_map(vme_context->vme_output.bo , 1);
837         msg = (unsigned int *)vme_context->vme_output.bo->virtual;
838     }
839
840     for (y = 0; y < height_in_mbs; y++) {
841         for (x = 0; x < width_in_mbs; x++) { 
842             int last_mb = (y == (height_in_mbs-1)) && ( x == (width_in_mbs-1) );
843             
844             if (emit_new_state) {
845                 intel_batchbuffer_emit_mi_flush(batch);
846                 
847                 if (IS_GEN7(i965->intel.device_id)) {
848                     gen7_mfc_pipe_mode_select(ctx, MFX_FORMAT_AVC, gen6_encoder_context);
849                     gen7_mfc_surface_state(ctx, gen6_encoder_context);
850                     gen7_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context);
851                 } else {
852                     gen6_mfc_pipe_mode_select(ctx, gen6_encoder_context);
853                     gen6_mfc_surface_state(ctx, gen6_encoder_context);
854                     gen6_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context);
855                 }
856
857                 gen6_mfc_pipe_buf_addr_state(ctx, gen6_encoder_context);
858                 gen6_mfc_bsp_buf_base_addr_state(ctx, gen6_encoder_context);
859
860                 if (IS_GEN7(i965->intel.device_id)) {
861                     gen7_mfc_avc_img_state(ctx, gen6_encoder_context);
862                     gen7_mfc_avc_qm_state(ctx, gen6_encoder_context);
863                     gen7_mfc_avc_fqm_state(ctx, gen6_encoder_context);
864                 } else {
865                     gen6_mfc_avc_img_state(ctx, gen6_encoder_context);
866                     gen6_mfc_avc_qm_state(ctx, gen6_encoder_context);
867                     gen6_mfc_avc_fqm_state(ctx, gen6_encoder_context);
868                 }
869
870                 gen6_mfc_avc_ref_idx_state(ctx, gen6_encoder_context);
871                 gen6_mfc_avc_slice_state(ctx, pSliceParameter->slice_type, gen6_encoder_context, rate_control_mode == 0);
872                 emit_new_state = 0;
873             }
874
875             if (is_intra) {
876                 assert(msg);
877                 object_len_in_bytes = gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, gen6_encoder_context, intra_mb_size);
878                 msg += 4;
879             } else {
880                 object_len_in_bytes = gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, offset, gen6_encoder_context, inter_mb_size);
881                 offset += 64;
882             }
883
884             if (intel_batchbuffer_check_free_space(batch, object_len_in_bytes) == 0) {
885                 intel_batchbuffer_end_atomic(batch);
886                 intel_batchbuffer_flush(batch);
887                 emit_new_state = 1;
888                 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
889             }
890         }
891     }
892
893     if (is_intra)
894         dri_bo_unmap(vme_context->vme_output.bo);
895         
896     intel_batchbuffer_end_atomic(batch);
897 }
898
899 static VAStatus gen6_mfc_avc_prepare(VADriverContextP ctx, 
900                                      struct encode_state *encode_state,
901                                      struct gen6_encoder_context *gen6_encoder_context)
902 {
903     struct i965_driver_data *i965 = i965_driver_data(ctx);
904     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
905     struct object_surface *obj_surface; 
906     struct object_buffer *obj_buffer;
907     dri_bo *bo;
908     VAEncPictureParameterBufferH264Ext *pPicParameter = (VAEncPictureParameterBufferH264Ext *)encode_state->pic_param->buffer;
909     VAStatus vaStatus = VA_STATUS_SUCCESS;
910         int i;
911
912     /*Setup all the input&output object*/
913     obj_surface = SURFACE(pPicParameter->CurrPic.picture_id);
914     assert(obj_surface);
915     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));
916
917     mfc_context->post_deblocking_output.bo = obj_surface->bo;
918     dri_bo_reference(mfc_context->post_deblocking_output.bo);
919
920     mfc_context->surface_state.width = obj_surface->orig_width;
921     mfc_context->surface_state.height = obj_surface->orig_height;
922     mfc_context->surface_state.w_pitch = obj_surface->width;
923     mfc_context->surface_state.h_pitch = obj_surface->height;
924
925     for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
926                 if ( pPicParameter->ReferenceFrames[i].picture_id != VA_INVALID_ID ) { 
927                         obj_surface = SURFACE(pPicParameter->ReferenceFrames[i].picture_id);
928                         assert(obj_surface);
929                         if (obj_surface->bo != NULL) {
930                                 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
931                                 dri_bo_reference(obj_surface->bo);
932                         }
933                 } else {
934                         break;
935                 }
936         }
937         
938     obj_surface = SURFACE(encode_state->current_render_target);
939     assert(obj_surface && obj_surface->bo);
940     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
941     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
942
943     obj_buffer = BUFFER (pPicParameter->CodedBuf); /* FIXME: fix this later */
944     bo = obj_buffer->buffer_store->bo;
945     assert(bo);
946     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
947     mfc_context->mfc_indirect_pak_bse_object.offset = ALIGN(sizeof(VACodedBufferSegment), 64);
948     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
949
950     /*Programing bcs pipeline*/
951     gen6_mfc_avc_pipeline_programing(ctx, encode_state, gen6_encoder_context);  //filling the pipeline
952         
953     return vaStatus;
954 }
955
956 static VAStatus gen6_mfc_run(VADriverContextP ctx, 
957                              struct encode_state *encode_state,
958                              struct gen6_encoder_context *gen6_encoder_context)
959 {
960     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
961
962     intel_batchbuffer_flush(batch);             //run the pipeline
963
964     return VA_STATUS_SUCCESS;
965 }
966
967 static VAStatus gen6_mfc_stop(VADriverContextP ctx, 
968                               struct encode_state *encode_state,
969                               struct gen6_encoder_context *gen6_encoder_context)
970 {
971 #if 0
972     struct i965_driver_data *i965 = i965_driver_data(ctx);
973     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
974         
975     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param->buffer;
976         
977     struct object_surface *obj_surface = SURFACE(pPicParameter->reconstructed_picture);
978     //struct object_surface *obj_surface = SURFACE(pPicParameter->reference_picture[0]);
979     //struct object_surface *obj_surface = SURFACE(encode_state->current_render_target);
980     my_debug(obj_surface);
981
982 #endif
983
984     return VA_STATUS_SUCCESS;
985 }
986
987 static VAStatus
988 gen6_mfc_avc_encode_picture(VADriverContextP ctx, 
989                             struct encode_state *encode_state,
990                             struct gen6_encoder_context *gen6_encoder_context)
991 {
992     gen6_mfc_init(ctx, gen6_encoder_context);
993     gen6_mfc_avc_prepare(ctx, encode_state, gen6_encoder_context);
994     gen6_mfc_run(ctx, encode_state, gen6_encoder_context);
995     gen6_mfc_stop(ctx, encode_state, gen6_encoder_context);
996
997     return VA_STATUS_SUCCESS;
998 }
999
1000 VAStatus
1001 gen6_mfc_pipeline(VADriverContextP ctx,
1002                   VAProfile profile,
1003                   struct encode_state *encode_state,
1004                   struct gen6_encoder_context *gen6_encoder_context)
1005 {
1006     VAStatus vaStatus;
1007
1008     switch (profile) {
1009     case VAProfileH264Baseline:
1010         vaStatus = gen6_mfc_avc_encode_picture(ctx, encode_state, gen6_encoder_context);
1011         break;
1012
1013         /* FIXME: add for other profile */
1014     default:
1015         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
1016         break;
1017     }
1018
1019     return vaStatus;
1020 }
1021
1022 Bool gen6_mfc_context_init(VADriverContextP ctx, struct gen6_mfc_context *mfc_context)
1023 {
1024     return True;
1025 }
1026
1027 Bool gen6_mfc_context_destroy(struct gen6_mfc_context *mfc_context)
1028 {
1029     int i;
1030
1031     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
1032     mfc_context->post_deblocking_output.bo = NULL;
1033
1034     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
1035     mfc_context->pre_deblocking_output.bo = NULL;
1036
1037     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
1038     mfc_context->uncompressed_picture_source.bo = NULL;
1039
1040     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
1041     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
1042
1043     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
1044         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
1045         mfc_context->direct_mv_buffers[i].bo = NULL;
1046     }
1047
1048     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
1049     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
1050
1051         dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
1052         mfc_context->macroblock_status_buffer.bo = NULL;
1053
1054     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
1055     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1056
1057     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
1058     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1059
1060     return True;
1061 }