Warning fixes
[platform/upstream/libva-intel-driver.git] / src / gen75_mfc.c
/*
 * Copyright © 2010-2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Zhao Yakui <yakui.zhao@intel.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "intel_batchbuffer.h"
#include "i965_defines.h"
#include "i965_structs.h"
#include "i965_drv_video.h"
#include "i965_encoder.h"

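/*
 * Haswell B0+ steppings use longer MFX command layouts; the state helpers
 * below check IS_STEPPING_BPLUS() and dispatch to the *_bplus variants.
 */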
#define B0_STEP_REV             2
#define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)

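/* Program MFX_PIPE_MODE_SELECT for encoding (long-format commands, VLD mode,
 * post-deblocking output enabled); standard_select picks AVC or MPEG-2. */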
static void
gen75_mfc_pipe_mode_select(VADriverContextP ctx,
                           int standard_select,
                           struct gen6_encoder_context *gen6_encoder_context,
                           struct intel_batchbuffer *batch)
{
    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (1 << 9)  | /* Post Deblocking Output */
                  (0 << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (1 << 4)  | /* encoding mode */
                  (standard_select << 0));  /* standard select: avc or mpeg2 */
    OUT_BCS_BATCH(batch,
                  (0 << 7)  | /* expand NOA bus flag */
                  (0 << 6)  | /* disable slice-level clock gating */
                  (0 << 5)  | /* disable clock gating for NOA */
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}

#define         INTER_MODE_MASK         0x03
#define         INTER_8X8               0x03
#define         SUBMB_SHAPE_MASK        0x00FF00

#define         INTER_MV8               (4 << 20)
#define         INTER_MV32              (6 << 20)

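/* MFX_SURFACE_STATE for the reconstructed/source picture: NV12 (planar 4:2:0,
 * interleaved U/V), Y-major tiled, sized from mfc_context->surface_state. */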
static void
gen75_mfc_surface_state(VADriverContextP ctx,
                        struct gen6_encoder_context *gen6_encoder_context,
                        struct intel_batchbuffer *batch)
{
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((mfc_context->surface_state.height - 1) << 18) |
                  ((mfc_context->surface_state.width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
                  (0 << 22) | /* surface object control state, FIXME??? */
                  ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 for interleave U/V */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                            /* must be 0 for interleave U/V */
                  (mfc_context->surface_state.h_pitch)); /* y offset for U(cb) */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}

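/*
 * MFX_PIPE_BUF_ADDR_STATE (61-dword layout for B0+ steppings): points the MFX
 * engine at the deblocking outputs, the uncompressed source, the row-store
 * scratch buffers and the reference picture list.
 */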
static void
gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
                                    struct gen6_encoder_context *gen6_encoder_context,
                                    struct intel_batchbuffer *batch)
{
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
    int i;

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 61);

    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));

    /* DW1-3: pre deblocking output address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW4-6: post deblocking output address */
    if (mfc_context->post_deblocking_output.bo)
        OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW7-9: uncompressed picture source */
    OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW10-12: MB status */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW13-15: intra row store scratch buffer */
    OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW16-18: deblocking filter row store scratch buffer */
    OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW19-50: reference pictures */
    for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
        if (mfc_context->reference_surfaces[i].bo != NULL) {
            OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, 0);

    /* DW52-54: MB status buffer */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW55-57: ILDB buffer */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW58-60: second ILDB buffer */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}

static void
gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx,
                              struct gen6_encoder_context *gen6_encoder_context,
                              struct intel_batchbuffer *batch)
{
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
    int i;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfc_pipe_buf_addr_state_bplus(ctx, gen6_encoder_context, batch);
        return;
    }

    BEGIN_BCS_BATCH(batch, 25);

    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));

    OUT_BCS_BATCH(batch, 0);                            /* pre output addr */

    OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);                                   /* post output addr */

    OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);                                   /* uncompressed data */

    OUT_BCS_BATCH(batch, 0);                            /* StreamOut data */
    OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    /* 7..22 Reference pictures */
    for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
        if (mfc_context->reference_surfaces[i].bo != NULL) {
            OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }
    OUT_BCS_BATCH(batch, 0);                            /* no block status */

    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}

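/*
 * MFX_IND_OBJ_BASE_ADDR_STATE (B0+ layout): the indirect MV objects are read
 * from the VME output buffer, and the PAK-BSE output is written to the coded
 * buffer object set up in gen75_mfc_avc_prepare().
 */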
static void
gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
                                        struct gen6_encoder_context *gen6_encoder_context,
                                        struct intel_batchbuffer *batch)
{
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 26);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* DW1-3: MFX indirect bitstream offset */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* DW4-5: MFX upper bound */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW6-10: MFX indirect MV object base address */
    OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
    OUT_BCS_BATCH(batch, 0);

    /* DW11-15: MFX IT-COFF, not used by the encoder */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW16-20: MFX indirect DBLK, not used by the encoder */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW21-25: MFC indirect PAK-BSE object base address for the encoder */
    OUT_BCS_RELOC(batch,
                  mfc_context->mfc_indirect_pak_bse_object.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0x00000000);

    ADVANCE_BCS_BATCH(batch);
}

static void
gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
                                  struct gen6_encoder_context *gen6_encoder_context,
                                  struct intel_batchbuffer *batch)
{
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfc_ind_obj_base_addr_state_bplus(ctx, gen6_encoder_context, batch);
        return;
    }

    BEGIN_BCS_BATCH(batch, 11);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* MFX indirect MV object base address */
    OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* MFC indirect PAK-BSE object base address for the encoder */
    OUT_BCS_RELOC(batch,
                  mfc_context->mfc_indirect_pak_bse_object.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0x00000000); /* must set, up to 2G */

    ADVANCE_BCS_BATCH(batch);
}

static void
gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
                                        struct gen6_encoder_context *gen6_encoder_context,
                                        struct intel_batchbuffer *batch)
{
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 10);

    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
    OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW4-6: MPR row store scratch buffer base address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW7-9: bitplane read buffer base address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}

static void
gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx,
                                  struct gen6_encoder_context *gen6_encoder_context,
                                  struct intel_batchbuffer *batch)
{
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, gen6_encoder_context, batch);
        return;
    }

    BEGIN_BCS_BATCH(batch, 4);

    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
    OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}

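/*
 * MFX_AVC_IMG_STATE: per-frame AVC encode parameters -- frame size in MBs,
 * CABAC entropy coding, 4:2:0 chroma, frame-MB-only coding, and the
 * inter/intra MB conformance size limits.
 */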
static void
gen75_mfc_avc_img_state(VADriverContextP ctx,
                        struct gen6_encoder_context *gen6_encoder_context,
                        struct intel_batchbuffer *batch)
{
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 16);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    OUT_BCS_BATCH(batch,
                  ((width_in_mbs * height_in_mbs) & 0xFFFF));
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 24) |   /* Second Chroma QP Offset */
                  (0 << 16) |   /* Chroma QP Offset */
                  (0 << 14) |   /* Max-bit conformance Intra flag */
                  (0 << 13) |   /* Max Macroblock size conformance Inter flag */
                  (0 << 12) |   /* FIXME: Weighted_Pred_Flag */
                  (0 << 10) |   /* FIXME: Weighted_BiPred_Idc */
                  (0 << 8)  |   /* FIXME: Image Structure */
                  (0 << 0));    /* Current Decoded Image Frame Store ID, reserved in Encode mode */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |   /* Minimum Frame size */
                  (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
                  (0 << 14) |   /* Load BitStream Pointer only once, 1 slice 1 frame */
                  (0 << 13) |   /* CABAC 0 word insertion test enable */
                  (1 << 12) |   /* MVUnpackedEnable, compliant with DXVA */
                  (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
                  (0 << 8)  |   /* FIXME: MbMvFormatFlag */
                  (1 << 7)  |   /* 0: CAVLC encoding mode, 1: CABAC */
                  (0 << 6)  |   /* Only valid for VLD decoding mode */
                  (0 << 5)  |   /* Constrained Intra Prediction Flag, from PPS */
                  (0 << 4)  |   /* Direct 8x8 inference flag */
                  (0 << 3)  |   /* Only 8x8 IDCT Transform Mode Flag */
                  (1 << 2)  |   /* Frame MB only flag */
                  (0 << 1)  |   /* MBAFF mode is inactive */
                  (0 << 0));    /* Field picture flag */
    OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
    OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
                  (0xBB8 << 16) |       /* InterMbMaxSz */
                  (0xEE8));             /* IntraMbMaxSz */
    OUT_BCS_BATCH(batch, 0);            /* Reserved */
    OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
    OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
    OUT_BCS_BATCH(batch, 0x8C000000);
    OUT_BCS_BATCH(batch, 0x00010000);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}

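/*
 * MFX_AVC_DIRECTMODE_STATE: direct MV buffers for the reference frames and
 * the current frame, followed by the POC list entries.
 */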
static void
gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
                                     struct gen6_encoder_context *gen6_encoder_context,
                                     struct intel_batchbuffer *batch)
{
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
    int i;

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 71);

    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* Reference frames and current frame */
    /* DW1-32: direct MV buffers for the references */
    for (i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
        if (mfc_context->direct_mv_buffers[i].bo != NULL) {
            OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
            OUT_BCS_BATCH(batch, 0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }
    OUT_BCS_BATCH(batch, 0);

    /* DW34-36: MV buffer for the current reference */
    OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                  0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* POC list */
    for (i = 0; i < 32; i++) {
        OUT_BCS_BATCH(batch, i / 2);
    }
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}

static void gen75_mfc_avc_directmode_state(VADriverContextP ctx,
                                           struct gen6_encoder_context *gen6_encoder_context,
                                           struct intel_batchbuffer *batch)
{
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
    int i;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfc_avc_directmode_state_bplus(ctx, gen6_encoder_context, batch);
        return;
    }

    BEGIN_BCS_BATCH(batch, 69);

    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
    //TODO: reference DMV
    for (i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i++) {
        if (mfc_context->direct_mv_buffers[i].bo)
            OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                          0);
        else
            OUT_BCS_BATCH(batch, 0);
    }

    //TODO: current DMV just for test
#if 0
    OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[0].bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
#else
    //drm_intel_bo_pin(mfc_context->direct_mv_buffers[0].bo, 0x1000);
    //OUT_BCS_BATCH(batch, mfc_context->direct_mv_buffers[0].bo->offset);
    OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
#endif

    OUT_BCS_BATCH(batch, 0);

    //TODO: POC list
    for (i = 0; i < 34; i++) {
        OUT_BCS_BATCH(batch, 0);
    }

    ADVANCE_BCS_BATCH(batch);
}

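/*
 * MFX_AVC_SLICE_STATE: a single slice covering the whole frame at QP 26,
 * marked as the last slice; intra_slice selects an I slice, otherwise a
 * P slice with one L0 reference.
 */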
static void gen75_mfc_avc_slice_state(VADriverContextP ctx,
                                      int intra_slice,
                                      struct gen6_encoder_context *gen6_encoder_context,
                                      struct intel_batchbuffer *batch)
{
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 11);

    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));

    if (intra_slice)
        OUT_BCS_BATCH(batch, 2);                /* Slice Type: I Slice */
    else
        OUT_BCS_BATCH(batch, 0);                /* Slice Type: P Slice */

    if (intra_slice)
        OUT_BCS_BATCH(batch, 0);                /* no reference frames and pred_weight_table */
    else
        OUT_BCS_BATCH(batch, 0x00010000);       /* 1 reference frame */

    OUT_BCS_BATCH(batch, (0 << 24) |            /* Enable deblocking operation */
                  (26 << 16) |                  /* Slice Quantization Parameter */
                  0x0202);
    OUT_BCS_BATCH(batch, 0);                    /* First MB X&Y, the position of the current slice */
    OUT_BCS_BATCH(batch, (((mfc_context->surface_state.height + 15) / 16) << 16));

    OUT_BCS_BATCH(batch,
                  (0 << 31) |           /* RateControlCounterEnable = disable */
                  (1 << 30) |           /* ResetRateControlCounter */
                  (2 << 28) |           /* RC Trigger Mode = Loose Rate Control */
                  (1 << 19) |           /* IsLastSlice */
                  (0 << 18) |           /* BitstreamOutputFlag: Compressed BitStream Output Disable Flag, 0: enable, 1: disable */
                  (0 << 17) |           /* HeaderPresentFlag */
                  (1 << 16) |           /* SliceData PresentFlag */
                  (0 << 15) |           /* TailPresentFlag */
                  (1 << 13) |           /* RBSP NAL TYPE */
                  (0 << 12));           /* CabacZeroWordInsertionEnable */

    OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}

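/* MFX_QM_STATE: upload one quantization matrix; the AVC wrapper below loads
 * flat (all-16) matrices for the 4x4 and 8x8 intra/inter cases. */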
static void
gen75_mfc_qm_state(VADriverContextP ctx,
                   int qm_type,
                   unsigned int *qm,
                   int qm_length,
                   struct gen6_encoder_context *gen6_encoder_context,
                   struct intel_batchbuffer *batch)
{
    unsigned int qm_buffer[16] = { 0 };

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    assert(qm_length <= 16);
    assert(sizeof(*qm) == 4);
    memcpy(qm_buffer, qm, qm_length * 4);

    BEGIN_BCS_BATCH(batch, 18);
    OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
    OUT_BCS_BATCH(batch, qm_type << 0);
    intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
    ADVANCE_BCS_BATCH(batch);
}

static void gen75_mfc_avc_qm_state(VADriverContextP ctx,
                                   struct gen6_encoder_context *gen6_encoder_context,
                                   struct intel_batchbuffer *batch)
{
    unsigned int qm[16] = {
        0x10101010, 0x10101010, 0x10101010, 0x10101010,
        0x10101010, 0x10101010, 0x10101010, 0x10101010,
        0x10101010, 0x10101010, 0x10101010, 0x10101010,
        0x10101010, 0x10101010, 0x10101010, 0x10101010
    };

    gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, gen6_encoder_context, batch);
    gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, gen6_encoder_context, batch);
    gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, gen6_encoder_context, batch);
    gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, gen6_encoder_context, batch);
}

static void
gen75_mfc_fqm_state(VADriverContextP ctx,
                    int fqm_type,
                    unsigned int *fqm,
                    int fqm_length,
                    struct gen6_encoder_context *gen6_encoder_context,
                    struct intel_batchbuffer *batch)
{
    unsigned int fqm_buffer[32] = { 0 };

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    assert(fqm_length <= 32);
    assert(sizeof(*fqm) == 4);
    memcpy(fqm_buffer, fqm, fqm_length * 4);

    BEGIN_BCS_BATCH(batch, 34);
    OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
    OUT_BCS_BATCH(batch, fqm_type << 0);
    intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
    ADVANCE_BCS_BATCH(batch);
}

static void gen75_mfc_avc_fqm_state(VADriverContextP ctx,
                                    struct gen6_encoder_context *gen6_encoder_context,
                                    struct intel_batchbuffer *batch)
{
    unsigned int qm[32] = {
        0x10001000, 0x10001000, 0x10001000, 0x10001000,
        0x10001000, 0x10001000, 0x10001000, 0x10001000,
        0x10001000, 0x10001000, 0x10001000, 0x10001000,
        0x10001000, 0x10001000, 0x10001000, 0x10001000,
        0x10001000, 0x10001000, 0x10001000, 0x10001000,
        0x10001000, 0x10001000, 0x10001000, 0x10001000,
        0x10001000, 0x10001000, 0x10001000, 0x10001000,
        0x10001000, 0x10001000, 0x10001000, 0x10001000
    };

    gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, gen6_encoder_context, batch);
    gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, gen6_encoder_context, batch);
    gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, gen6_encoder_context, batch);
    gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, gen6_encoder_context, batch);
}

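/* MFX_AVC_REF_IDX_STATE: L0 reference list with a single active entry
 * (index 0); the remaining entries are marked invalid (0x80). */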
static void gen75_mfc_avc_ref_idx_state(VADriverContextP ctx,
                                        struct gen6_encoder_context *gen6_encoder_context,
                                        struct intel_batchbuffer *batch)
{
    int i;

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 10);

    OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
    OUT_BCS_BATCH(batch, 0);                  /* Select L0 */

    OUT_BCS_BATCH(batch, 0x80808000);         /* Only 1 reference */
    for (i = 0; i < 7; i++) {
        OUT_BCS_BATCH(batch, 0x80808080);
    }

    ADVANCE_BCS_BATCH(batch);
}

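/*
 * Emit one MFC_AVC_PAK_OBJECT for an intra macroblock. msg points at the
 * per-MB VME output record; the MB type and prediction modes are repacked
 * from it into the PAK command. Returns the command length in dwords.
 */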
static int
gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp, unsigned int *msg,
                               struct gen6_encoder_context *gen6_encoder_context,
                               struct intel_batchbuffer *batch)
{
    int len_in_dwords = 12;

    unsigned int intra_msg;
#define         INTRA_MSG_FLAG          (1 << 13)
#define         INTRA_MBTYPE_MASK       (0x1F0000)

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, len_in_dwords);

    intra_msg = msg[0] & 0xC0FF;
    intra_msg |= INTRA_MSG_FLAG;
    intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
    OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  (0 << 24) |           /* PackedMvNum, Debug */
                  (0 << 20) |           /* No motion vector */
                  (1 << 19) |           /* CbpDcY */
                  (1 << 18) |           /* CbpDcU */
                  (1 << 17) |           /* CbpDcV */
                  intra_msg);

    OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);        /* Code Block Pattern for Y */
    OUT_BCS_BATCH(batch, 0x000F000F);                           /* Code Block Pattern */
    OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */

    /* Stuff for intra MB */
    OUT_BCS_BATCH(batch, msg[1]);               /* We use Intra16x16, not the 4x4 pred mode */
    OUT_BCS_BATCH(batch, msg[2]);
    OUT_BCS_BATCH(batch, msg[3] & 0xFC);

    OUT_BCS_BATCH(batch, 0x00000);      /* MaxSizeInWord and TargetSizeInWord */
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    return len_in_dwords;
}

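/*
 * Emit one MFC_AVC_PAK_OBJECT for an inter macroblock. offset locates the
 * MB's motion vectors inside the indirect MV buffer; the MV count is raised
 * from 32 to 128 when 8x8 sub-macroblock shapes are used.
 */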
static int gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
                                          unsigned int offset, unsigned int *msg, struct gen6_encoder_context *gen6_encoder_context,
                                          struct intel_batchbuffer *batch)
{
    int len_in_dwords = 12;
    unsigned int inter_msg;

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, len_in_dwords);

    OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));

    inter_msg = 32;
    /* MV quantity */
    if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
        if (msg[1] & SUBMB_SHAPE_MASK)
            inter_msg = 128;
    }
    OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV */

    OUT_BCS_BATCH(batch, offset);
    inter_msg = msg[0] & (0x1F00FFFF);
    inter_msg |= INTER_MV8;
    if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
        (msg[1] & SUBMB_SHAPE_MASK)) {
        inter_msg |= INTER_MV32;
    }

    OUT_BCS_BATCH(batch, inter_msg);

    OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);        /* Code Block Pattern for Y */
    OUT_BCS_BATCH(batch, 0x000F000F);                           /* Code Block Pattern */
    OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */

    /* Stuff for inter MB */
    inter_msg = msg[1] >> 8;
    OUT_BCS_BATCH(batch, inter_msg);
    OUT_BCS_BATCH(batch, 0x0);
    OUT_BCS_BATCH(batch, 0x0);

    OUT_BCS_BATCH(batch, 0x00000000); /* MaxSizeInWord and TargetSizeInWord */

    OUT_BCS_BATCH(batch, 0x0);

    ADVANCE_BCS_BATCH(batch);

    return len_in_dwords;
}

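/*
 * Per-frame MFC setup: drop the buffer references left over from the previous
 * frame and (re)allocate the row-store scratch buffers sized from the frame
 * width in macroblocks.
 */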
static void gen75_mfc_init(VADriverContextP ctx,
                           struct encode_state *encode_state,
                           struct gen6_encoder_context *gen6_encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
    dri_bo *bo;
    int i;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;

    /* Encode common setup for MFC */
    dri_bo_unreference(mfc_context->post_deblocking_output.bo);
    mfc_context->post_deblocking_output.bo = NULL;

    dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
    mfc_context->pre_deblocking_output.bo = NULL;

    dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
    mfc_context->uncompressed_picture_source.bo = NULL;

    dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
    mfc_context->mfc_indirect_pak_bse_object.bo = NULL;

    for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
        if (mfc_context->reference_surfaces[i].bo != NULL)
            dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
        mfc_context->reference_surfaces[i].bo = NULL;
    }

    dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      width_in_mbs * 64,
                      64);
    assert(bo);
    mfc_context->intra_row_store_scratch_buffer.bo = bo;

    dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      4 * width_in_mbs * 64,
                      64);
    assert(bo);
    mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;

    dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      2 * width_in_mbs * 64,
                      0x1000);
    assert(bo);
    mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
}

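/* Offsets into the per-MB VME output block: the RDO costs and the inter
 * message are indexed in dwords, while INTER_MV_OFFSET is a byte offset to
 * the packed motion vectors. */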
#define         INTRA_RDO_OFFSET        4
#define         INTER_RDO_OFFSET        54
#define         INTER_MSG_OFFSET        52
#define         INTER_MV_OFFSET         224
#define         RDO_MASK                0xFFFF

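/*
 * Walk the frame macroblock by macroblock: read each MB's VME output, pick
 * intra vs. inter from the RDO costs, and emit the matching PAK object into a
 * dedicated BCS batch. The MFX state is re-emitted whenever the batch has to
 * be flushed, and the finished batch is chained from the main batch buffer.
 */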
static void gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
                                              struct encode_state *encode_state,
                                              struct gen6_encoder_context *gen6_encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *main_batch = gen6_encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
    VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer; /* FIXME: multi slices */
    unsigned int *msg = NULL, offset = 0;
    unsigned char *msg_ptr = NULL;
    int emit_new_state = 1, object_len_in_bytes;
    int is_intra = pSliceParameter->slice_flags.bits.is_intra;
    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
    int x, y, mb_index;
    int inter_rdo, intra_rdo;
    struct intel_batchbuffer *batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, width_in_mbs * height_in_mbs * 12 * 4 + 0x800);

    intel_batchbuffer_start_atomic_bcs(batch, width_in_mbs * height_in_mbs * 12 * 4 + 0x700);

    dri_bo_map(vme_context->vme_output.bo, 1);
    msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
    if (is_intra) {
        msg = (unsigned int *)(msg_ptr + 0 * vme_context->vme_output.size_block);
    } else {
        msg = (unsigned int *)(msg_ptr + 0 * vme_context->vme_output.size_block);
        offset = 0;
    }

    for (y = 0; y < height_in_mbs; y++) {
        for (x = 0; x < width_in_mbs; x++) {
            int last_mb = (y == (height_in_mbs - 1)) && (x == (width_in_mbs - 1));
            int qp = pSequenceParameter->initial_qp;
            mb_index = (y * width_in_mbs) + x;
            if (emit_new_state) {
                intel_batchbuffer_emit_mi_flush(batch);

                gen75_mfc_pipe_mode_select(ctx, MFX_FORMAT_AVC, gen6_encoder_context, batch);
                gen75_mfc_surface_state(ctx, gen6_encoder_context, batch);
                gen75_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context, batch);

                gen75_mfc_pipe_buf_addr_state(ctx, gen6_encoder_context, batch);
                gen75_mfc_bsp_buf_base_addr_state(ctx, gen6_encoder_context, batch);

                gen75_mfc_avc_img_state(ctx, gen6_encoder_context, batch);
                gen75_mfc_avc_qm_state(ctx, gen6_encoder_context, batch);
                gen75_mfc_avc_fqm_state(ctx, gen6_encoder_context, batch);
                gen75_mfc_avc_directmode_state(ctx, gen6_encoder_context, batch);

                gen75_mfc_avc_ref_idx_state(ctx, gen6_encoder_context, batch);
                gen75_mfc_avc_slice_state(ctx, is_intra, gen6_encoder_context, batch);
                emit_new_state = 0;
            }

            msg = (unsigned int *)(msg_ptr + mb_index * vme_context->vme_output.size_block);
            if (is_intra) {
                object_len_in_bytes = gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, gen6_encoder_context, batch);
            } else {
                inter_rdo = msg[INTER_RDO_OFFSET] & RDO_MASK;
                intra_rdo = msg[INTRA_RDO_OFFSET] & RDO_MASK;
                if (intra_rdo < inter_rdo) {
                    object_len_in_bytes = gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, gen6_encoder_context, batch);
                } else {
                    msg += INTER_MSG_OFFSET;
                    offset = mb_index * vme_context->vme_output.size_block + INTER_MV_OFFSET;
                    object_len_in_bytes = gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, offset, msg, gen6_encoder_context, batch);
                }
            }
            if (intel_batchbuffer_check_free_space(batch, object_len_in_bytes) == 0) {
                intel_batchbuffer_end_atomic(batch);
                intel_batchbuffer_flush(batch);
                emit_new_state = 1;
                intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
            }
        }
    }

    dri_bo_unmap(vme_context->vme_output.bo);

    intel_batchbuffer_align(batch, 8);

    BEGIN_BCS_BATCH(batch, 2);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
    ADVANCE_BCS_BATCH(batch);

    intel_batchbuffer_end_atomic(batch);

    /* chain to the main batch buffer */
    intel_batchbuffer_start_atomic_bcs(main_batch, 0x100);
    intel_batchbuffer_emit_mi_flush(main_batch);
    BEGIN_BCS_BATCH(main_batch, 2);
    OUT_BCS_BATCH(main_batch, MI_BATCH_BUFFER_START | (1 << 8));
    OUT_BCS_RELOC(main_batch,
                  batch->buffer,
                  I915_GEM_DOMAIN_COMMAND, 0,
                  0);
    ADVANCE_BCS_BATCH(main_batch);
    intel_batchbuffer_end_atomic(main_batch);

    /* end programming */
    intel_batchbuffer_free(batch);
}

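/*
 * Bind the per-frame objects: the reconstructed picture (post-deblocking
 * output), the reference picture, the input render target and the coded
 * buffer, then build the BCS command stream for the frame.
 */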
static VAStatus gen75_mfc_avc_prepare(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct gen6_encoder_context *gen6_encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    dri_bo *bo;
    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param->buffer;
    VAStatus vaStatus = VA_STATUS_SUCCESS;

    /* Set up all the input & output objects */
    obj_surface = SURFACE(pPicParameter->reconstructed_picture);
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
    mfc_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(mfc_context->post_deblocking_output.bo);

    mfc_context->surface_state.width = obj_surface->orig_width;
    mfc_context->surface_state.height = obj_surface->orig_height;
    mfc_context->surface_state.w_pitch = obj_surface->width;
    mfc_context->surface_state.h_pitch = obj_surface->height;

    obj_surface = SURFACE(pPicParameter->reference_picture);
    assert(obj_surface);
    if (obj_surface->bo != NULL) {
        mfc_context->reference_surfaces[0].bo = obj_surface->bo;
        dri_bo_reference(obj_surface->bo);
    }

    obj_surface = SURFACE(encode_state->current_render_target);
    assert(obj_surface && obj_surface->bo);
    mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
    dri_bo_reference(mfc_context->uncompressed_picture_source.bo);

    obj_buffer = BUFFER(pPicParameter->coded_buf); /* FIXME: fix this later */
    bo = obj_buffer->buffer_store->bo;
    assert(bo);
    mfc_context->mfc_indirect_pak_bse_object.bo = bo;
    mfc_context->mfc_indirect_pak_bse_object.offset = ALIGN(sizeof(VACodedBufferSegment), 64);
    dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);

    /* Program the BCS pipeline */
    gen75_mfc_avc_pipeline_programing(ctx, encode_state, gen6_encoder_context); /* fill the pipeline */

    return vaStatus;
}

static VAStatus gen75_mfc_run(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct gen6_encoder_context *gen6_encoder_context)
{
    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;

    intel_batchbuffer_flush(batch);             /* run the pipeline */

    return VA_STATUS_SUCCESS;
}

static VAStatus gen75_mfc_stop(VADriverContextP ctx,
                               struct encode_state *encode_state,
                               struct gen6_encoder_context *gen6_encoder_context)
{
#if 0
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;

    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param->buffer;

    struct object_surface *obj_surface = SURFACE(pPicParameter->reconstructed_picture);
    //struct object_surface *obj_surface = SURFACE(pPicParameter->reference_picture[0]);
    //struct object_surface *obj_surface = SURFACE(encode_state->current_render_target);
    my_debug(obj_surface);

#endif

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_mfc_avc_encode_picture(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct gen6_encoder_context *gen6_encoder_context)
{
    gen75_mfc_init(ctx, encode_state, gen6_encoder_context);
    gen75_mfc_avc_prepare(ctx, encode_state, gen6_encoder_context);
    gen75_mfc_run(ctx, encode_state, gen6_encoder_context);
    gen75_mfc_stop(ctx, encode_state, gen6_encoder_context);

    return VA_STATUS_SUCCESS;
}

VAStatus
gen75_mfc_pipeline(VADriverContextP ctx,
                   VAProfile profile,
                   struct encode_state *encode_state,
                   struct gen6_encoder_context *gen6_encoder_context)
{
    VAStatus vaStatus;

    switch (profile) {
    case VAProfileH264Baseline:
        vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, gen6_encoder_context);
        break;

        /* FIXME: add for other profile */
    default:
        vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
        break;
    }

    return vaStatus;
}

Bool gen75_mfc_context_init(VADriverContextP ctx, struct gen6_mfc_context *mfc_context)
{
    int i;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
        dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
        mfc_context->direct_mv_buffers[i].bo = NULL;
    }
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      68 * 8192,
                      64);
    mfc_context->direct_mv_buffers[0].bo = bo;
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      68 * 8192,
                      64);
    mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = bo;
    return True;
}

Bool gen75_mfc_context_destroy(struct gen6_mfc_context *mfc_context)
{
    int i;

    dri_bo_unreference(mfc_context->post_deblocking_output.bo);
    mfc_context->post_deblocking_output.bo = NULL;

    dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
    mfc_context->pre_deblocking_output.bo = NULL;

    dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
    mfc_context->uncompressed_picture_source.bo = NULL;

    dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
    mfc_context->mfc_indirect_pak_bse_object.bo = NULL;

    for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
        dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
        mfc_context->direct_mv_buffers[i].bo = NULL;
    }

    dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
    mfc_context->intra_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
    mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
    mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;

    return True;
}