Encoding: modify functions to fill commands into a specified batch buffer
[platform/upstream/libva-intel-driver.git] / src / gen75_mfc.c
1 /*
2  * Copyright © 2010-2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34
35 #include "assert.h"
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41
42 #define B0_STEP_REV             2
43 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
44
/*
 * Emit MFX_PIPE_MODE_SELECT (5 DWords) configuring the MFX pipe for
 * encoding: long format, VLD mode, post-deblocking output enabled,
 * stream-out disabled.  Only MPEG-2 and AVC standards are accepted.
 * If @batch is NULL the context's default batch buffer is used.
 */
static void
gen75_mfc_pipe_mode_select(VADriverContextP ctx,
                           int standard_select,
                           struct gen6_encoder_context *gen6_encoder_context,
                           struct intel_batchbuffer *batch)
{
    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (1 << 9)  | /* Post Deblocking Output */
                  (0 << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (1 << 4)  | /* encoding mode */
                  (standard_select << 0));  /* standard select: avc or mpeg2 */
    OUT_BCS_BATCH(batch,
                  (0 << 7)  | /* expand NOA bus flag */
                  (0 << 6)  | /* disable slice-level clock gating */
                  (0 << 5)  | /* disable clock gating for NOA */
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    /* DW3-4: reserved */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
82
83 #define         INTER_MODE_MASK         0x03
84 #define         INTER_8X8               0x03
85 #define         SUBMB_SHAPE_MASK        0x00FF00
86
87 #define         INTER_MV8               (4 << 20)
88 #define         INTER_MV32              (6 << 20)
89
90
/*
 * Emit MFX_SURFACE_STATE (6 DWords) describing the reconstructed/source
 * surface: NV12 (planar 4:2:0, interleaved U/V), Y-major tiled, with
 * dimensions and pitch taken from mfc_context->surface_state.
 * If @batch is NULL the context's default batch buffer is used.
 */
static void
gen75_mfc_surface_state(VADriverContextP ctx,
                        struct gen6_encoder_context *gen6_encoder_context,
                        struct intel_batchbuffer *batch)
{
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    /* DW2: height/width are programmed minus one */
    OUT_BCS_BATCH(batch,
                  ((mfc_context->surface_state.height - 1) << 18) |
                  ((mfc_context->surface_state.width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
                  (0 << 22) | /* surface object control state, FIXME??? */
                  ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 for interleave U/V */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                                                           /* must be 0 for interleave U/V */
                  (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
122
/*
 * Emit the Haswell B+ stepping layout of MFX_PIPE_BUF_ADDR_STATE
 * (61 DWords): each buffer address is a 3-DWord group (address plus two
 * reserved/upper DWords).  Programs post-deblocking output, the
 * uncompressed source, the intra-row and deblocking-filter row-store
 * scratch buffers, and the 16 reference-surface slots (2 DWords each).
 * Pre-deblocking output, MB status, stream-out and ILDB buffers are not
 * used here and are written as zero.
 * If @batch is NULL the context's default batch buffer is used.
 */
static void
gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
                                    struct gen6_encoder_context *gen6_encoder_context,
                                    struct intel_batchbuffer *batch)
{
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
    int i;

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 61);

    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));

    /* the DW1-3 is for pre_deblocking */
        OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */

        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
     /* the DW4-6 is for the post_deblocking */

    if (mfc_context->post_deblocking_output.bo)
        OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);                                                                                       /* post output addr  */
    else
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

     /* the DW7-9 is for the uncompressed_picture */
    OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0); /* uncompressed data */

        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

     /* the DW10-12 is for the mb status */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

     /* the DW13-15 is for the intra_row_store_scratch */
    OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

     /* the DW16-18 is for the deblocking filter */
    OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

    /* the DW 19-50 is for Reference pictures: address + upper DWord per slot */
    for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
        if ( mfc_context->reference_surfaces[i].bo != NULL) {
            OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
        OUT_BCS_BATCH(batch, 0);
    }
    /* DW51 */
        OUT_BCS_BATCH(batch, 0);

        /* The DW 52-54 is for the MB status buffer */
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

        /* the DW 55-57 is the ILDB buffer */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

        /* the DW 58-60 is the second ILDB buffer */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
212
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE.  On B+ stepping hardware this delegates
 * to the 61-DWord B+ layout; otherwise it emits the legacy 25-DWord
 * layout (one DWord per buffer address): post-deblocking output,
 * uncompressed source, row-store scratch buffers and the 16 reference
 * surface slots.
 * If @batch is NULL the context's default batch buffer is used.
 */
static void
gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx,
                              struct gen6_encoder_context *gen6_encoder_context,
                              struct intel_batchbuffer *batch)
{
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
    int i;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfc_pipe_buf_addr_state_bplus(ctx, gen6_encoder_context, batch);
        return;
    }

    BEGIN_BCS_BATCH(batch, 25);

    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));

    OUT_BCS_BATCH(batch, 0);                                                                                    /* pre output addr   */

    OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);                                                                                   /* post output addr  */

    OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);                                                                                   /* uncompressed data */

    OUT_BCS_BATCH(batch, 0);                                                                                    /* StreamOut data*/
    OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    /* 7..22 Reference pictures*/
    for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
        if ( mfc_context->reference_surfaces[i].bo != NULL) {
            OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }
    OUT_BCS_BATCH(batch, 0);                                                                                    /* no block status  */

    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
267
268
/*
 * Emit the B+ stepping layout of MFX_IND_OBJ_BASE_ADDR_STATE (26 DWords):
 * programs the VME output buffer as the indirect MV object base and the
 * PAK-BSE object (compressed bitstream) base; bitstream offset, IT-COFF
 * and DBLK sections are unused by the encoder and written as zero.
 * If @batch is NULL the context's default batch buffer is used.
 */
static void
gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
                                        struct gen6_encoder_context *gen6_encoder_context,
                                        struct intel_batchbuffer *batch)
{
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 26);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
        /* the DW1-3 is for the MFX indirect bistream offset */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
        /* the DW4-5 is the MFX upper bound */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW6-10 is for MFX Indirect MV Object Base Address */
    OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
    OUT_BCS_BATCH(batch, 0);

     /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

     /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/
    OUT_BCS_RELOC(batch,
                  mfc_context->mfc_indirect_pak_bse_object.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0x00000000);

    ADVANCE_BCS_BATCH(batch);
}
325
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE.  On B+ stepping hardware this
 * delegates to the 26-DWord B+ layout; otherwise it emits the legacy
 * 11-DWord layout, pointing the indirect MV object at the VME output
 * buffer and the PAK-BSE object at the compressed bitstream buffer.
 * If @batch is NULL the context's default batch buffer is used.
 */
static void
gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
                                  struct gen6_encoder_context *gen6_encoder_context,
                                  struct intel_batchbuffer *batch)
{
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfc_ind_obj_base_addr_state_bplus(ctx, gen6_encoder_context, batch);
        return;
    }

    BEGIN_BCS_BATCH(batch, 11);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* MFX Indirect MV Object Base Address */
    OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /*MFC Indirect PAK-BSE Object Base Address for Encoder*/
    OUT_BCS_RELOC(batch,
                  mfc_context->mfc_indirect_pak_bse_object.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0x00000000); /* must set, up to 2G */

    ADVANCE_BCS_BATCH(batch);
}
364
/*
 * Emit the B+ stepping layout of MFX_BSP_BUF_BASE_ADDR_STATE
 * (10 DWords): programs only the BSD/MPC row-store scratch buffer;
 * the MPR row-store and bitplane-read buffer groups are written as zero.
 * If @batch is NULL the context's default batch buffer is used.
 */
static void
gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
                                        struct gen6_encoder_context *gen6_encoder_context,
                                        struct intel_batchbuffer *batch)
{
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 10);

    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
    OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

        /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

        /* the DW7-9 is for Bitplane Read Buffer Base Address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
396
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE.  On B+ stepping hardware this
 * delegates to the 10-DWord B+ layout; otherwise it emits the legacy
 * 4-DWord layout pointing at the BSD/MPC row-store scratch buffer.
 * If @batch is NULL the context's default batch buffer is used.
 */
static void
gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx,
                                  struct gen6_encoder_context *gen6_encoder_context,
                                  struct intel_batchbuffer *batch)
{
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, gen6_encoder_context, batch);
        return;
    }


    BEGIN_BCS_BATCH(batch, 4);

    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
    OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
425
/*
 * Emit MFX_AVC_IMG_STATE (16 DWords) describing the picture being
 * encoded: size in macroblocks (derived from surface_state width/height
 * rounded up to 16), CABAC entropy coding, 4:2:0 chroma, frame-MB-only,
 * plus fixed inter/intra MB size-conformance limits.
 * If @batch is NULL the context's default batch buffer is used.
 */
static void
gen75_mfc_avc_img_state(VADriverContextP ctx,
                        struct gen6_encoder_context *gen6_encoder_context,
                        struct intel_batchbuffer *batch)
{
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 16);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    /* DW1: total macroblock count for the frame */
    OUT_BCS_BATCH(batch,
                  ((width_in_mbs * height_in_mbs) & 0xFFFF));
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 24) |   /* Second Chroma QP Offset */
                  (0 << 16) |   /* Chroma QP Offset */
                  (0 << 14) |   /* Max-bit conformance Intra flag */
                  (0 << 13) |   /* Max Macroblock size conformance Inter flag */
                  (0 << 12) |   /* FIXME: Weighted_Pred_Flag */
                  (0 << 10) |   /* FIXME: Weighted_BiPred_Idc */
                  (0 << 8)  |   /* FIXME: Image Structure */
                  (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |   /* Mininum Frame size */
                  (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
                  (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
                  (0 << 13) |   /* CABAC 0 word insertion test enable */
                  (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
                  (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
                  (0 << 8)  |   /* FIXME: MbMvFormatFlag */
                  (1 << 7)  |   /* 0:CAVLC encoding mode,1:CABAC */
                  (0 << 6)  |   /* Only valid for VLD decoding mode */
                  (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
                  (0 << 4)  |   /* Direct 8x8 inference flag */
                  (0 << 3)  |   /* Only 8x8 IDCT Transform Mode Flag */
                  (1 << 2)  |   /* Frame MB only flag */
                  (0 << 1)  |   /* MBAFF mode is in active */
                  (0 << 0));    /* Field picture flag */
    OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
    OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
                  (0xBB8 << 16) |       /* InterMbMaxSz */
                  (0xEE8) );            /* IntraMbMaxSz */
    OUT_BCS_BATCH(batch, 0);            /* Reserved */
    OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
    OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
    /* DW10-11: magic values — presumably rate-control/debug defaults; meaning
     * not derivable from this file (NOTE(review): confirm against the PRM) */
    OUT_BCS_BATCH(batch, 0x8C000000);
    OUT_BCS_BATCH(batch, 0x00010000);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
486
487
/*
 * Emit the B+ stepping layout of MFX_AVC_DIRECTMODE_STATE (71 DWords):
 * two DWords per reference direct-MV buffer (address + upper DWord),
 * then the current picture's MV write buffer (slot
 * NUM_MFC_DMV_BUFFERS - 2), then the 32-entry POC list, which here is
 * filled with i/2 (paired top/bottom field values per frame).
 * If @batch is NULL the context's default batch buffer is used.
 */
static void
gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
                                     struct gen6_encoder_context *gen6_encoder_context,
                                     struct intel_batchbuffer *batch)
{
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
    int i;

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 71);

    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* Reference frames and Current frames */
    /* the DW1-32 is for the direct MV for reference */
    for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
        if ( mfc_context->direct_mv_buffers[i].bo != NULL) {
            OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
            OUT_BCS_BATCH(batch, 0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }
    /* DW33 */
        OUT_BCS_BATCH(batch, 0);

        /* the DW34-36 is the MV for the current reference */
        OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);

        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

    /* POL list */
    for(i = 0; i < 32; i++) {
        OUT_BCS_BATCH(batch, i/2);
    }
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
535
/*
 * Emit MFX_AVC_DIRECTMODE_STATE.  On B+ stepping hardware this delegates
 * to the 71-DWord B+ layout; otherwise it emits the legacy 69-DWord
 * layout: one DWord per reference direct-MV buffer, the current MV
 * write buffer, and a zeroed POC list (still marked TODO upstream).
 * If @batch is NULL the context's default batch buffer is used.
 */
static void gen75_mfc_avc_directmode_state(VADriverContextP ctx,
                                           struct gen6_encoder_context *gen6_encoder_context,
                                           struct intel_batchbuffer *batch)
{
    struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
    int i;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfc_avc_directmode_state_bplus(ctx, gen6_encoder_context, batch);
        return;
    }

    BEGIN_BCS_BATCH(batch, 69);

    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
    //TODO: reference DMV
    for (i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i++){
        if (mfc_context->direct_mv_buffers[i].bo)
                OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
        else
                OUT_BCS_BATCH(batch, 0);
    }

    //TODO: current DMV just for test
#if 0
    OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[0].bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
#else
    //drm_intel_bo_pin(mfc_context->direct_mv_buffers[0].bo, 0x1000);
    //OUT_BCS_BATCH(batch, mfc_context->direct_mv_buffers[0].bo->offset);
    OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
#endif


    OUT_BCS_BATCH(batch, 0);

    //TODO: POL list
    for(i = 0; i < 34; i++) {
        OUT_BCS_BATCH(batch, 0);
    }

    ADVANCE_BCS_BATCH(batch);
}
588
589 static void gen75_mfc_avc_slice_state(VADriverContextP ctx,
590                                       int intra_slice,
591                                       struct gen6_encoder_context *gen6_encoder_context,
592                                       struct intel_batchbuffer *batch)
593 {
594     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
595
596     if (batch == NULL)
597         batch = gen6_encoder_context->base.batch;
598
599     BEGIN_BCS_BATCH(batch, 11);;
600
601     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
602
603     if ( intra_slice )
604         OUT_BCS_BATCH(batch, 2);                        /*Slice Type: I Slice*/
605     else
606         OUT_BCS_BATCH(batch, 0);                        /*Slice Type: P Slice*/
607
608     if ( intra_slice )
609         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
610     else 
611         OUT_BCS_BATCH(batch, 0x00010000);       /*1 reference frame*/
612
613     OUT_BCS_BATCH(batch, (0<<24) |                /*Enable deblocking operation*/
614                   (26<<16) |                    /*Slice Quantization Parameter*/
615                   0x0202 );
616     OUT_BCS_BATCH(batch, 0);                    /*First MB X&Y , the postion of current slice*/
617     OUT_BCS_BATCH(batch, ( ((mfc_context->surface_state.height+15)/16) << 16) );
618
619     OUT_BCS_BATCH(batch, 
620                   (0<<31) |             /*RateControlCounterEnable = disable*/
621                   (1<<30) |             /*ResetRateControlCounter*/
622                   (2<<28) |             /*RC Triggle Mode = Loose Rate Control*/
623                   (1<<19) |             /*IsLastSlice*/
624                   (0<<18) |             /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
625                   (0<<17) |             /*HeaderPresentFlag*/   
626                   (1<<16) |             /*SliceData PresentFlag*/
627                   (0<<15) |             /*TailPresentFlag*/
628                   (1<<13) |             /*RBSP NAL TYPE*/       
629                   (0<<12) );            /*CabacZeroWordInsertionEnable*/
630         
631
632     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
633
634     OUT_BCS_BATCH(batch, 0);
635     OUT_BCS_BATCH(batch, 0);
636     OUT_BCS_BATCH(batch, 0);
637
638     ADVANCE_BCS_BATCH(batch);
639 }
640
641 static void
642 gen75_mfc_qm_state(VADriverContextP ctx,
643                    int qm_type,
644                    unsigned int *qm,
645                    int qm_length,
646                    struct gen6_encoder_context *gen6_encoder_context,
647                    struct intel_batchbuffer *batch)
648 {
649     unsigned int qm_buffer[16];
650
651     if (batch == NULL)
652         batch = gen6_encoder_context->base.batch;
653
654     assert(qm_length <= 16);
655     assert(sizeof(*qm) == 4);
656     memcpy(qm_buffer, qm, qm_length * 4);
657
658     BEGIN_BCS_BATCH(batch, 18);
659     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
660     OUT_BCS_BATCH(batch, qm_type << 0);
661     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
662     ADVANCE_BCS_BATCH(batch);
663 }
664
665 static void gen75_mfc_avc_qm_state(VADriverContextP ctx,
666                                    struct gen6_encoder_context *gen6_encoder_context,
667                                    struct intel_batchbuffer *batch)
668 {
669     unsigned int qm[16] = {
670         0x10101010, 0x10101010, 0x10101010, 0x10101010,
671         0x10101010, 0x10101010, 0x10101010, 0x10101010,
672         0x10101010, 0x10101010, 0x10101010, 0x10101010,
673         0x10101010, 0x10101010, 0x10101010, 0x10101010
674     };
675
676     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, gen6_encoder_context, batch);
677     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, gen6_encoder_context, batch);
678     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, gen6_encoder_context, batch);
679     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, gen6_encoder_context, batch);
680 }
681
682 static void
683 gen75_mfc_fqm_state(VADriverContextP ctx,
684                     int fqm_type,
685                     unsigned int *fqm,
686                     int fqm_length,
687                     struct gen6_encoder_context *gen6_encoder_context,
688                     struct intel_batchbuffer *batch)
689 {
690     unsigned int fqm_buffer[32];
691
692     if (batch == NULL)
693         batch = gen6_encoder_context->base.batch;
694
695     assert(fqm_length <= 32);
696     assert(sizeof(*fqm) == 4);
697     memcpy(fqm_buffer, fqm, fqm_length * 4);
698
699     BEGIN_BCS_BATCH(batch, 34);
700     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
701     OUT_BCS_BATCH(batch, fqm_type << 0);
702     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
703     ADVANCE_BCS_BATCH(batch);
704 }
705
706 static void gen75_mfc_avc_fqm_state(VADriverContextP ctx,
707                                     struct gen6_encoder_context *gen6_encoder_context,
708                                     struct intel_batchbuffer *batch)
709 {
710     unsigned int qm[32] = {
711         0x10001000, 0x10001000, 0x10001000, 0x10001000,
712         0x10001000, 0x10001000, 0x10001000, 0x10001000,
713         0x10001000, 0x10001000, 0x10001000, 0x10001000,
714         0x10001000, 0x10001000, 0x10001000, 0x10001000,
715         0x10001000, 0x10001000, 0x10001000, 0x10001000,
716         0x10001000, 0x10001000, 0x10001000, 0x10001000,
717         0x10001000, 0x10001000, 0x10001000, 0x10001000,
718         0x10001000, 0x10001000, 0x10001000, 0x10001000
719     };
720
721     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, gen6_encoder_context, batch);
722     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, gen6_encoder_context, batch);
723     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, gen6_encoder_context, batch);
724     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, gen6_encoder_context, batch);
725 }
726
727 static void gen75_mfc_avc_ref_idx_state(VADriverContextP ctx,
728                                         struct gen6_encoder_context *gen6_encoder_context,
729                                         struct intel_batchbuffer *batch)
730 {
731     int i;
732
733     if (batch == NULL)
734         batch = gen6_encoder_context->base.batch;
735
736     BEGIN_BCS_BATCH(batch, 10);
737
738     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
739     OUT_BCS_BATCH(batch, 0);                  //Select L0
740
741     OUT_BCS_BATCH(batch, 0x80808000);         //Only 1 reference
742     for(i = 0; i < 7; i++) {
743         OUT_BCS_BATCH(batch, 0x80808080);
744     }
745
746     ADVANCE_BCS_BATCH(batch);
747 }
748         
749 static void
750 gen75_mfc_avc_insert_object(VADriverContextP ctx, int flush_data,
751                             struct gen6_encoder_context *gen6_encoder_context,
752                             struct intel_batchbuffer *batch)
753 {
754     if (batch == NULL)
755         batch = gen6_encoder_context->base.batch;
756
757     BEGIN_BCS_BATCH(batch, 4);
758
759     OUT_BCS_BATCH(batch, MFC_AVC_INSERT_OBJECT | (4 -2 ) );
760     OUT_BCS_BATCH(batch, (32<<8) | 
761                   (1 << 3) |
762                   (1 << 2) |
763                   (flush_data << 1) |
764                   (1<<0) );
765     OUT_BCS_BATCH(batch, 0x00000003);
766     OUT_BCS_BATCH(batch, 0xABCD1234);
767
768     ADVANCE_BCS_BATCH(batch);
769 }
770
771 static int
772 gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg,
773                                struct gen6_encoder_context *gen6_encoder_context,
774                                struct intel_batchbuffer *batch)
775 {
776     int len_in_dwords = 12;
777
778     unsigned int intra_msg;
779 #define         INTRA_MSG_FLAG          (1 << 13)
780 #define         INTRA_MBTYPE_MASK       (0x1F0000)
781
782     if (batch == NULL)
783         batch = gen6_encoder_context->base.batch;
784
785     BEGIN_BCS_BATCH(batch, len_in_dwords);
786
787     intra_msg = msg[0] & 0xC0FF;
788     intra_msg |= INTRA_MSG_FLAG;
789     intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
790     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
791     OUT_BCS_BATCH(batch, 0);
792     OUT_BCS_BATCH(batch, 0);
793     OUT_BCS_BATCH(batch, 
794                   (0 << 24) |           /* PackedMvNum, Debug*/
795                   (0 << 20) |           /* No motion vector */
796                   (1 << 19) |           /* CbpDcY */
797                   (1 << 18) |           /* CbpDcU */
798                   (1 << 17) |           /* CbpDcV */
799                   intra_msg);
800
801     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);          /* Code Block Pattern for Y*/
802     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
803     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
804
805     /*Stuff for Intra MB*/
806     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
807     OUT_BCS_BATCH(batch, msg[2]);       
808     OUT_BCS_BATCH(batch, msg[3]&0xFC);          
809
810     OUT_BCS_BATCH(batch, 0x00000);      /*MaxSizeInWord and TargetSzieInWord*/
811         OUT_BCS_BATCH(batch, 0);
812
813     ADVANCE_BCS_BATCH(batch);
814
815     return len_in_dwords;
816 }
817
818 static int gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
819                                           unsigned int offset, unsigned int *msg, struct gen6_encoder_context *gen6_encoder_context,
820                                           struct intel_batchbuffer *batch)
821 {
822     int len_in_dwords = 12;
823     unsigned int inter_msg;
824
825     if (batch == NULL)
826         batch = gen6_encoder_context->base.batch;
827
828     BEGIN_BCS_BATCH(batch, len_in_dwords);
829
830     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
831
832         inter_msg = 32;
833         /* MV quantity */
834         if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
835                 if (msg[1] & SUBMB_SHAPE_MASK)
836                         inter_msg = 128;
837         }
838     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
839
840     OUT_BCS_BATCH(batch, offset);
841         inter_msg = msg[0] & (0x1F00FFFF);
842         inter_msg |= INTER_MV8;
843         if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
844                         (msg[1] & SUBMB_SHAPE_MASK)) {
845                 inter_msg |= INTER_MV32;
846         }
847
848     OUT_BCS_BATCH(batch, inter_msg);
849
850     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
851     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */    
852     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);    /* Last MB */
853
854     /*Stuff for Inter MB*/
855         inter_msg = msg[1] >> 8;
856     OUT_BCS_BATCH(batch, inter_msg);        
857     OUT_BCS_BATCH(batch, 0x0);    
858     OUT_BCS_BATCH(batch, 0x0);        
859
860     OUT_BCS_BATCH(batch, 0x00000000); /*MaxSizeInWord and TargetSzieInWord*/
861
862     OUT_BCS_BATCH(batch, 0x0);        
863
864     ADVANCE_BCS_BATCH(batch);
865
866     return len_in_dwords;
867 }
868
869 static void gen75_mfc_init(VADriverContextP ctx,
870                            struct encode_state *encode_state,
871                            struct gen6_encoder_context *gen6_encoder_context)
872 {
873     struct i965_driver_data *i965 = i965_driver_data(ctx);
874     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
875     dri_bo *bo;
876     int i;
877     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
878     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
879     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
880
881     /*Encode common setup for MFC*/
882     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
883     mfc_context->post_deblocking_output.bo = NULL;
884
885     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
886     mfc_context->pre_deblocking_output.bo = NULL;
887
888     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
889     mfc_context->uncompressed_picture_source.bo = NULL;
890
891     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
892     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
893
894     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
895         if (mfc_context->reference_surfaces[i].bo != NULL)
896             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
897         mfc_context->reference_surfaces[i].bo = NULL;  
898     }
899
900     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
901     bo = dri_bo_alloc(i965->intel.bufmgr,
902                       "Buffer",
903                       width_in_mbs * 64,
904                       64);
905     assert(bo);
906     mfc_context->intra_row_store_scratch_buffer.bo = bo;
907
908     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
909     bo = dri_bo_alloc(i965->intel.bufmgr,
910                       "Buffer",
911                       4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
912                       64);
913     assert(bo);
914     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
915
916     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
917     bo = dri_bo_alloc(i965->intel.bufmgr,
918                       "Buffer",
919                       2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
920                       0x1000);
921     assert(bo);
922     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
923 }
924
925 #define         INTRA_RDO_OFFSET        4
926 #define         INTER_RDO_OFFSET        54
927 #define         INTER_MSG_OFFSET        52
928 #define         INTER_MV_OFFSET         224
929 #define         RDO_MASK                0xFFFF
930
931 static void gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
932                                       struct encode_state *encode_state,
933                                       struct gen6_encoder_context *gen6_encoder_context)
934 {
935     struct i965_driver_data *i965 = i965_driver_data(ctx);
936     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
937     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
938     struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
939     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
940     VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer; /* FIXME: multi slices */
941     unsigned int *msg = NULL, offset = 0;
942     unsigned char *msg_ptr = NULL;
943     int emit_new_state = 1, object_len_in_bytes;
944     int is_intra = pSliceParameter->slice_flags.bits.is_intra;
945     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
946     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
947     int x,y, mb_index;
948     int inter_rdo, intra_rdo;
949
950     intel_batchbuffer_start_atomic_bcs(batch, 0x1000); 
951
952     dri_bo_map(vme_context->vme_output.bo , 1);
953     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
954     if (is_intra) {
955         msg = (unsigned int *) (msg_ptr + 0 * vme_context->vme_output.size_block);
956     } else {
957         msg = (unsigned int *) (msg_ptr + 0 * vme_context->vme_output.size_block);
958         offset = 0; 
959     }
960
961     for (y = 0; y < height_in_mbs; y++) {
962         for (x = 0; x < width_in_mbs; x++) { 
963             int last_mb = (y == (height_in_mbs-1)) && ( x == (width_in_mbs-1) );
964             int qp = pSequenceParameter->initial_qp;
965              mb_index = (y * width_in_mbs) + x;
966             if (emit_new_state) {
967                 intel_batchbuffer_emit_mi_flush(batch);
968                 
969                 gen75_mfc_pipe_mode_select(ctx, MFX_FORMAT_AVC, gen6_encoder_context, batch);
970                 gen75_mfc_surface_state(ctx, gen6_encoder_context, batch);
971                 gen75_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context, batch);
972
973                 gen75_mfc_pipe_buf_addr_state(ctx, gen6_encoder_context, batch);
974                 gen75_mfc_bsp_buf_base_addr_state(ctx, gen6_encoder_context, batch);
975
976                 gen75_mfc_avc_img_state(ctx, gen6_encoder_context, batch);
977                 gen75_mfc_avc_qm_state(ctx, gen6_encoder_context, batch);
978                 gen75_mfc_avc_fqm_state(ctx, gen6_encoder_context, batch);
979                 gen75_mfc_avc_directmode_state(ctx, gen6_encoder_context, batch);
980
981                 gen75_mfc_avc_ref_idx_state(ctx, gen6_encoder_context, batch);
982                 gen75_mfc_avc_slice_state(ctx, is_intra, gen6_encoder_context, batch);
983                 emit_new_state = 0;
984             }
985
986             msg = (unsigned int *) (msg_ptr + mb_index * vme_context->vme_output.size_block);
987             if (is_intra) {
988                 object_len_in_bytes = gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, gen6_encoder_context, batch);
989             } else {
990                 inter_rdo = msg[INTER_RDO_OFFSET] & RDO_MASK;
991                 intra_rdo = msg[INTRA_RDO_OFFSET] & RDO_MASK;
992                 if (intra_rdo < inter_rdo) {
993                     object_len_in_bytes = gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, gen6_encoder_context, batch);
994                 } else {
995                     msg += INTER_MSG_OFFSET;
996                     offset = mb_index * vme_context->vme_output.size_block + INTER_MV_OFFSET;
997                     object_len_in_bytes = gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, offset, msg, gen6_encoder_context, batch);
998                 }
999             }
1000             if (intel_batchbuffer_check_free_space(batch, object_len_in_bytes) == 0) {
1001                 intel_batchbuffer_end_atomic(batch);
1002                 intel_batchbuffer_flush(batch);
1003                 emit_new_state = 1;
1004                 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1005             }
1006         }
1007     }
1008
1009     dri_bo_unmap(vme_context->vme_output.bo);
1010         
1011     intel_batchbuffer_end_atomic(batch);
1012 }
1013
1014 static VAStatus gen75_mfc_avc_prepare(VADriverContextP ctx, 
1015                                      struct encode_state *encode_state,
1016                                      struct gen6_encoder_context *gen6_encoder_context)
1017 {
1018     struct i965_driver_data *i965 = i965_driver_data(ctx);
1019     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
1020     struct object_surface *obj_surface; 
1021     struct object_buffer *obj_buffer;
1022     dri_bo *bo;
1023     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param->buffer;
1024     VAStatus vaStatus = VA_STATUS_SUCCESS;
1025
1026     /*Setup all the input&output object*/
1027     obj_surface = SURFACE(pPicParameter->reconstructed_picture);
1028     assert(obj_surface);
1029     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
1030     mfc_context->post_deblocking_output.bo = obj_surface->bo;
1031     dri_bo_reference(mfc_context->post_deblocking_output.bo);
1032
1033     mfc_context->surface_state.width = obj_surface->orig_width;
1034     mfc_context->surface_state.height = obj_surface->orig_height;
1035     mfc_context->surface_state.w_pitch = obj_surface->width;
1036     mfc_context->surface_state.h_pitch = obj_surface->height;
1037
1038     obj_surface = SURFACE(pPicParameter->reference_picture);
1039     assert(obj_surface);
1040     if (obj_surface->bo != NULL) {
1041         mfc_context->reference_surfaces[0].bo = obj_surface->bo;
1042         dri_bo_reference(obj_surface->bo);
1043     }
1044         
1045     obj_surface = SURFACE(encode_state->current_render_target);
1046     assert(obj_surface && obj_surface->bo);
1047     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
1048     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
1049
1050     obj_buffer = BUFFER (pPicParameter->coded_buf); /* FIXME: fix this later */
1051     bo = obj_buffer->buffer_store->bo;
1052     assert(bo);
1053     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
1054     mfc_context->mfc_indirect_pak_bse_object.offset = ALIGN(sizeof(VACodedBufferSegment), 64);
1055     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
1056
1057     /*Programing bcs pipeline*/
1058     gen75_mfc_avc_pipeline_programing(ctx, encode_state, gen6_encoder_context); //filling the pipeline
1059         
1060     return vaStatus;
1061 }
1062
1063 static VAStatus gen75_mfc_run(VADriverContextP ctx, 
1064                              struct encode_state *encode_state,
1065                              struct gen6_encoder_context *gen6_encoder_context)
1066 {
1067     struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
1068
1069     intel_batchbuffer_flush(batch);             //run the pipeline
1070
1071     return VA_STATUS_SUCCESS;
1072 }
1073
1074 static VAStatus gen75_mfc_stop(VADriverContextP ctx, 
1075                               struct encode_state *encode_state,
1076                               struct gen6_encoder_context *gen6_encoder_context)
1077 {
1078 #if 0
1079     struct i965_driver_data *i965 = i965_driver_data(ctx);
1080     struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
1081         
1082     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param->buffer;
1083         
1084     struct object_surface *obj_surface = SURFACE(pPicParameter->reconstructed_picture);
1085     //struct object_surface *obj_surface = SURFACE(pPicParameter->reference_picture[0]);
1086     //struct object_surface *obj_surface = SURFACE(encode_state->current_render_target);
1087     my_debug(obj_surface);
1088
1089 #endif
1090
1091     return VA_STATUS_SUCCESS;
1092 }
1093
1094 static VAStatus
1095 gen75_mfc_avc_encode_picture(VADriverContextP ctx, 
1096                             struct encode_state *encode_state,
1097                             struct gen6_encoder_context *gen6_encoder_context)
1098 {
1099     gen75_mfc_init(ctx, encode_state, gen6_encoder_context);
1100     gen75_mfc_avc_prepare(ctx, encode_state, gen6_encoder_context);
1101     gen75_mfc_run(ctx, encode_state, gen6_encoder_context);
1102     gen75_mfc_stop(ctx, encode_state, gen6_encoder_context);
1103
1104     return VA_STATUS_SUCCESS;
1105 }
1106
1107 VAStatus
1108 gen75_mfc_pipeline(VADriverContextP ctx,
1109                   VAProfile profile,
1110                   struct encode_state *encode_state,
1111                   struct gen6_encoder_context *gen6_encoder_context)
1112 {
1113     VAStatus vaStatus;
1114
1115     switch (profile) {
1116     case VAProfileH264Baseline:
1117         vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, gen6_encoder_context);
1118         break;
1119
1120         /* FIXME: add for other profile */
1121     default:
1122         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
1123         break;
1124     }
1125
1126     return vaStatus;
1127 }
1128
1129 Bool gen75_mfc_context_init(VADriverContextP ctx, struct gen6_mfc_context *mfc_context)
1130 {
1131     int i;
1132     struct i965_driver_data *i965 = i965_driver_data(ctx);
1133     dri_bo *bo;
1134         
1135     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
1136         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
1137         mfc_context->direct_mv_buffers[i].bo = NULL;
1138     }
1139     bo = dri_bo_alloc(i965->intel.bufmgr,
1140                         "Buffer",
1141                          68*8192,
1142                          64);
1143     mfc_context->direct_mv_buffers[0].bo = bo;
1144     bo = dri_bo_alloc(i965->intel.bufmgr,
1145                         "Buffer",
1146                          68*8192,
1147                          64);
1148     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = bo;
1149     return True;
1150 }
1151
1152 Bool gen75_mfc_context_destroy(struct gen6_mfc_context *mfc_context)
1153 {
1154     int i;
1155
1156     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
1157     mfc_context->post_deblocking_output.bo = NULL;
1158
1159     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
1160     mfc_context->pre_deblocking_output.bo = NULL;
1161
1162     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
1163     mfc_context->uncompressed_picture_source.bo = NULL;
1164
1165     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
1166     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
1167
1168     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
1169         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
1170         mfc_context->direct_mv_buffers[i].bo = NULL;
1171     }
1172
1173     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
1174     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
1175
1176     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
1177     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1178
1179     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
1180     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1181
1182     return True;
1183 }