Unify the XXX_free_avc_surface for media encoding/decoding
[platform/upstream/libva-intel-driver.git] / src / gen75_mfc.c
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao Yakui <yakui.zhao@intel.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *
28  */
29
30 #ifndef HAVE_GEN_AVC_SURFACE
31 #define HAVE_GEN_AVC_SURFACE 1
32 #endif
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <math.h>
38 #include <assert.h>
39
40 #include "intel_batchbuffer.h"
41 #include "i965_defines.h"
42 #include "i965_structs.h"
43 #include "i965_drv_video.h"
44 #include "i965_encoder.h"
45 #include "i965_encoder_utils.h"
46 #include "gen6_mfc.h"
47 #include "gen6_vme.h"
48 #include "intel_media.h"
49
#define MFC_SOFTWARE_HASWELL    1

/* Lowest intel.revision value accepted by IS_STEPPING_BPLUS(). */
#define B0_STEP_REV             2
/*
 * Non-zero when the device revision is at least B0_STEP_REV.  The state
 * emitters in this file use it to pick their longer "_bplus" command layout.
 * The argument is parenthesized so that any pointer expression (for example
 * "base + 1") can be passed safely.
 */
#define IS_STEPPING_BPLUS(i965) (((i965)->intel.revision) >= B0_STEP_REV)
54
/* Kernel binary for the AVC intra batchbuffer shader, pulled in at compile
 * time from the pre-assembled .g7b file (four 32-bit words per row). */
static const uint32_t gen75_mfc_batchbuffer_avc_intra[][4] = {
#include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
};

/* Kernel binary for the AVC inter batchbuffer shader, same layout as above. */
static const uint32_t gen75_mfc_batchbuffer_avc_inter[][4] = {
#include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
};

/*
 * Kernel descriptor table: one entry per shader above, giving a display
 * name, the MFC_BATCHBUFFER_* identifier, the binary and its size in bytes.
 * NOTE(review): the trailing NULL member is presumably a buffer object that
 * is filled in when the kernel is loaded -- confirm against struct i965_kernel.
 */
static struct i965_kernel gen75_mfc_kernels[] = {
    {
        "MFC AVC INTRA BATCHBUFFER ",
        MFC_BATCHBUFFER_AVC_INTRA,
        gen75_mfc_batchbuffer_avc_intra,
        sizeof(gen75_mfc_batchbuffer_avc_intra),
        NULL
    },

    {
        "MFC AVC INTER BATCHBUFFER ",
        MFC_BATCHBUFFER_AVC_INTER,
        gen75_mfc_batchbuffer_avc_inter,
        sizeof(gen75_mfc_batchbuffer_avc_inter),
        NULL
    },
};
80
/*
 * Bit masks and field values for inter-prediction mode words.
 * NOTE(review): none of these are referenced in the part of the file shown
 * here; presumably they decode VME output further down -- confirm at the
 * point of use.
 */
#define         INTER_MODE_MASK         0x03
#define         INTER_8X8               0x03
#define         SUBMB_SHAPE_MASK        0x00FF00

/* Values pre-shifted into bit position 20. */
#define         INTER_MV8               (4 << 20)
#define         INTER_MV32              (6 << 20)
87
88
89 static void
90 gen75_mfc_pipe_mode_select(VADriverContextP ctx,
91                           int standard_select,
92                           struct intel_encoder_context *encoder_context)
93 {
94     struct intel_batchbuffer *batch = encoder_context->base.batch;
95     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
96
97     assert(standard_select == MFX_FORMAT_MPEG2 ||
98            standard_select == MFX_FORMAT_AVC);
99
100     BEGIN_BCS_BATCH(batch, 5);
101
102     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
103     OUT_BCS_BATCH(batch,
104                   (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
105                   (MFD_MODE_VLD << 15) | /* VLD mode */
106                   (1 << 10) | /* Stream-Out Enable */
107                   ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
108                   ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
109                   (0 << 8)  | /* Pre Deblocking Output */
110                   (0 << 5)  | /* not in stitch mode */
111                   (1 << 4)  | /* encoding mode */
112                   (standard_select << 0));  /* standard select: avc or mpeg2 */
113     OUT_BCS_BATCH(batch,
114                   (0 << 7)  | /* expand NOA bus flag */
115                   (0 << 6)  | /* disable slice-level clock gating */
116                   (0 << 5)  | /* disable clock gating for NOA */
117                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
118                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
119                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
120                   (0 << 1)  |
121                   (0 << 0));
122     OUT_BCS_BATCH(batch, 0);
123     OUT_BCS_BATCH(batch, 0);
124
125     ADVANCE_BCS_BATCH(batch);
126 }
127
128 static void
129 gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
130 {
131     struct intel_batchbuffer *batch = encoder_context->base.batch;
132     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
133
134     BEGIN_BCS_BATCH(batch, 6);
135
136     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
137     OUT_BCS_BATCH(batch, 0);
138     OUT_BCS_BATCH(batch,
139                   ((mfc_context->surface_state.height - 1) << 18) |
140                   ((mfc_context->surface_state.width - 1) << 4));
141     OUT_BCS_BATCH(batch,
142                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
143                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
144                   (0 << 22) | /* surface object control state, FIXME??? */
145                   ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
146                   (0 << 2)  | /* must be 0 for interleave U/V */
147                   (1 << 1)  | /* must be tiled */
148                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
149     OUT_BCS_BATCH(batch,
150                   (0 << 16) |                                                           /* must be 0 for interleave U/V */
151                   (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
152     OUT_BCS_BATCH(batch, 0);
153
154     ADVANCE_BCS_BATCH(batch);
155 }
156
157 static void
158 gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
159                                 struct intel_encoder_context *encoder_context)
160 {
161     struct intel_batchbuffer *batch = encoder_context->base.batch;
162     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
163     struct gen6_vme_context *vme_context = encoder_context->vme_context;
164
165     BEGIN_BCS_BATCH(batch, 26);
166
167     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
168         /* the DW1-3 is for the MFX indirect bistream offset */
169     OUT_BCS_BATCH(batch, 0);
170     OUT_BCS_BATCH(batch, 0);
171     OUT_BCS_BATCH(batch, 0);
172         /* the DW4-5 is the MFX upper bound */
173     OUT_BCS_BATCH(batch, 0);
174     OUT_BCS_BATCH(batch, 0);
175
176     /* the DW6-10 is for MFX Indirect MV Object Base Address */
177     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
178     OUT_BCS_BATCH(batch, 0);
179     OUT_BCS_BATCH(batch, 0);
180     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
181     OUT_BCS_BATCH(batch, 0);
182
183      /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
184     OUT_BCS_BATCH(batch, 0);
185     OUT_BCS_BATCH(batch, 0);
186     OUT_BCS_BATCH(batch, 0);
187     OUT_BCS_BATCH(batch, 0);
188     OUT_BCS_BATCH(batch, 0);
189
190      /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */    
191     OUT_BCS_BATCH(batch, 0);
192     OUT_BCS_BATCH(batch, 0);
193     OUT_BCS_BATCH(batch, 0);
194     OUT_BCS_BATCH(batch, 0);
195     OUT_BCS_BATCH(batch, 0);
196
197     /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/        
198     OUT_BCS_RELOC(batch,
199                   mfc_context->mfc_indirect_pak_bse_object.bo,
200                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
201                   0);
202     OUT_BCS_BATCH(batch, 0);
203     OUT_BCS_BATCH(batch, 0);
204         
205     OUT_BCS_RELOC(batch,
206                   mfc_context->mfc_indirect_pak_bse_object.bo,
207                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
208                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
209     OUT_BCS_BATCH(batch, 0);
210
211     ADVANCE_BCS_BATCH(batch);
212 }
213
214 static void
215 gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
216 {
217     struct intel_batchbuffer *batch = encoder_context->base.batch;
218     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
219     struct gen6_vme_context *vme_context = encoder_context->vme_context;
220     struct i965_driver_data *i965 = i965_driver_data(ctx);
221
222     if (IS_STEPPING_BPLUS(i965)) {
223         gen75_mfc_ind_obj_base_addr_state_bplus(ctx, encoder_context);
224         return;
225     }
226     BEGIN_BCS_BATCH(batch, 11);
227
228     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
229     OUT_BCS_BATCH(batch, 0);
230     OUT_BCS_BATCH(batch, 0);
231     /* MFX Indirect MV Object Base Address */
232     OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
233     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
234     OUT_BCS_BATCH(batch, 0);
235     OUT_BCS_BATCH(batch, 0);
236     OUT_BCS_BATCH(batch, 0);
237     OUT_BCS_BATCH(batch, 0);
238     /*MFC Indirect PAK-BSE Object Base Address for Encoder*/    
239     OUT_BCS_RELOC(batch,
240                   mfc_context->mfc_indirect_pak_bse_object.bo,
241                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
242                   0);
243     OUT_BCS_RELOC(batch,
244                   mfc_context->mfc_indirect_pak_bse_object.bo,
245                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
246                   mfc_context->mfc_indirect_pak_bse_object.end_offset);
247
248     ADVANCE_BCS_BATCH(batch);
249 }
250
251 static void
252 gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,  
253                        struct intel_encoder_context *encoder_context)
254 {
255     struct intel_batchbuffer *batch = encoder_context->base.batch;
256     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
257     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
258
259     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
260     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
261
262     BEGIN_BCS_BATCH(batch, 16);
263
264     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
265     OUT_BCS_BATCH(batch,
266                   ((width_in_mbs * height_in_mbs) & 0xFFFF));
267     OUT_BCS_BATCH(batch, 
268                   ((height_in_mbs - 1) << 16) | 
269                   ((width_in_mbs - 1) << 0));
270     OUT_BCS_BATCH(batch, 
271                   (0 << 24) |   /* Second Chroma QP Offset */
272                   (0 << 16) |   /* Chroma QP Offset */
273                   (0 << 14) |   /* Max-bit conformance Intra flag */
274                   (0 << 13) |   /* Max Macroblock size conformance Inter flag */
275                   (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
276                   (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
277                   (0 << 8)  |   /* FIXME: Image Structure */
278                   (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
279     OUT_BCS_BATCH(batch,
280                   (0 << 16) |   /* Mininum Frame size */
281                   (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
282                   (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
283                   (0 << 13) |   /* CABAC 0 word insertion test enable */
284                   (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
285                   (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
286                   (0 << 8)  |   /* FIXME: MbMvFormatFlag */
287                   (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
288                   (0 << 6)  |   /* Only valid for VLD decoding mode */
289                   (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
290                   (0 << 4)  |   /* Direct 8x8 inference flag */
291                   (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
292                   (1 << 2)  |   /* Frame MB only flag */
293                   (0 << 1)  |   /* MBAFF mode is in active */
294                   (0 << 0));    /* Field picture flag */
295     OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
296     OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
297                   (0xBB8 << 16) |       /* InterMbMaxSz */
298                   (0xEE8) );            /* IntraMbMaxSz */
299     OUT_BCS_BATCH(batch, 0);            /* Reserved */
300     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
301     OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */        
302     OUT_BCS_BATCH(batch, 0x8C000000);
303     OUT_BCS_BATCH(batch, 0x00010000);
304     OUT_BCS_BATCH(batch, 0);
305     OUT_BCS_BATCH(batch, 0);
306     OUT_BCS_BATCH(batch, 0);
307     OUT_BCS_BATCH(batch, 0);
308
309     ADVANCE_BCS_BATCH(batch);
310 }
311
312 static void
313 gen75_mfc_qm_state(VADriverContextP ctx,
314                   int qm_type,
315                   unsigned int *qm,
316                   int qm_length,
317                   struct intel_encoder_context *encoder_context)
318 {
319     struct intel_batchbuffer *batch = encoder_context->base.batch;
320     unsigned int qm_buffer[16];
321
322     assert(qm_length <= 16);
323     assert(sizeof(*qm) == 4);
324     memcpy(qm_buffer, qm, qm_length * 4);
325
326     BEGIN_BCS_BATCH(batch, 18);
327     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
328     OUT_BCS_BATCH(batch, qm_type << 0);
329     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
330     ADVANCE_BCS_BATCH(batch);
331 }
332
333 static void
334 gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
335 {
336     unsigned int qm[16] = {
337         0x10101010, 0x10101010, 0x10101010, 0x10101010,
338         0x10101010, 0x10101010, 0x10101010, 0x10101010,
339         0x10101010, 0x10101010, 0x10101010, 0x10101010,
340         0x10101010, 0x10101010, 0x10101010, 0x10101010
341     };
342
343     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
344     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
345     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
346     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
347 }
348
349 static void
350 gen75_mfc_fqm_state(VADriverContextP ctx,
351                    int fqm_type,
352                    unsigned int *fqm,
353                    int fqm_length,
354                    struct intel_encoder_context *encoder_context)
355 {
356     struct intel_batchbuffer *batch = encoder_context->base.batch;
357     unsigned int fqm_buffer[32];
358
359     assert(fqm_length <= 32);
360     assert(sizeof(*fqm) == 4);
361     memcpy(fqm_buffer, fqm, fqm_length * 4);
362
363     BEGIN_BCS_BATCH(batch, 34);
364     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
365     OUT_BCS_BATCH(batch, fqm_type << 0);
366     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
367     ADVANCE_BCS_BATCH(batch);
368 }
369
370 static void
371 gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
372 {
373     unsigned int qm[32] = {
374         0x10001000, 0x10001000, 0x10001000, 0x10001000,
375         0x10001000, 0x10001000, 0x10001000, 0x10001000,
376         0x10001000, 0x10001000, 0x10001000, 0x10001000,
377         0x10001000, 0x10001000, 0x10001000, 0x10001000,
378         0x10001000, 0x10001000, 0x10001000, 0x10001000,
379         0x10001000, 0x10001000, 0x10001000, 0x10001000,
380         0x10001000, 0x10001000, 0x10001000, 0x10001000,
381         0x10001000, 0x10001000, 0x10001000, 0x10001000
382     };
383
384     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
385     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
386     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
387     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
388 }
389
390 static void
391 gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
392                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
393                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
394                            struct intel_batchbuffer *batch)
395 {
396     if (batch == NULL)
397         batch = encoder_context->base.batch;
398
399     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
400
401     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
402     OUT_BCS_BATCH(batch,
403                   (0 << 16) |   /* always start at offset 0 */
404                   (data_bits_in_last_dw << 8) |
405                   (skip_emul_byte_count << 4) |
406                   (!!emulation_flag << 3) |
407                   ((!!is_last_header) << 2) |
408                   ((!!is_end_of_slice) << 1) |
409                   (0 << 0));    /* FIXME: ??? */
410     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
411
412     ADVANCE_BCS_BATCH(batch);
413 }
414
415
416 static void gen75_mfc_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
417 {
418     struct i965_driver_data *i965 = i965_driver_data(ctx);
419     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
420     dri_bo *bo;
421     int i;
422
423     /*Encode common setup for MFC*/
424     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
425     mfc_context->post_deblocking_output.bo = NULL;
426
427     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
428     mfc_context->pre_deblocking_output.bo = NULL;
429
430     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
431     mfc_context->uncompressed_picture_source.bo = NULL;
432
433     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
434     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
435
436     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
437         if ( mfc_context->direct_mv_buffers[i].bo != NULL);
438         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
439         mfc_context->direct_mv_buffers[i].bo = NULL;
440     }
441
442     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
443         if (mfc_context->reference_surfaces[i].bo != NULL)
444             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
445         mfc_context->reference_surfaces[i].bo = NULL;  
446     }
447
448     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
449     bo = dri_bo_alloc(i965->intel.bufmgr,
450                       "Buffer",
451                       128 * 64,
452                       64);
453     assert(bo);
454     mfc_context->intra_row_store_scratch_buffer.bo = bo;
455
456     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
457     bo = dri_bo_alloc(i965->intel.bufmgr,
458                       "Buffer",
459                       128*128*16,
460                       64);
461     assert(bo);
462     mfc_context->macroblock_status_buffer.bo = bo;
463
464     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
465     bo = dri_bo_alloc(i965->intel.bufmgr,
466                       "Buffer",
467                       49152,  /* 6 * 128 * 64 */
468                       64);
469     assert(bo);
470     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
471
472     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
473     bo = dri_bo_alloc(i965->intel.bufmgr,
474                       "Buffer",
475                       12288, /* 1.5 * 128 * 64 */
476                       0x1000);
477     assert(bo);
478     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
479
480     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
481     mfc_context->mfc_batchbuffer_surface.bo = NULL;
482
483     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
484     mfc_context->aux_batchbuffer_surface.bo = NULL;
485
486     if (mfc_context->aux_batchbuffer)
487         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
488
489     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD);
490     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
491     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
492     mfc_context->aux_batchbuffer_surface.pitch = 16;
493     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
494     mfc_context->aux_batchbuffer_surface.size_block = 16;
495
496     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
497 }
498
499 static void
500 gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
501                                 struct intel_encoder_context *encoder_context)
502 {
503     struct intel_batchbuffer *batch = encoder_context->base.batch;
504     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
505     int i;
506
507     BEGIN_BCS_BATCH(batch, 61);
508
509     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
510
511     /* the DW1-3 is for pre_deblocking */
512     if (mfc_context->pre_deblocking_output.bo)
513         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
514                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
515                       0);
516     else
517         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
518
519         OUT_BCS_BATCH(batch, 0);
520         OUT_BCS_BATCH(batch, 0);
521      /* the DW4-6 is for the post_deblocking */
522
523     if (mfc_context->post_deblocking_output.bo)
524         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
525                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
526                       0);                                                                                       /* post output addr  */ 
527     else
528         OUT_BCS_BATCH(batch, 0);
529         OUT_BCS_BATCH(batch, 0);
530         OUT_BCS_BATCH(batch, 0);
531
532      /* the DW7-9 is for the uncompressed_picture */
533     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
534                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
535                   0); /* uncompressed data */
536
537         OUT_BCS_BATCH(batch, 0);
538         OUT_BCS_BATCH(batch, 0);
539
540      /* the DW10-12 is for the mb status */
541     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
542                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
543                   0); /* StreamOut data*/
544         OUT_BCS_BATCH(batch, 0);
545         OUT_BCS_BATCH(batch, 0);
546
547      /* the DW13-15 is for the intra_row_store_scratch */
548     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
549                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
550                   0);   
551         OUT_BCS_BATCH(batch, 0);
552         OUT_BCS_BATCH(batch, 0);
553
554      /* the DW16-18 is for the deblocking filter */
555     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
556                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
557                   0);
558         OUT_BCS_BATCH(batch, 0);
559         OUT_BCS_BATCH(batch, 0);
560
561     /* the DW 19-50 is for Reference pictures*/
562     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
563         if ( mfc_context->reference_surfaces[i].bo != NULL) {
564             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
565                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
566                           0);                   
567         } else {
568             OUT_BCS_BATCH(batch, 0);
569         }
570         OUT_BCS_BATCH(batch, 0);
571     }
572         OUT_BCS_BATCH(batch, 0);
573
574         /* The DW 52-54 is for the MB status buffer */
575     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
576                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
577                   0);                                                                                   /* Macroblock status buffer*/
578         
579         OUT_BCS_BATCH(batch, 0);
580         OUT_BCS_BATCH(batch, 0);
581
582         /* the DW 55-57 is the ILDB buffer */
583         OUT_BCS_BATCH(batch, 0);
584         OUT_BCS_BATCH(batch, 0);
585         OUT_BCS_BATCH(batch, 0);
586
587         /* the DW 58-60 is the second ILDB buffer */
588         OUT_BCS_BATCH(batch, 0);
589         OUT_BCS_BATCH(batch, 0);
590         OUT_BCS_BATCH(batch, 0);
591     ADVANCE_BCS_BATCH(batch);
592 }
593
594 static void
595 gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
596 {
597     struct intel_batchbuffer *batch = encoder_context->base.batch;
598     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
599     struct i965_driver_data *i965 = i965_driver_data(ctx);
600     int i;
601
602     if (IS_STEPPING_BPLUS(i965)) {
603         gen75_mfc_pipe_buf_addr_state_bplus(ctx, encoder_context);
604         return;
605     }
606
607     BEGIN_BCS_BATCH(batch, 25);
608
609     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
610
611     if (mfc_context->pre_deblocking_output.bo)
612         OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
613                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
614                       0);
615     else
616         OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */
617
618     if (mfc_context->post_deblocking_output.bo)
619         OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
620                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
621                       0);                                                                                       /* post output addr  */ 
622     else
623         OUT_BCS_BATCH(batch, 0);
624
625     OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
626                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
627                   0);                                                                                   /* uncompressed data */
628     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
629                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
630                   0);                                                                                   /* StreamOut data*/
631     OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
632                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
633                   0);   
634     OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
635                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
636                   0);
637     /* 7..22 Reference pictures*/
638     for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
639         if ( mfc_context->reference_surfaces[i].bo != NULL) {
640             OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
641                           I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
642                           0);                   
643         } else {
644             OUT_BCS_BATCH(batch, 0);
645         }
646     }
647     OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
648                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
649                   0);                                                                                   /* Macroblock status buffer*/
650
651         OUT_BCS_BATCH(batch, 0);
652
653     ADVANCE_BCS_BATCH(batch);
654 }
655
656 static void
657 gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
658                                 struct intel_encoder_context *encoder_context)
659 {
660     struct intel_batchbuffer *batch = encoder_context->base.batch;
661     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
662
663     int i;
664
665     BEGIN_BCS_BATCH(batch, 71);
666
667     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
668
669     /* Reference frames and Current frames */
670     /* the DW1-32 is for the direct MV for reference */
671     for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
672         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
673             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
674                           I915_GEM_DOMAIN_INSTRUCTION, 0,
675                           0);
676             OUT_BCS_BATCH(batch, 0);
677         } else {
678             OUT_BCS_BATCH(batch, 0);
679             OUT_BCS_BATCH(batch, 0);
680         }
681     }
682         OUT_BCS_BATCH(batch, 0);
683
684         /* the DW34-36 is the MV for the current reference */
685         OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
686                           I915_GEM_DOMAIN_INSTRUCTION, 0,
687                           0);
688
689         OUT_BCS_BATCH(batch, 0);
690         OUT_BCS_BATCH(batch, 0);
691
692     /* POL list */
693     for(i = 0; i < 32; i++) {
694         OUT_BCS_BATCH(batch, i/2);
695     }
696     OUT_BCS_BATCH(batch, 0);
697     OUT_BCS_BATCH(batch, 0);
698
699     ADVANCE_BCS_BATCH(batch);
700 }
701
702 static void
703 gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
704 {
705     struct intel_batchbuffer *batch = encoder_context->base.batch;
706     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
707     struct i965_driver_data *i965 = i965_driver_data(ctx);
708     int i;
709
710     if (IS_STEPPING_BPLUS(i965)) {
711         gen75_mfc_avc_directmode_state_bplus(ctx, encoder_context);
712         return;
713     }
714
715     BEGIN_BCS_BATCH(batch, 69);
716
717     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
718
719     /* Reference frames and Current frames */
720     for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
721         if ( mfc_context->direct_mv_buffers[i].bo != NULL) { 
722             OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
723                           I915_GEM_DOMAIN_INSTRUCTION, 0,
724                           0);
725         } else {
726             OUT_BCS_BATCH(batch, 0);
727         }
728     }
729
730     /* POL list */
731     for(i = 0; i < 32; i++) {
732         OUT_BCS_BATCH(batch, i/2);
733     }
734     OUT_BCS_BATCH(batch, 0);
735     OUT_BCS_BATCH(batch, 0);
736
737     ADVANCE_BCS_BATCH(batch);
738 }
739
740 static void
741 gen75_mfc_avc_ref_idx_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
742 {
743     struct intel_batchbuffer *batch = encoder_context->base.batch;
744     int i;
745
746     BEGIN_BCS_BATCH(batch, 10);
747     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
748     OUT_BCS_BATCH(batch, 0);                  //Select L0
749     OUT_BCS_BATCH(batch, 0x80808020);         //Only 1 reference
750     for(i = 0; i < 7; i++) {
751         OUT_BCS_BATCH(batch, 0x80808080);
752     }   
753     ADVANCE_BCS_BATCH(batch);
754
755     BEGIN_BCS_BATCH(batch, 10);
756     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8); 
757     OUT_BCS_BATCH(batch, 1);                  //Select L1
758     OUT_BCS_BATCH(batch, 0x80808022);         //Only 1 reference
759     for(i = 0; i < 7; i++) {
760         OUT_BCS_BATCH(batch, 0x80808080);
761     }   
762     ADVANCE_BCS_BATCH(batch);
763 }
764
765
766 static void
767 gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
768                                 struct intel_encoder_context *encoder_context)
769 {
770     struct intel_batchbuffer *batch = encoder_context->base.batch;
771     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
772
773     BEGIN_BCS_BATCH(batch, 10);
774
775     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
776     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
777                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
778                   0);
779     OUT_BCS_BATCH(batch, 0);
780     OUT_BCS_BATCH(batch, 0);
781         
782         /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
783     OUT_BCS_BATCH(batch, 0);
784     OUT_BCS_BATCH(batch, 0);
785     OUT_BCS_BATCH(batch, 0);
786
787         /* the DW7-9 is for Bitplane Read Buffer Base Address */
788     OUT_BCS_BATCH(batch, 0);
789     OUT_BCS_BATCH(batch, 0);
790     OUT_BCS_BATCH(batch, 0);
791
792     ADVANCE_BCS_BATCH(batch);
793 }
794
795 static void
796 gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
797 {
798     struct intel_batchbuffer *batch = encoder_context->base.batch;
799     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
800     struct i965_driver_data *i965 = i965_driver_data(ctx);
801
802     if (IS_STEPPING_BPLUS(i965)) {
803         gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, encoder_context);
804         return;
805     }
806
807     BEGIN_BCS_BATCH(batch, 4);
808
809     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
810     OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
811                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
812                   0);
813     OUT_BCS_BATCH(batch, 0);
814     OUT_BCS_BATCH(batch, 0);
815
816     ADVANCE_BCS_BATCH(batch);
817 }
818
819
820 static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
821                                       struct encode_state *encode_state,
822                                       struct intel_encoder_context *encoder_context)
823 {
824     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
825
826     mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
827     mfc_context->set_surface_state(ctx, encoder_context);
828     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
829     gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
830     gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
831     mfc_context->avc_img_state(ctx, encode_state, encoder_context);
832     mfc_context->avc_qm_state(ctx, encoder_context);
833     mfc_context->avc_fqm_state(ctx, encoder_context);
834     gen75_mfc_avc_directmode_state(ctx, encoder_context); 
835     gen75_mfc_avc_ref_idx_state(ctx, encoder_context);
836 }
837
838
839 static VAStatus gen75_mfc_avc_prepare(VADriverContextP ctx, 
840                                      struct encode_state *encode_state,
841                                      struct intel_encoder_context *encoder_context)
842 {
843     struct i965_driver_data *i965 = i965_driver_data(ctx);
844     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
845     struct object_surface *obj_surface; 
846     struct object_buffer *obj_buffer;
847     GenAvcSurface *gen6_avc_surface;
848     dri_bo *bo;
849     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
850     VAStatus vaStatus = VA_STATUS_SUCCESS;
851     int i, j, enable_avc_ildb = 0;
852     VAEncSliceParameterBufferH264 *slice_param;
853     VACodedBufferSegment *coded_buffer_segment;
854     unsigned char *flag = NULL;
855
856     for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
857         assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
858         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
859
860         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
861             assert((slice_param->slice_type == SLICE_TYPE_I) ||
862                    (slice_param->slice_type == SLICE_TYPE_SI) ||
863                    (slice_param->slice_type == SLICE_TYPE_P) ||
864                    (slice_param->slice_type == SLICE_TYPE_SP) ||
865                    (slice_param->slice_type == SLICE_TYPE_B));
866
867             if (slice_param->disable_deblocking_filter_idc != 1) {
868                 enable_avc_ildb = 1;
869                 break;
870             }
871
872             slice_param++;
873         }
874     }
875
876     /*Setup all the input&output object*/
877
878     /* Setup current frame and current direct mv buffer*/
879     obj_surface = SURFACE(pPicParameter->CurrPic.picture_id);
880     assert(obj_surface);
881     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
882
883     if ( obj_surface->private_data == NULL) {
884         gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
885         gen6_avc_surface->dmv_top = 
886             dri_bo_alloc(i965->intel.bufmgr,
887                          "Buffer",
888                          68*8192, 
889                          64);
890         gen6_avc_surface->dmv_bottom = 
891             dri_bo_alloc(i965->intel.bufmgr,
892                          "Buffer",
893                          68*8192, 
894                          64);
895         assert(gen6_avc_surface->dmv_top);
896         assert(gen6_avc_surface->dmv_bottom);
897         obj_surface->private_data = (void *)gen6_avc_surface;
898         obj_surface->free_private_data = (void *)gen_free_avc_surface; 
899     }
900     gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
901     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
902     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
903     dri_bo_reference(gen6_avc_surface->dmv_top);
904     dri_bo_reference(gen6_avc_surface->dmv_bottom);
905
906     if (enable_avc_ildb) {
907         mfc_context->post_deblocking_output.bo = obj_surface->bo;
908         dri_bo_reference(mfc_context->post_deblocking_output.bo);
909     } else {
910         mfc_context->pre_deblocking_output.bo = obj_surface->bo;
911         dri_bo_reference(mfc_context->pre_deblocking_output.bo);
912     }
913
914     mfc_context->surface_state.width = obj_surface->orig_width;
915     mfc_context->surface_state.height = obj_surface->orig_height;
916     mfc_context->surface_state.w_pitch = obj_surface->width;
917     mfc_context->surface_state.h_pitch = obj_surface->height;
918     
919     /* Setup reference frames and direct mv buffers*/
920     for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
921         if ( pPicParameter->ReferenceFrames[i].picture_id != VA_INVALID_ID ) { 
922             obj_surface = SURFACE(pPicParameter->ReferenceFrames[i].picture_id);
923             assert(obj_surface);
924             if (obj_surface->bo != NULL) {
925                 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
926                 dri_bo_reference(obj_surface->bo);
927             }
928             /* Check DMV buffer */
929             if ( obj_surface->private_data == NULL) {
930                 
931                 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
932                 gen6_avc_surface->dmv_top = 
933                     dri_bo_alloc(i965->intel.bufmgr,
934                                  "Buffer",
935                                  68*8192, 
936                                  64);
937                 gen6_avc_surface->dmv_bottom = 
938                     dri_bo_alloc(i965->intel.bufmgr,
939                                  "Buffer",
940                                  68*8192, 
941                                  64);
942                 assert(gen6_avc_surface->dmv_top);
943                 assert(gen6_avc_surface->dmv_bottom);
944                 obj_surface->private_data = gen6_avc_surface;
945                 obj_surface->free_private_data = gen_free_avc_surface; 
946             }
947     
948             gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
949             /* Setup DMV buffer */
950             mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
951             mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom; 
952             dri_bo_reference(gen6_avc_surface->dmv_top);
953             dri_bo_reference(gen6_avc_surface->dmv_bottom);
954         } else {
955             break;
956         }
957     }
958         
959     obj_surface = SURFACE(encoder_context->input_yuv_surface);
960     assert(obj_surface && obj_surface->bo);
961     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
962     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
963
964     obj_buffer = BUFFER (pPicParameter->coded_buf); /* FIXME: fix this later */
965     bo = obj_buffer->buffer_store->bo;
966     assert(bo);
967     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
968     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_SIZE;
969     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
970     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
971     
972     dri_bo_map(bo, 1);
973     coded_buffer_segment = (VACodedBufferSegment *)bo->virtual;
974     flag = (unsigned char *)(coded_buffer_segment + 1);
975     *flag = 0;
976     dri_bo_unmap(bo);
977
978     return vaStatus;
979 }
980
981
982 static VAStatus gen75_mfc_run(VADriverContextP ctx, 
983                              struct encode_state *encode_state,
984                              struct intel_encoder_context *encoder_context)
985 {
986     struct intel_batchbuffer *batch = encoder_context->base.batch;
987
988     intel_batchbuffer_flush(batch);             //run the pipeline
989
990     return VA_STATUS_SUCCESS;
991 }
992
993
994 static VAStatus
995 gen75_mfc_stop(VADriverContextP ctx, 
996               struct encode_state *encode_state,
997               struct intel_encoder_context *encoder_context,
998               int *encoded_bits_size)
999 {
1000     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
1001     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1002     VACodedBufferSegment *coded_buffer_segment;
1003     
1004     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
1005     assert(vaStatus == VA_STATUS_SUCCESS);
1006     *encoded_bits_size = coded_buffer_segment->size * 8;
1007     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
1008
1009     return VA_STATUS_SUCCESS;
1010 }
1011
1012
1013 static void
1014 gen75_mfc_avc_slice_state(VADriverContextP ctx,
1015                          VAEncPictureParameterBufferH264 *pic_param,
1016                          VAEncSliceParameterBufferH264 *slice_param,
1017                          struct encode_state *encode_state,
1018                          struct intel_encoder_context *encoder_context,
1019                          int rate_control_enable,
1020                          int qp,
1021                          struct intel_batchbuffer *batch)
1022 {
1023     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1024     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1025     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1026     int beginmb = slice_param->macroblock_address;
1027     int endmb = beginmb + slice_param->num_macroblocks;
1028     int beginx = beginmb % width_in_mbs;
1029     int beginy = beginmb / width_in_mbs;
1030     int nextx =  endmb % width_in_mbs;
1031     int nexty = endmb / width_in_mbs;
1032     int slice_type = slice_param->slice_type;
1033     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
1034     int bit_rate_control_target, maxQpN, maxQpP;
1035     unsigned char correct[6], grow, shrink;
1036     int i;
1037     int weighted_pred_idc = 0;
1038     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
1039     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
1040
1041     if (batch == NULL)
1042         batch = encoder_context->base.batch;
1043
1044     bit_rate_control_target = slice_type;
1045     if (slice_type == SLICE_TYPE_SP)
1046         bit_rate_control_target = SLICE_TYPE_P;
1047     else if (slice_type == SLICE_TYPE_SI)
1048         bit_rate_control_target = SLICE_TYPE_I;
1049
1050     if (slice_type == SLICE_TYPE_P) {
1051         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
1052     } else if (slice_type == SLICE_TYPE_B) {
1053         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
1054
1055         if (weighted_pred_idc == 2) {
1056             /* 8.4.3 - Derivation process for prediction weights (8-279) */
1057             luma_log2_weight_denom = 5;
1058             chroma_log2_weight_denom = 5;
1059         }
1060     }
1061
1062     maxQpN = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpNegModifier;
1063     maxQpP = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpPosModifier;
1064
1065     for (i = 0; i < 6; i++)
1066         correct[i] = mfc_context->bit_rate_control_context[bit_rate_control_target].Correct[i];
1067
1068     grow = mfc_context->bit_rate_control_context[bit_rate_control_target].GrowInit + 
1069         (mfc_context->bit_rate_control_context[bit_rate_control_target].GrowResistance << 4);
1070     shrink = mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkInit + 
1071         (mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkResistance << 4);
1072
1073     BEGIN_BCS_BATCH(batch, 11);;
1074
1075     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
1076     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
1077
1078     if (slice_type == SLICE_TYPE_I) {
1079         OUT_BCS_BATCH(batch, 0);                        /*no reference frames and pred_weight_table*/
1080     } else {
1081         OUT_BCS_BATCH(batch,
1082                       (1 << 16) |                       /*1 reference frame*/
1083                       (chroma_log2_weight_denom << 8) |
1084                       (luma_log2_weight_denom << 0));
1085     }
1086
1087     OUT_BCS_BATCH(batch, 
1088                   (weighted_pred_idc << 30) |
1089                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
1090                   (slice_param->disable_deblocking_filter_idc << 27) |
1091                   (slice_param->cabac_init_idc << 24) |
1092                   (qp<<16) |                    /*Slice Quantization Parameter*/
1093                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
1094                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
1095     OUT_BCS_BATCH(batch,
1096                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
1097                   (beginx << 16) |
1098                   slice_param->macroblock_address );
1099     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
1100     OUT_BCS_BATCH(batch, 
1101                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
1102                   (1 << 30) |           /*ResetRateControlCounter*/
1103                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
1104                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
1105                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
1106                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
1107                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
1108                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
1109                   (last_slice << 19) |     /*IsLastSlice*/
1110                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
1111                   (1 << 17) |       /*HeaderPresentFlag*/       
1112                   (1 << 16) |       /*SliceData PresentFlag*/
1113                   (1 << 15) |       /*TailPresentFlag*/
1114                   (1 << 13) |       /*RBSP NAL TYPE*/   
1115                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
1116     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1117     OUT_BCS_BATCH(batch,
1118                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/ 
1119                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
1120                   (shrink << 8)  |
1121                   (grow << 0));   
1122     OUT_BCS_BATCH(batch,
1123                   (correct[5] << 20) |
1124                   (correct[4] << 16) |
1125                   (correct[3] << 12) |
1126                   (correct[2] << 8) |
1127                   (correct[1] << 4) |
1128                   (correct[0] << 0));
1129     OUT_BCS_BATCH(batch, 0);
1130
1131     ADVANCE_BCS_BATCH(batch);
1132 }
1133
1134
1135 #ifdef MFC_SOFTWARE_HASWELL
1136
1137 static int
1138 gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
1139                                 int qp,unsigned int *msg,
1140                               struct intel_encoder_context *encoder_context,
1141                               unsigned char target_mb_size, unsigned char max_mb_size,
1142                               struct intel_batchbuffer *batch)
1143 {
1144     int len_in_dwords = 12;
1145     unsigned int intra_msg;
1146 #define         INTRA_MSG_FLAG          (1 << 13)
1147 #define         INTRA_MBTYPE_MASK       (0x1F0000)
1148     if (batch == NULL)
1149         batch = encoder_context->base.batch;
1150
1151     BEGIN_BCS_BATCH(batch, len_in_dwords);
1152
1153     intra_msg = msg[0] & 0xC0FF;
1154     intra_msg |= INTRA_MSG_FLAG;
1155     intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
1156     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1157     OUT_BCS_BATCH(batch, 0);
1158     OUT_BCS_BATCH(batch, 0);
1159     OUT_BCS_BATCH(batch, 
1160                   (0 << 24) |           /* PackedMvNum, Debug*/
1161                   (0 << 20) |           /* No motion vector */
1162                   (1 << 19) |           /* CbpDcY */
1163                   (1 << 18) |           /* CbpDcU */
1164                   (1 << 17) |           /* CbpDcV */
1165                   intra_msg);
1166
1167     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
1168     OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */                
1169     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
1170
1171     /*Stuff for Intra MB*/
1172     OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/        
1173     OUT_BCS_BATCH(batch, msg[2]);       
1174     OUT_BCS_BATCH(batch, msg[3]&0xFC);          
1175     
1176     /*MaxSizeInWord and TargetSzieInWord*/
1177     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1178                   (target_mb_size << 16) );
1179
1180     OUT_BCS_BATCH(batch, 0);
1181
1182     ADVANCE_BCS_BATCH(batch);
1183
1184     return len_in_dwords;
1185 }
1186
1187 static int
1188 gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1189                               unsigned int *msg, unsigned int offset,
1190                               struct intel_encoder_context *encoder_context,
1191                               unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1192                               struct intel_batchbuffer *batch)
1193 {
1194     int len_in_dwords = 12;
1195         unsigned int inter_msg = 0;
1196     if (batch == NULL)
1197         batch = encoder_context->base.batch;
1198
1199     BEGIN_BCS_BATCH(batch, len_in_dwords);
1200
1201     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1202
1203         inter_msg = 32;
1204         /* MV quantity */
1205         if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
1206                 if (msg[1] & SUBMB_SHAPE_MASK)
1207                         inter_msg = 128;
1208         }
1209     OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
1210     OUT_BCS_BATCH(batch, offset);
1211         inter_msg = msg[0] & (0x1F00FFFF);
1212         inter_msg |= INTER_MV8;
1213         if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1214                         (msg[1] & SUBMB_SHAPE_MASK)) {
1215                 inter_msg |= INTER_MV32;
1216         }
1217
1218     OUT_BCS_BATCH(batch, inter_msg);
1219
1220     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
1221     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
1222 #if 0 
1223     if ( slice_type == SLICE_TYPE_B) {
1224         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
1225     } else {
1226         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
1227     }
1228 #else
1229     OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
1230 #endif
1231
1232         inter_msg = msg[1] >> 8;
1233     /*Stuff for Inter MB*/
1234     OUT_BCS_BATCH(batch, inter_msg);        
1235     OUT_BCS_BATCH(batch, 0x0);    
1236     OUT_BCS_BATCH(batch, 0x0);        
1237
1238     /*MaxSizeInWord and TargetSzieInWord*/
1239     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1240                   (target_mb_size << 16) );
1241
1242     OUT_BCS_BATCH(batch, 0x0);    
1243
1244     ADVANCE_BCS_BATCH(batch);
1245
1246     return len_in_dwords;
1247 }
1248
1249 #define         INTRA_RDO_OFFSET        4
1250 #define         INTER_RDO_OFFSET        54
1251 #define         INTER_MSG_OFFSET        52
1252 #define         INTER_MV_OFFSET         224
1253 #define         RDO_MASK                0xFFFF
1254
1255 static void 
1256 gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1257                                        struct encode_state *encode_state,
1258                                        struct intel_encoder_context *encoder_context,
1259                                        int slice_index,
1260                                        struct intel_batchbuffer *slice_batch)
1261 {
1262     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1263     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1264     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1265     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1266     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1267     unsigned int *msg = NULL, offset = 0;
1268     unsigned char *msg_ptr = NULL;
1269     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
1270     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1271     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1272     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1273     int i,x,y;
1274     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1275     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1276     unsigned char *slice_header = NULL;
1277     int slice_header_length_in_bits = 0;
1278     unsigned int tail_data[] = { 0x0, 0x0 };
1279     int slice_type = pSliceParameter->slice_type;
1280
1281
1282     if (rate_control_mode == VA_RC_CBR) {
1283         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1284         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1285     }
1286
1287     /* only support for 8-bit pixel bit-depth */
1288     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1289     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1290     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1291     assert(qp >= 0 && qp < 52);
1292
1293     gen75_mfc_avc_slice_state(ctx, 
1294                              pPicParameter,
1295                              pSliceParameter,
1296                              encode_state, encoder_context,
1297                              (rate_control_mode == VA_RC_CBR), qp, slice_batch);
1298
1299     if ( slice_index == 0) 
1300         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1301
1302     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1303
1304     // slice hander
1305     mfc_context->insert_object(ctx, encoder_context,
1306                                (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1307                                5,  /* first 5 bytes are start code + nal unit type */
1308                                1, 0, 1, slice_batch);
1309
1310     dri_bo_map(vme_context->vme_output.bo , 1);
1311     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1312
1313     if (is_intra) {
1314         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1315     } else {
1316         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1317     }
1318    
1319     for (i = pSliceParameter->macroblock_address; 
1320          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1321         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1322         x = i % width_in_mbs;
1323         y = i / width_in_mbs;
1324         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1325
1326         if (is_intra) {
1327             assert(msg);
1328             gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1329         } else {
1330             int inter_rdo, intra_rdo;
1331             inter_rdo = msg[INTER_RDO_OFFSET] & RDO_MASK;
1332             intra_rdo = msg[INTRA_RDO_OFFSET] & RDO_MASK;
1333             offset = i * vme_context->vme_output.size_block + INTER_MV_OFFSET;
1334             if (intra_rdo < inter_rdo) { 
1335                 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1336             } else {
1337                 msg += INTER_MSG_OFFSET;
1338                 gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1339             }
1340         }
1341     }
1342    
1343     dri_bo_unmap(vme_context->vme_output.bo);
1344
1345     if ( last_slice ) {    
1346         mfc_context->insert_object(ctx, encoder_context,
1347                                    tail_data, 2, 8,
1348                                    2, 1, 1, 0, slice_batch);
1349     } else {
1350         mfc_context->insert_object(ctx, encoder_context,
1351                                    tail_data, 1, 8,
1352                                    1, 1, 1, 0, slice_batch);
1353     }
1354
1355     free(slice_header);
1356
1357 }
1358
1359 static dri_bo *
1360 gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1361                                   struct encode_state *encode_state,
1362                                   struct intel_encoder_context *encoder_context)
1363 {
1364     struct i965_driver_data *i965 = i965_driver_data(ctx);
1365     struct intel_batchbuffer *batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD);
1366     dri_bo *batch_bo = batch->buffer;
1367     int i;
1368
1369     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1370         gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1371     }
1372
1373     intel_batchbuffer_align(batch, 8);
1374     
1375     BEGIN_BCS_BATCH(batch, 2);
1376     OUT_BCS_BATCH(batch, 0);
1377     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1378     ADVANCE_BCS_BATCH(batch);
1379
1380     dri_bo_reference(batch_bo);
1381     intel_batchbuffer_free(batch);
1382
1383     return batch_bo;
1384 }
1385
1386 #else
1387
1388 static void
1389 gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1390                                     struct encode_state *encode_state,
1391                                     struct intel_encoder_context *encoder_context)
1392
1393 {
1394     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1395     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1396
1397     assert(vme_context->vme_output.bo);
1398     mfc_context->buffer_suface_setup(ctx,
1399                                      &mfc_context->gpe_context,
1400                                      &vme_context->vme_output,
1401                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1402                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1403     assert(mfc_context->aux_batchbuffer_surface.bo);
1404     mfc_context->buffer_suface_setup(ctx,
1405                                      &mfc_context->gpe_context,
1406                                      &mfc_context->aux_batchbuffer_surface,
1407                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1408                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
1409 }
1410
1411 static void
1412 gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1413                                      struct encode_state *encode_state,
1414                                      struct intel_encoder_context *encoder_context)
1415
1416 {
1417     struct i965_driver_data *i965 = i965_driver_data(ctx);
1418     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1419     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1420     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1421     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1422     mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1423     mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1424     mfc_context->mfc_batchbuffer_surface.pitch = 16;
1425     mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, 
1426                                                            "MFC batchbuffer",
1427                                                            mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1428                                                            0x1000);
1429     mfc_context->buffer_suface_setup(ctx,
1430                                      &mfc_context->gpe_context,
1431                                      &mfc_context->mfc_batchbuffer_surface,
1432                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1433                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1434 }
1435
1436 static void
1437 gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
1438                                     struct encode_state *encode_state,
1439                                     struct intel_encoder_context *encoder_context)
1440 {
1441     gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1442     gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1443 }
1444
1445 static void
1446 gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
1447                                 struct encode_state *encode_state,
1448                                 struct intel_encoder_context *encoder_context)
1449 {
1450     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1451     struct gen6_interface_descriptor_data *desc;   
1452     int i;
1453     dri_bo *bo;
1454
1455     bo = mfc_context->gpe_context.idrt.bo;
1456     dri_bo_map(bo, 1);
1457     assert(bo->virtual);
1458     desc = bo->virtual;
1459
1460     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1461         struct i965_kernel *kernel;
1462
1463         kernel = &mfc_context->gpe_context.kernels[i];
1464         assert(sizeof(*desc) == 32);
1465
1466         /*Setup the descritor table*/
1467         memset(desc, 0, sizeof(*desc));
1468         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1469         desc->desc2.sampler_count = 0;
1470         desc->desc2.sampler_state_pointer = 0;
1471         desc->desc3.binding_table_entry_count = 2;
1472         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1473         desc->desc4.constant_urb_entry_read_offset = 0;
1474         desc->desc4.constant_urb_entry_read_length = 4;
1475                 
1476         /*kernel start*/
1477         dri_bo_emit_reloc(bo,   
1478                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1479                           0,
1480                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1481                           kernel->bo);
1482         desc++;
1483     }
1484
1485     dri_bo_unmap(bo);
1486 }
1487
1488 static void
1489 gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1490                                     struct encode_state *encode_state,
1491                                     struct intel_encoder_context *encoder_context)
1492 {
1493     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1494     
1495     (void)mfc_context;
1496 }
1497
1498 static void
1499 gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1500                                          int index,
1501                                          int head_offset,
1502                                          int batchbuffer_offset,
1503                                          int head_size,
1504                                          int tail_size,
1505                                          int number_mb_cmds,
1506                                          int first_object,
1507                                          int last_object,
1508                                          int last_slice,
1509                                          int mb_x,
1510                                          int mb_y,
1511                                          int width_in_mbs,
1512                                          int qp)
1513 {
1514     BEGIN_BATCH(batch, 12);
1515     
1516     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1517     OUT_BATCH(batch, index);
1518     OUT_BATCH(batch, 0);
1519     OUT_BATCH(batch, 0);
1520     OUT_BATCH(batch, 0);
1521     OUT_BATCH(batch, 0);
1522    
1523     /*inline data */
1524     OUT_BATCH(batch, head_offset);
1525     OUT_BATCH(batch, batchbuffer_offset);
1526     OUT_BATCH(batch, 
1527               head_size << 16 |
1528               tail_size);
1529     OUT_BATCH(batch,
1530               number_mb_cmds << 16 |
1531               first_object << 2 |
1532               last_object << 1 |
1533               last_slice);
1534     OUT_BATCH(batch,
1535               mb_y << 8 |
1536               mb_x);
1537     OUT_BATCH(batch,
1538               qp << 16 |
1539               width_in_mbs);
1540
1541     ADVANCE_BATCH(batch);
1542 }
1543
1544 static void
1545 gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1546                                        struct intel_encoder_context *encoder_context,
1547                                        VAEncSliceParameterBufferH264 *slice_param,
1548                                        int head_offset,
1549                                        unsigned short head_size,
1550                                        unsigned short tail_size,
1551                                        int batchbuffer_offset,
1552                                        int qp,
1553                                        int last_slice)
1554 {
1555     struct intel_batchbuffer *batch = encoder_context->base.batch;
1556     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1557     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1558     int total_mbs = slice_param->num_macroblocks;
1559     int number_mb_cmds = 128;
1560     int starting_mb = 0;
1561     int last_object = 0;
1562     int first_object = 1;
1563     int i;
1564     int mb_x, mb_y;
1565     int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1566
1567     for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1568         last_object = (total_mbs - starting_mb) == number_mb_cmds;
1569         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1570         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1571         assert(mb_x <= 255 && mb_y <= 255);
1572
1573         starting_mb += number_mb_cmds;
1574
1575         gen75_mfc_batchbuffer_emit_object_command(batch,
1576                                                  index,
1577                                                  head_offset,
1578                                                  batchbuffer_offset,
1579                                                  head_size,
1580                                                  tail_size,
1581                                                  number_mb_cmds,
1582                                                  first_object,
1583                                                  last_object,
1584                                                  last_slice,
1585                                                  mb_x,
1586                                                  mb_y,
1587                                                  width_in_mbs,
1588                                                  qp);
1589
1590         if (first_object) {
1591             head_offset += head_size;
1592             batchbuffer_offset += head_size;
1593         }
1594
1595         if (last_object) {
1596             head_offset += tail_size;
1597             batchbuffer_offset += tail_size;
1598         }
1599
1600         batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
1601
1602         first_object = 0;
1603     }
1604
1605     if (!last_object) {
1606         last_object = 1;
1607         number_mb_cmds = total_mbs % number_mb_cmds;
1608         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1609         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1610         assert(mb_x <= 255 && mb_y <= 255);
1611         starting_mb += number_mb_cmds;
1612
1613         gen75_mfc_batchbuffer_emit_object_command(batch,
1614                                                  index,
1615                                                  head_offset,
1616                                                  batchbuffer_offset,
1617                                                  head_size,
1618                                                  tail_size,
1619                                                  number_mb_cmds,
1620                                                  first_object,
1621                                                  last_object,
1622                                                  last_slice,
1623                                                  mb_x,
1624                                                  mb_y,
1625                                                  width_in_mbs,
1626                                                  qp);
1627     }
1628 }
1629                           
1630 /*
1631  * return size in Owords (16bytes)
1632  */         
1633 static int
1634 gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1635                                struct encode_state *encode_state,
1636                                struct intel_encoder_context *encoder_context,
1637                                int slice_index,
1638                                int batchbuffer_offset)
1639 {
1640     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1641     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1642     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1643     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1644     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1645     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1646     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1647     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1648     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1649     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1650     unsigned char *slice_header = NULL;
1651     int slice_header_length_in_bits = 0;
1652     unsigned int tail_data[] = { 0x0, 0x0 };
1653     long head_offset;
1654     int old_used = intel_batchbuffer_used_size(slice_batch), used;
1655     unsigned short head_size, tail_size;
1656     int slice_type = pSliceParameter->slice_type;
1657
1658     if (rate_control_mode == VA_RC_CBR) {
1659         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1660         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1661     }
1662
1663     /* only support for 8-bit pixel bit-depth */
1664     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1665     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1666     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1667     assert(qp >= 0 && qp < 52);
1668
1669     head_offset = old_used / 16;
1670     gen75_mfc_avc_slice_state(ctx,
1671                              pPicParameter,
1672                              pSliceParameter,
1673                              encode_state,
1674                              encoder_context,
1675                              (rate_control_mode == VA_RC_CBR),
1676                              qp,
1677                              slice_batch);
1678
1679     if (slice_index == 0)
1680         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1681
1682     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1683
1684     // slice hander
1685     mfc_context->insert_object(ctx,
1686                                encoder_context,
1687                                (unsigned int *)slice_header,
1688                                ALIGN(slice_header_length_in_bits, 32) >> 5,
1689                                slice_header_length_in_bits & 0x1f,
1690                                5,  /* first 5 bytes are start code + nal unit type */
1691                                1,
1692                                0,
1693                                1,
1694                                slice_batch);
1695     free(slice_header);
1696
1697     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1698     used = intel_batchbuffer_used_size(slice_batch);
1699     head_size = (used - old_used) / 16;
1700     old_used = used;
1701
1702     /* tail */
1703     if (last_slice) {    
1704         mfc_context->insert_object(ctx,
1705                                    encoder_context,
1706                                    tail_data,
1707                                    2,
1708                                    8,
1709                                    2,
1710                                    1,
1711                                    1,
1712                                    0,
1713                                    slice_batch);
1714     } else {
1715         mfc_context->insert_object(ctx,
1716                                    encoder_context,
1717                                    tail_data,
1718                                    1,
1719                                    8,
1720                                    1,
1721                                    1,
1722                                    1,
1723                                    0,
1724                                    slice_batch);
1725     }
1726
1727     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1728     used = intel_batchbuffer_used_size(slice_batch);
1729     tail_size = (used - old_used) / 16;
1730
1731    
1732     gen75_mfc_avc_batchbuffer_slice_command(ctx,
1733                                            encoder_context,
1734                                            pSliceParameter,
1735                                            head_offset,
1736                                            head_size,
1737                                            tail_size,
1738                                            batchbuffer_offset,
1739                                            qp,
1740                                            last_slice);
1741
1742     return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
1743 }
1744
1745 static void
1746 gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1747                                   struct encode_state *encode_state,
1748                                   struct intel_encoder_context *encoder_context)
1749 {
1750     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1751     struct intel_batchbuffer *batch = encoder_context->base.batch;
1752     int i, size, offset = 0;
1753     intel_batchbuffer_start_atomic(batch, 0x4000); 
1754     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1755
1756     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1757         size = gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1758         offset += size;
1759     }
1760
1761     intel_batchbuffer_end_atomic(batch);
1762     intel_batchbuffer_flush(batch);
1763 }
1764
1765 static void
1766 gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
1767                                struct encode_state *encode_state,
1768                                struct intel_encoder_context *encoder_context)
1769 {
1770     gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1771     gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1772     gen75_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1773     gen75_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1774 }
1775
1776 static dri_bo *
1777 gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1778                                   struct encode_state *encode_state,
1779                                   struct intel_encoder_context *encoder_context)
1780 {
1781     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1782
1783     gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1784     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1785
1786     return mfc_context->mfc_batchbuffer_surface.bo;
1787 }
1788
1789 #endif
1790
1791 static void
1792 gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
1793                                  struct encode_state *encode_state,
1794                                  struct intel_encoder_context *encoder_context)
1795 {
1796     struct intel_batchbuffer *batch = encoder_context->base.batch;
1797     dri_bo *slice_batch_bo;
1798
1799     if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1800         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1801         assert(0);
1802         return; 
1803     }
1804
1805 #ifdef MFC_SOFTWARE_HASWELL
1806     slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1807 #else
1808     slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1809 #endif
1810
1811     // begin programing
1812     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1813     intel_batchbuffer_emit_mi_flush(batch);
1814     
1815     // picture level programing
1816     gen75_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1817
1818     BEGIN_BCS_BATCH(batch, 2);
1819     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1820     OUT_BCS_RELOC(batch,
1821                   slice_batch_bo,
1822                   I915_GEM_DOMAIN_COMMAND, 0, 
1823                   0);
1824     ADVANCE_BCS_BATCH(batch);
1825
1826     // end programing
1827     intel_batchbuffer_end_atomic(batch);
1828
1829     dri_bo_unreference(slice_batch_bo);
1830 }
1831
1832
1833 static VAStatus
1834 gen75_mfc_avc_encode_picture(VADriverContextP ctx, 
1835                             struct encode_state *encode_state,
1836                             struct intel_encoder_context *encoder_context)
1837 {
1838     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1839     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1840     int current_frame_bits_size;
1841     int sts;
1842  
1843     for (;;) {
1844         gen75_mfc_init(ctx, encoder_context);
1845         gen75_mfc_avc_prepare(ctx, encode_state, encoder_context);
1846         /*Programing bcs pipeline*/
1847         gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);  //filling the pipeline
1848         gen75_mfc_run(ctx, encode_state, encoder_context);
1849         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1850             gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1851             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1852             if (sts == BRC_NO_HRD_VIOLATION) {
1853                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1854                 break;
1855             }
1856             else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1857                 if (!mfc_context->hrd.violation_noted) {
1858                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1859                     mfc_context->hrd.violation_noted = 1;
1860                 }
1861                 return VA_STATUS_SUCCESS;
1862             }
1863         } else {
1864             break;
1865         }
1866     }
1867
1868     return VA_STATUS_SUCCESS;
1869 }
1870
1871
1872 static void
1873 gen75_mfc_context_destroy(void *context)
1874 {
1875     struct gen6_mfc_context *mfc_context = context;
1876     int i;
1877
1878     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
1879     mfc_context->post_deblocking_output.bo = NULL;
1880
1881     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
1882     mfc_context->pre_deblocking_output.bo = NULL;
1883
1884     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
1885     mfc_context->uncompressed_picture_source.bo = NULL;
1886
1887     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
1888     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
1889
1890     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
1891         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
1892         mfc_context->direct_mv_buffers[i].bo = NULL;
1893     }
1894
1895     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
1896     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
1897
1898     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
1899     mfc_context->macroblock_status_buffer.bo = NULL;
1900
1901     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
1902     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1903
1904     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
1905     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1906
1907
1908     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
1909         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
1910         mfc_context->reference_surfaces[i].bo = NULL;  
1911     }
1912
1913     i965_gpe_context_destroy(&mfc_context->gpe_context);
1914
1915     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
1916     mfc_context->mfc_batchbuffer_surface.bo = NULL;
1917
1918     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
1919     mfc_context->aux_batchbuffer_surface.bo = NULL;
1920
1921     if (mfc_context->aux_batchbuffer)
1922         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
1923
1924     mfc_context->aux_batchbuffer = NULL;
1925
1926     free(mfc_context);
1927 }
1928
1929 static VAStatus gen75_mfc_pipeline(VADriverContextP ctx,
1930                   VAProfile profile,
1931                   struct encode_state *encode_state,
1932                   struct intel_encoder_context *encoder_context)
1933 {
1934     VAStatus vaStatus;
1935
1936     switch (profile) {
1937     case VAProfileH264Baseline:
1938     case VAProfileH264Main:
1939     case VAProfileH264High:
1940         vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
1941         break;
1942
1943         /* FIXME: add for other profile */
1944     default:
1945         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
1946         break;
1947     }
1948
1949     return vaStatus;
1950 }
1951
1952 Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1953 {
1954     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
1955
1956     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
1957
1958     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
1959     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
1960
1961     mfc_context->gpe_context.curbe.length = 32 * 4;
1962
1963     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
1964     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
1965     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
1966     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
1967     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
1968
1969     i965_gpe_load_kernels(ctx,
1970                           &mfc_context->gpe_context,
1971                           gen75_mfc_kernels,
1972                           NUM_MFC_KERNEL);
1973
1974     mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
1975     mfc_context->set_surface_state = gen75_mfc_surface_state;
1976     mfc_context->ind_obj_base_addr_state = gen75_mfc_ind_obj_base_addr_state;
1977     mfc_context->avc_img_state = gen75_mfc_avc_img_state;
1978     mfc_context->avc_qm_state = gen75_mfc_avc_qm_state;
1979     mfc_context->avc_fqm_state = gen75_mfc_avc_fqm_state;
1980     mfc_context->insert_object = gen75_mfc_avc_insert_object;
1981     mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;
1982
1983     encoder_context->mfc_context = mfc_context;
1984     encoder_context->mfc_context_destroy = gen75_mfc_context_destroy;
1985     encoder_context->mfc_pipeline = gen75_mfc_pipeline;
1986     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
1987
1988     return True;
1989 }