Add the Intra VME for I-frame on Haswell
[platform/upstream/libva-intel-driver.git] / src / gen75_mfc.c
/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Zhao Yakui <yakui.zhao@intel.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <assert.h>

#include "intel_batchbuffer.h"
#include "i965_defines.h"
#include "i965_structs.h"
#include "i965_drv_video.h"
#include "i965_encoder.h"
#include "i965_encoder_utils.h"
#include "gen6_mfc.h"
#include "gen6_vme.h"

#define MFC_SOFTWARE_HASWELL    1

static const uint32_t gen75_mfc_batchbuffer_avc_intra[][4] = {
#include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
};

static const uint32_t gen75_mfc_batchbuffer_avc_inter[][4] = {
#include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
};

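/*
 * Media kernels used to build the MFC/PAK batch buffer on the GPU.
 * They are only used when MFC_SOFTWARE_HASWELL is not defined; with the
 * define above set to 1, the slice batch buffer is built on the CPU in
 * gen75_mfc_avc_software_batchbuffer() instead.
 */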
static struct i965_kernel gen75_mfc_kernels[] = {
    {
        "MFC AVC INTRA BATCHBUFFER ",
        MFC_BATCHBUFFER_AVC_INTRA,
        gen75_mfc_batchbuffer_avc_intra,
        sizeof(gen75_mfc_batchbuffer_avc_intra),
        NULL
    },

    {
        "MFC AVC INTER BATCHBUFFER ",
        MFC_BATCHBUFFER_AVC_INTER,
        gen75_mfc_batchbuffer_avc_inter,
        sizeof(gen75_mfc_batchbuffer_avc_inter),
        NULL
    },
};

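/*
 * MFX_PIPE_MODE_SELECT: put the MFX engine into encoding mode for the
 * selected standard (AVC or MPEG-2), enable stream-out, and select the
 * pre-/post-deblocking output path depending on which buffer was set up.
 */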
static void
gen75_mfc_pipe_mode_select(VADriverContextP ctx,
                           int standard_select,
                           struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC);

    BEGIN_BCS_BATCH(batch, 5);

    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (1 << 10) | /* Stream-Out Enable */
                  ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
                  ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (1 << 4)  | /* encoding mode */
                  (standard_select << 0));  /* standard select: avc or mpeg2 */
    OUT_BCS_BATCH(batch,
                  (0 << 7)  | /* expand NOA bus flag */
                  (0 << 6)  | /* disable slice-level clock gating */
                  (0 << 5)  | /* disable clock gating for NOA */
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}

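/*
 * MFX_SURFACE_STATE: describe the reconstructed frame surface - NV12
 * (planar 4:2:0 with interleaved U/V), Y-major tiling, using the
 * dimensions, pitch and U/V offset cached in mfc_context->surface_state.
 */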
static void
gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((mfc_context->surface_state.height - 1) << 18) |
                  ((mfc_context->surface_state.width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
                  (0 << 22) | /* surface object control state, FIXME??? */
                  ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 for interleave U/V */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                              /* must be 0 for interleave U/V */
                  (mfc_context->surface_state.h_pitch));   /* y offset for U(cb) */
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}

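/*
 * MFX_IND_OBJ_BASE_ADDR_STATE: the indirect MV object points at the VME
 * output buffer (PAK objects reference motion vectors by offset into it),
 * and the indirect PAK-BSE object points at the coded buffer where the
 * packed bitstream is written, bounded by end_offset.
 */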
static void
gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct gen6_vme_context *vme_context = encoder_context->vme_context;

    BEGIN_BCS_BATCH(batch, 11);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* MFX Indirect MV Object Base Address */
    OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* MFC Indirect PAK-BSE Object Base Address for Encoder */
    OUT_BCS_RELOC(batch,
                  mfc_context->mfc_indirect_pak_bse_object.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_RELOC(batch,
                  mfc_context->mfc_indirect_pak_bse_object.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  mfc_context->mfc_indirect_pak_bse_object.end_offset);

    ADVANCE_BCS_BATCH(batch);
}

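/*
 * MFX_AVC_IMG_STATE: per-picture AVC encoding parameters - frame size in
 * macroblocks, entropy coding mode (CAVLC/CABAC), transform_8x8 mode,
 * weighted prediction flags and the inter/intra MB conformance limits.
 */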
static void
gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;

    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;

    BEGIN_BCS_BATCH(batch, 16);

    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    OUT_BCS_BATCH(batch,
                  ((width_in_mbs * height_in_mbs) & 0xFFFF));
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 24) |   /* Second Chroma QP Offset */
                  (0 << 16) |   /* Chroma QP Offset */
                  (0 << 14) |   /* Max-bit conformance Intra flag */
                  (0 << 13) |   /* Max Macroblock size conformance Inter flag */
                  (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /* Weighted_Pred_Flag */
                  (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
                  (0 << 8)  |   /* FIXME: Image Structure */
                  (0 << 0));    /* Current Decoded Image Frame Store ID, reserved in Encode mode */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |   /* Minimum Frame size */
                  (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
                  (0 << 14) |   /* Load BitStream Pointer only once, 1 slice 1 frame */
                  (0 << 13) |   /* CABAC 0 word insertion test enable */
                  (1 << 12) |   /* MVUnpackedEnable, compliant to DXVA */
                  (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
                  (0 << 9)  |   /* FIXME: MbMvFormatFlag */
                  (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /* 0: CAVLC encoding mode, 1: CABAC */
                  (0 << 6)  |   /* Only valid for VLD decoding mode */
                  (0 << 5)  |   /* Constrained Intra Prediction Flag, from PPS */
                  (0 << 4)  |   /* Direct 8x8 inference flag */
                  (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /* 8x8 or 4x4 IDCT Transform Mode Flag */
                  (1 << 2)  |   /* Frame MB only flag */
                  (0 << 1)  |   /* MBAFF mode is not active */
                  (0 << 0));    /* Field picture flag */
    OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
    OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
                  (0xBB8 << 16) |       /* InterMbMaxSz */
                  (0xEE8));             /* IntraMbMaxSz */
    OUT_BCS_BATCH(batch, 0);            /* Reserved */
    OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
    OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
    OUT_BCS_BATCH(batch, 0x8C000000);
    OUT_BCS_BATCH(batch, 0x00010000);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}

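/*
 * The QM/FQM packets below load flat quantization matrices: every QM byte
 * is 16 and every FQM halfword is 0x1000, the flat/default values, so no
 * custom scaling lists are applied.
 */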
static void
gen75_mfc_qm_state(VADriverContextP ctx,
                   int qm_type,
                   unsigned int *qm,
                   int qm_length,
                   struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    unsigned int qm_buffer[16];

    assert(qm_length <= 16);
    assert(sizeof(*qm) == 4);
    memset(qm_buffer, 0, sizeof(qm_buffer));    /* don't send uninitialized padding DWORDs */
    memcpy(qm_buffer, qm, qm_length * 4);

    BEGIN_BCS_BATCH(batch, 18);
    OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
    OUT_BCS_BATCH(batch, qm_type << 0);
    intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
    ADVANCE_BCS_BATCH(batch);
}

static void
gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    unsigned int qm[16] = {
        0x10101010, 0x10101010, 0x10101010, 0x10101010,
        0x10101010, 0x10101010, 0x10101010, 0x10101010,
        0x10101010, 0x10101010, 0x10101010, 0x10101010,
        0x10101010, 0x10101010, 0x10101010, 0x10101010
    };

    gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
    gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
    gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
    gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
}

static void
gen75_mfc_fqm_state(VADriverContextP ctx,
                    int fqm_type,
                    unsigned int *fqm,
                    int fqm_length,
                    struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    unsigned int fqm_buffer[32];

    assert(fqm_length <= 32);
    assert(sizeof(*fqm) == 4);
    memset(fqm_buffer, 0, sizeof(fqm_buffer));  /* don't send uninitialized padding DWORDs */
    memcpy(fqm_buffer, fqm, fqm_length * 4);

    BEGIN_BCS_BATCH(batch, 34);
    OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
    OUT_BCS_BATCH(batch, fqm_type << 0);
    intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
    ADVANCE_BCS_BATCH(batch);
}

static void
gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    unsigned int qm[32] = {
        0x10001000, 0x10001000, 0x10001000, 0x10001000,
        0x10001000, 0x10001000, 0x10001000, 0x10001000,
        0x10001000, 0x10001000, 0x10001000, 0x10001000,
        0x10001000, 0x10001000, 0x10001000, 0x10001000,
        0x10001000, 0x10001000, 0x10001000, 0x10001000,
        0x10001000, 0x10001000, 0x10001000, 0x10001000,
        0x10001000, 0x10001000, 0x10001000, 0x10001000,
        0x10001000, 0x10001000, 0x10001000, 0x10001000
    };

    gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
    gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
    gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
    gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
}

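/*
 * MFX_INSERT_OBJECT: inject raw bitstream (packed SPS/PPS/SEI headers,
 * slice headers, tail data) into the output. The caller controls how many
 * bits of the last DWORD are valid, how many leading bytes to skip for
 * emulation-prevention handling, and the last-header/end-of-slice flags.
 */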
static void
gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
                            unsigned int *insert_data, int length_in_dws, int data_bits_in_last_dw,
                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
                            struct intel_batchbuffer *batch)
{
    if (batch == NULL)
        batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, length_in_dws + 2);

    OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (length_in_dws + 2 - 2));
    OUT_BCS_BATCH(batch,
                  (0 << 16) |   /* always start at offset 0 */
                  (data_bits_in_last_dw << 8) |
                  (skip_emul_byte_count << 4) |
                  (!!emulation_flag << 3) |
                  ((!!is_last_header) << 2) |
                  ((!!is_end_of_slice) << 1) |
                  (0 << 0));    /* FIXME: ??? */
    intel_batchbuffer_data(batch, insert_data, length_in_dws * 4);

    ADVANCE_BCS_BATCH(batch);
}

static void
gen75_mfc_free_avc_surface(void **data)
{
    struct gen6_mfc_avc_surface_aux *avc_surface = *data;

    if (!avc_surface)
        return;

    dri_bo_unreference(avc_surface->dmv_top);
    avc_surface->dmv_top = NULL;
    dri_bo_unreference(avc_surface->dmv_bottom);
    avc_surface->dmv_bottom = NULL;

    free(avc_surface);
    *data = NULL;
}

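/*
 * Per-encode setup: drop the buffers that belong to the previous frame,
 * allocate the intra/deblocking/BSD-MPC row-store and macroblock status
 * scratch buffers, and create the auxiliary batch buffer that the slice
 * programming code fills with PAK commands.
 */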
static void gen75_mfc_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    dri_bo *bo;
    int i;

    /* Encode common setup for MFC */
    dri_bo_unreference(mfc_context->post_deblocking_output.bo);
    mfc_context->post_deblocking_output.bo = NULL;

    dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
    mfc_context->pre_deblocking_output.bo = NULL;

    dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
    mfc_context->uncompressed_picture_source.bo = NULL;

    dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
    mfc_context->mfc_indirect_pak_bse_object.bo = NULL;

    for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
        if (mfc_context->direct_mv_buffers[i].bo != NULL)
            dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
        mfc_context->direct_mv_buffers[i].bo = NULL;
    }

    for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
        if (mfc_context->reference_surfaces[i].bo != NULL)
            dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
        mfc_context->reference_surfaces[i].bo = NULL;
    }

    dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      128 * 64,
                      64);
    assert(bo);
    mfc_context->intra_row_store_scratch_buffer.bo = bo;

    dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      128*128*16,
                      64);
    assert(bo);
    mfc_context->macroblock_status_buffer.bo = bo;

    dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      49152,  /* 6 * 128 * 64 */
                      64);
    assert(bo);
    mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;

    dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      12288, /* 1.5 * 128 * 64 */
                      0x1000);
    assert(bo);
    mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;

    dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
    mfc_context->mfc_batchbuffer_surface.bo = NULL;

    dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
    mfc_context->aux_batchbuffer_surface.bo = NULL;

    if (mfc_context->aux_batchbuffer)
        intel_batchbuffer_free(mfc_context->aux_batchbuffer);

    mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD);
    mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
    dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
    mfc_context->aux_batchbuffer_surface.pitch = 16;
    mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
    mfc_context->aux_batchbuffer_surface.size_block = 16;

    i965_gpe_context_init(ctx, &mfc_context->gpe_context);
}

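/*
 * MFX_PIPE_BUF_ADDR_STATE: hand the pipeline its frame-level buffers -
 * pre/post deblocking output, the uncompressed source, the stream-out and
 * macroblock status buffers, the row-store scratch buffers and up to 16
 * reference picture addresses.
 */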
static void
gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    int i;

    BEGIN_BCS_BATCH(batch, 24);

    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));

    if (mfc_context->pre_deblocking_output.bo)
        OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);                                        /* pre output addr  */

    if (mfc_context->post_deblocking_output.bo)
        OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);                                               /* post output addr */
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);                                                   /* uncompressed data */
    OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);                                                   /* StreamOut data */
    OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    /* 7..22 Reference pictures */
    for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
        if (mfc_context->reference_surfaces[i].bo != NULL) {
            OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }
    OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);                                                   /* Macroblock status buffer */

    ADVANCE_BCS_BATCH(batch);
}

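/*
 * MFX_AVC_DIRECTMODE_STATE: provide the direct-MV buffers for the
 * reference frames and the current frame, followed by the POC list the
 * hardware uses for direct/temporal prediction.
 */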
static void
gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    int i;

    BEGIN_BCS_BATCH(batch, 69);

    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));

    /* Reference frames and Current frames */
    for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
        if (mfc_context->direct_mv_buffers[i].bo != NULL) {
            OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* POC list */
    for (i = 0; i < 32; i++) {
        OUT_BCS_BATCH(batch, i/2);
    }
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}

static void
gen75_mfc_avc_ref_idx_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
    OUT_BCS_BATCH(batch, 0);                  // Select L0
    OUT_BCS_BATCH(batch, 0x80808020);         // Only 1 reference
    for (i = 0; i < 7; i++) {
        OUT_BCS_BATCH(batch, 0x80808080);
    }
    ADVANCE_BCS_BATCH(batch);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
    OUT_BCS_BATCH(batch, 1);                  // Select L1
    OUT_BCS_BATCH(batch, 0x80808022);         // Only 1 reference
    for (i = 0; i < 7; i++) {
        OUT_BCS_BATCH(batch, 0x80808080);
    }
    ADVANCE_BCS_BATCH(batch);
}

static void
gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    BEGIN_BCS_BATCH(batch, 4);

    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
    OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}

static void gen75_mfc_avc_pipeline_picture_programing(VADriverContextP ctx,
                                                      struct encode_state *encode_state,
                                                      struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
    mfc_context->set_surface_state(ctx, encoder_context);
    mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
    gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
    gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
    mfc_context->avc_img_state(ctx, encode_state, encoder_context);
    mfc_context->avc_qm_state(ctx, encoder_context);
    mfc_context->avc_fqm_state(ctx, encoder_context);
    gen75_mfc_avc_directmode_state(ctx, encoder_context);
    gen75_mfc_avc_ref_idx_state(ctx, encoder_context);
}

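/*
 * Per-frame preparation: work out whether in-loop deblocking is enabled,
 * bind the reconstructed/reference surfaces and their DMV buffers, take a
 * reference on the source YUV surface and the coded buffer, and reset the
 * status byte that follows the VACodedBufferSegment header.
 */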
static VAStatus gen75_mfc_avc_prepare(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    struct gen6_mfc_avc_surface_aux* gen6_avc_surface;
    dri_bo *bo;
    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAStatus vaStatus = VA_STATUS_SUCCESS;
    int i, j, enable_avc_ildb = 0;
    VAEncSliceParameterBufferH264 *slice_param;
    VACodedBufferSegment *coded_buffer_segment;
    unsigned char *flag = NULL;

    for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
        assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
        slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;

        for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    /* Set up all the input & output objects */

    /* Set up the current frame and its direct mv buffer */
    obj_surface = SURFACE(pPicParameter->CurrPic.picture_id);
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);

    if (obj_surface->private_data == NULL) {
        gen6_avc_surface = calloc(sizeof(struct gen6_mfc_avc_surface_aux), 1);
        gen6_avc_surface->dmv_top =
            dri_bo_alloc(i965->intel.bufmgr,
                         "Buffer",
                         68*8192,
                         64);
        gen6_avc_surface->dmv_bottom =
            dri_bo_alloc(i965->intel.bufmgr,
                         "Buffer",
                         68*8192,
                         64);
        assert(gen6_avc_surface->dmv_top);
        assert(gen6_avc_surface->dmv_bottom);
        obj_surface->private_data = (void *)gen6_avc_surface;
        obj_surface->free_private_data = (void *)gen75_mfc_free_avc_surface;
    }
    gen6_avc_surface = (struct gen6_mfc_avc_surface_aux*) obj_surface->private_data;
    mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
    mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
    dri_bo_reference(gen6_avc_surface->dmv_top);
    dri_bo_reference(gen6_avc_surface->dmv_bottom);

    if (enable_avc_ildb) {
        mfc_context->post_deblocking_output.bo = obj_surface->bo;
        dri_bo_reference(mfc_context->post_deblocking_output.bo);
    } else {
        mfc_context->pre_deblocking_output.bo = obj_surface->bo;
        dri_bo_reference(mfc_context->pre_deblocking_output.bo);
    }

    mfc_context->surface_state.width = obj_surface->orig_width;
    mfc_context->surface_state.height = obj_surface->orig_height;
    mfc_context->surface_state.w_pitch = obj_surface->width;
    mfc_context->surface_state.h_pitch = obj_surface->height;

    /* Set up reference frames and direct mv buffers */
    for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
        if (pPicParameter->ReferenceFrames[i].picture_id != VA_INVALID_ID) {
            obj_surface = SURFACE(pPicParameter->ReferenceFrames[i].picture_id);
            assert(obj_surface);
            if (obj_surface->bo != NULL) {
                mfc_context->reference_surfaces[i].bo = obj_surface->bo;
                dri_bo_reference(obj_surface->bo);
            }
            /* Check DMV buffer */
            if (obj_surface->private_data == NULL) {
                gen6_avc_surface = calloc(sizeof(struct gen6_mfc_avc_surface_aux), 1);
                gen6_avc_surface->dmv_top =
                    dri_bo_alloc(i965->intel.bufmgr,
                                 "Buffer",
                                 68*8192,
                                 64);
                gen6_avc_surface->dmv_bottom =
                    dri_bo_alloc(i965->intel.bufmgr,
                                 "Buffer",
                                 68*8192,
                                 64);
                assert(gen6_avc_surface->dmv_top);
                assert(gen6_avc_surface->dmv_bottom);
                obj_surface->private_data = gen6_avc_surface;
                obj_surface->free_private_data = gen75_mfc_free_avc_surface;
            }

            gen6_avc_surface = (struct gen6_mfc_avc_surface_aux*) obj_surface->private_data;
            /* Set up DMV buffer */
            mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
            mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom;
            dri_bo_reference(gen6_avc_surface->dmv_top);
            dri_bo_reference(gen6_avc_surface->dmv_bottom);
        } else {
            break;
        }
    }

    obj_surface = SURFACE(encoder_context->input_yuv_surface);
    assert(obj_surface && obj_surface->bo);
    mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
    dri_bo_reference(mfc_context->uncompressed_picture_source.bo);

    obj_buffer = BUFFER (pPicParameter->coded_buf); /* FIXME: fix this later */
    bo = obj_buffer->buffer_store->bo;
    assert(bo);
    mfc_context->mfc_indirect_pak_bse_object.bo = bo;
    mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_SIZE;
    mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
    dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);

    dri_bo_map(bo, 1);
    coded_buffer_segment = (VACodedBufferSegment *)bo->virtual;
    flag = (unsigned char *)(coded_buffer_segment + 1);
    *flag = 0;
    dri_bo_unmap(bo);

    return vaStatus;
}

static VAStatus gen75_mfc_run(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    intel_batchbuffer_flush(batch);             // run the pipeline

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_mfc_stop(VADriverContextP ctx,
               struct encode_state *encode_state,
               struct intel_encoder_context *encoder_context,
               int *encoded_bits_size)
{
    VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VACodedBufferSegment *coded_buffer_segment;

    vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
    assert(vaStatus == VA_STATUS_SUCCESS);
    *encoded_bits_size = coded_buffer_segment->size * 8;
    i965_UnmapBuffer(ctx, pPicParameter->coded_buf);

    return VA_STATUS_SUCCESS;
}

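/*
 * MFX_AVC_SLICE_STATE: per-slice parameters - slice type, QP, deblocking
 * controls, first/next MB coordinates, the bitstream output offset and the
 * rate-control grow/shrink/correct values used for CBR operation.
 */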
static void
gen75_mfc_avc_slice_state(VADriverContextP ctx,
                          VAEncPictureParameterBufferH264 *pic_param,
                          VAEncSliceParameterBufferH264 *slice_param,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context,
                          int rate_control_enable,
                          int qp,
                          struct intel_batchbuffer *batch)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
    int beginmb = slice_param->macroblock_address;
    int endmb = beginmb + slice_param->num_macroblocks;
    int beginx = beginmb % width_in_mbs;
    int beginy = beginmb / width_in_mbs;
    int nextx =  endmb % width_in_mbs;
    int nexty = endmb / width_in_mbs;
    int slice_type = slice_param->slice_type;
    int last_slice = (endmb == (width_in_mbs * height_in_mbs));
    int bit_rate_control_target, maxQpN, maxQpP;
    unsigned char correct[6], grow, shrink;
    int i;
    int weighted_pred_idc = 0;
    unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
    unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;

    if (batch == NULL)
        batch = encoder_context->base.batch;

    bit_rate_control_target = slice_type;
    if (slice_type == SLICE_TYPE_SP)
        bit_rate_control_target = SLICE_TYPE_P;
    else if (slice_type == SLICE_TYPE_SI)
        bit_rate_control_target = SLICE_TYPE_I;

    if (slice_type == SLICE_TYPE_P) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
    } else if (slice_type == SLICE_TYPE_B) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;

        if (weighted_pred_idc == 2) {
            /* 8.4.3 - Derivation process for prediction weights (8-279) */
            luma_log2_weight_denom = 5;
            chroma_log2_weight_denom = 5;
        }
    }

    maxQpN = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpNegModifier;
    maxQpP = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpPosModifier;

    for (i = 0; i < 6; i++)
        correct[i] = mfc_context->bit_rate_control_context[bit_rate_control_target].Correct[i];

    grow = mfc_context->bit_rate_control_context[bit_rate_control_target].GrowInit +
        (mfc_context->bit_rate_control_context[bit_rate_control_target].GrowResistance << 4);
    shrink = mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkInit +
        (mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkResistance << 4);

    BEGIN_BCS_BATCH(batch, 11);

    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);                   /* Slice Type: I:P:B Slice */

    if (slice_type == SLICE_TYPE_I) {
        OUT_BCS_BATCH(batch, 0);                        /* no reference frames and pred_weight_table */
    } else {
        OUT_BCS_BATCH(batch,
                      (1 << 16) |                       /* 1 reference frame */
                      (chroma_log2_weight_denom << 8) |
                      (luma_log2_weight_denom << 0));
    }

    OUT_BCS_BATCH(batch,
                  (weighted_pred_idc << 30) |
                  (slice_param->direct_spatial_mv_pred_flag<<29) |             /* Direct Prediction Type */
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  (qp<<16) |                    /* Slice Quantization Parameter */
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    OUT_BCS_BATCH(batch,
                  (beginy << 24) |                      /* First MB X&Y, the begin position of the current slice */
                  (beginx << 16) |
                  slice_param->macroblock_address );
    OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /* Next slice first MB X&Y */
    OUT_BCS_BATCH(batch,
                  (0/*rate_control_enable*/ << 31) |            /* in CBR mode RateControlCounterEnable = enable */
                  (1 << 30) |           /* ResetRateControlCounter */
                  (0 << 28) |           /* RC Trigger Mode = Always Rate Control */
                  (4 << 24) |     /* RC Stable Tolerance, middle level */
                  (0/*rate_control_enable*/ << 23) |     /* RC Panic Enable */
                  (0 << 22) |     /* QP mode, don't modify CBP */
                  (0 << 21) |     /* MB Type Direct Conversion Enabled */
                  (0 << 20) |     /* MB Type Skip Conversion Enabled */
                  (last_slice << 19) |     /* IsLastSlice */
                  (0 << 18) |   /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
                  (1 << 17) |       /* HeaderPresentFlag */
                  (1 << 16) |       /* SliceData PresentFlag */
                  (1 << 15) |       /* TailPresentFlag */
                  (1 << 13) |       /* RBSP NAL TYPE */
                  (0 << 12));       /* CabacZeroWordInsertionEnable */
    OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
    OUT_BCS_BATCH(batch,
                  (maxQpN << 24) |     /* Target QP - 24 is lowest QP */
                  (maxQpP << 16) |     /* Target QP + 20 is highest QP */
                  (shrink << 8)  |
                  (grow << 0));
    OUT_BCS_BATCH(batch,
                  (correct[5] << 20) |
                  (correct[4] << 16) |
                  (correct[3] << 12) |
                  (correct[2] << 8) |
                  (correct[1] << 4) |
                  (correct[0] << 0));
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}

static void gen75_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
                                                     struct encode_state *encode_state,
                                                     struct intel_encoder_context *encoder_context,
                                                     struct intel_batchbuffer *slice_batch)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);

    if (encode_state->packed_header_data[idx]) {
        VAEncPackedHeaderParameterBuffer *param = NULL;
        unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
        unsigned int length_in_bits;

        assert(encode_state->packed_header_param[idx]);
        param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
        length_in_bits = param->bit_length;

        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   5,   /* FIXME: check it */
                                   0,
                                   0,
                                   !param->has_emulation_bytes,
                                   slice_batch);
    }

    idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);

    if (encode_state->packed_header_data[idx]) {
        VAEncPackedHeaderParameterBuffer *param = NULL;
        unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
        unsigned int length_in_bits;

        assert(encode_state->packed_header_param[idx]);
        param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
        length_in_bits = param->bit_length;

        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   5, /* FIXME: check it */
                                   0,
                                   0,
                                   !param->has_emulation_bytes,
                                   slice_batch);
    }

    idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);

    if (encode_state->packed_header_data[idx]) {
        VAEncPackedHeaderParameterBuffer *param = NULL;
        unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
        unsigned int length_in_bits;

        assert(encode_state->packed_header_param[idx]);
        param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
        length_in_bits = param->bit_length;

        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   5, /* FIXME: check it */
                                   0,
                                   0,
                                   !param->has_emulation_bytes,
                                   slice_batch);
    }
}

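/*
 * With MFC_SOFTWARE_HASWELL defined, the per-macroblock PAK objects are
 * emitted by the CPU into a software-built batch buffer (the code below);
 * otherwise the gen75_mfc_kernels media kernels build the batch buffer on
 * the GPU from the VME output.
 */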
#ifdef MFC_SOFTWARE_HASWELL

static int
gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
                               int qp, unsigned int *msg,
                               struct intel_encoder_context *encoder_context,
                               unsigned char target_mb_size, unsigned char max_mb_size,
                               struct intel_batchbuffer *batch)
{
    int len_in_dwords = 11;
    unsigned int intra_msg;
#define         INTRA_MSG_FLAG          (1 << 13)
#define         INTRA_MBTYPE_MASK       (0x1F0000)
    if (batch == NULL)
        batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, len_in_dwords);

    intra_msg = msg[0] & 0xC0FF;
    intra_msg |= INTRA_MSG_FLAG;
    intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
    OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  (0 << 24) |           /* PackedMvNum, Debug */
                  (0 << 20) |           /* No motion vector */
                  (1 << 19) |           /* CbpDcY */
                  (1 << 18) |           /* CbpDcU */
                  (1 << 17) |           /* CbpDcV */
                  intra_msg);

    OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);        /* Code Block Pattern for Y */
    OUT_BCS_BATCH(batch, 0x000F000F);                           /* Code Block Pattern */
    OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */

    /* Stuff for Intra MB */
    OUT_BCS_BATCH(batch, msg[1]);                       /* We use Intra16x16, not the 4x4 pred mode */
    OUT_BCS_BATCH(batch, msg[2]);
    OUT_BCS_BATCH(batch, msg[3]&0xFC);

    /* MaxSizeInWord and TargetSizeInWord */
    OUT_BCS_BATCH(batch, (max_mb_size << 24) |
                  (target_mb_size << 16) );

    ADVANCE_BCS_BATCH(batch);

    return len_in_dwords;
}

static int
gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
                               unsigned int *msg, unsigned int offset,
                               struct intel_encoder_context *encoder_context,
                               unsigned char target_mb_size, unsigned char max_mb_size, int slice_type,
                               struct intel_batchbuffer *batch)
{
    int len_in_dwords = 11;

    if (batch == NULL)
        batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, len_in_dwords);

    OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));

    OUT_BCS_BATCH(batch, msg[2]);         /* 32 MV */
    OUT_BCS_BATCH(batch, offset);

    OUT_BCS_BATCH(batch, msg[0]);

    OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y */
    OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */
#if 0
    if (slice_type == SLICE_TYPE_B) {
        OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
    } else {
        OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
    }
#else
    OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
#endif

    /* Stuff for Inter MB */
    OUT_BCS_BATCH(batch, msg[1]);
    OUT_BCS_BATCH(batch, 0x0);
    OUT_BCS_BATCH(batch, 0x0);

    /* MaxSizeInWord and TargetSizeInWord */
    OUT_BCS_BATCH(batch, (max_mb_size << 24) |
                  (target_mb_size << 16) );

    ADVANCE_BCS_BATCH(batch);

    return len_in_dwords;
}

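/*
 * Build one slice worth of PAK commands into slice_batch: emit the slice
 * state and packed headers, then walk every macroblock in the slice,
 * reading the VME output record for that MB and emitting an intra or inter
 * PAK object accordingly, followed by the tail data that closes the slice.
 */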
static void
gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
                                        struct encode_state *encode_state,
                                        struct intel_encoder_context *encoder_context,
                                        int slice_index,
                                        struct intel_batchbuffer *slice_batch)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
    unsigned int *msg = NULL, offset = 0;
    unsigned char *msg_ptr = NULL;
    int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
    int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
    int i,x,y;
    int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    unsigned char *slice_header = NULL;
    int slice_header_length_in_bits = 0;
    unsigned int tail_data[] = { 0x0, 0x0 };
    int slice_type = pSliceParameter->slice_type;

    if (rate_control_mode == VA_RC_CBR) {
        qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
        pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
    }

    /* only support for 8-bit pixel bit-depth */
    assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
    assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
    assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
    assert(qp >= 0 && qp < 52);

    gen75_mfc_avc_slice_state(ctx,
                              pPicParameter,
                              pSliceParameter,
                              encode_state, encoder_context,
                              (rate_control_mode == VA_RC_CBR), qp, slice_batch);

    if (slice_index == 0)
        gen75_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);

    slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);

    // slice header
    mfc_context->insert_object(ctx, encoder_context,
                               (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
                               5,  /* first 5 bytes are start code + nal unit type */
                               1, 0, 1, slice_batch);

    dri_bo_map(vme_context->vme_output.bo , 1);
    msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;

    if (is_intra) {
        msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
    } else {
        msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
        msg += 32; /* the first 32 DWs are MVs */
        offset = pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_BYTES;
    }

    for (i = pSliceParameter->macroblock_address;
         i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
        int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
        x = i % width_in_mbs;
        y = i / width_in_mbs;
        msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);

        if (is_intra) {
            assert(msg);
            gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
        } else {
            if (msg[0] & INTRA_MB_FLAG_MASK) {
                gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
            } else {
                gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
            }

            offset += INTER_VME_OUTPUT_IN_BYTES;
        }
    }

    dri_bo_unmap(vme_context->vme_output.bo);

    if (last_slice) {
        mfc_context->insert_object(ctx, encoder_context,
                                   tail_data, 2, 8,
                                   2, 1, 1, 0, slice_batch);
    } else {
        mfc_context->insert_object(ctx, encoder_context,
                                   tail_data, 1, 8,
                                   1, 1, 1, 0, slice_batch);
    }

    free(slice_header);
}

static dri_bo *
gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
                                   struct encode_state *encode_state,
                                   struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD);
    dri_bo *batch_bo = batch->buffer;
    int i;

    for (i = 0; i < encode_state->num_slice_params_ext; i++) {
        gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
    }

    intel_batchbuffer_align(batch, 8);

    BEGIN_BCS_BATCH(batch, 2);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
    ADVANCE_BCS_BATCH(batch);

    dri_bo_reference(batch_bo);
    intel_batchbuffer_free(batch);

    return batch_bo;
}

#else

static void
gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    assert(vme_context->vme_output.bo);
    mfc_context->buffer_suface_setup(ctx,
                                     &mfc_context->gpe_context,
                                     &vme_context->vme_output,
                                     BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
                                     SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
    assert(mfc_context->aux_batchbuffer_surface.bo);
    mfc_context->buffer_suface_setup(ctx,
                                     &mfc_context->gpe_context,
                                     &mfc_context->aux_batchbuffer_surface,
                                     BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
                                     SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
}

static void
gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;

    mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
    mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
    mfc_context->mfc_batchbuffer_surface.pitch = 16;
    mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr,
                                                           "MFC batchbuffer",
                                                           mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
                                                           0x1000);
    mfc_context->buffer_suface_setup(ctx,
                                     &mfc_context->gpe_context,
                                     &mfc_context->mfc_batchbuffer_surface,
                                     BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
                                     SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
}

1239 static void
1240 gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, 
1241                                     struct encode_state *encode_state,
1242                                     struct intel_encoder_context *encoder_context)
1243 {
1244     gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1245     gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
1246 }
1247
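/*
 * Fill the interface descriptor remap table: one 32-byte descriptor per loaded
 * kernel, each carrying a relocation so the kernel start pointer tracks the
 * kernel BO's graphics address.
 */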
1248 static void
1249 gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, 
1250                                 struct encode_state *encode_state,
1251                                 struct intel_encoder_context *encoder_context)
1252 {
1253     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1254     struct gen6_interface_descriptor_data *desc;   
1255     int i;
1256     dri_bo *bo;
1257
1258     bo = mfc_context->gpe_context.idrt.bo;
1259     dri_bo_map(bo, 1);
1260     assert(bo->virtual);
1261     desc = bo->virtual;
1262
1263     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1264         struct i965_kernel *kernel;
1265
1266         kernel = &mfc_context->gpe_context.kernels[i];
1267         assert(sizeof(*desc) == 32);
1268
1269         /* Set up the descriptor table */
1270         memset(desc, 0, sizeof(*desc));
1271         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1272         desc->desc2.sampler_count = 0;
1273         desc->desc2.sampler_state_pointer = 0;
1274         desc->desc3.binding_table_entry_count = 2;
1275         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1276         desc->desc4.constant_urb_entry_read_offset = 0;
1277         desc->desc4.constant_urb_entry_read_length = 4;
1278                 
1279         /*kernel start*/
1280         dri_bo_emit_reloc(bo,   
1281                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1282                           0,
1283                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
1284                           kernel->bo);
1285         desc++;
1286     }
1287
1288     dri_bo_unmap(bo);
1289 }
1290
1291 static void
1292 gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx, 
1293                                     struct encode_state *encode_state,
1294                                     struct intel_encoder_context *encoder_context)
1295 {
1296     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1297     
1298     (void)mfc_context;
1299 }
1300
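/*
 * Emit one CMD_MEDIA_OBJECT asking the selected kernel (intra or inter) to
 * expand a run of macroblock PAK commands into the MFC batch buffer.  The six
 * inline DWords carry the head offset, the destination offset, the head/tail
 * sizes in OWords, the MB command count with first/last-object and last-slice
 * flags, the starting MB x/y, and the QP together with the width in MBs.
 */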
1301 static void
1302 gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1303                                          int index,
1304                                          int head_offset,
1305                                          int batchbuffer_offset,
1306                                          int head_size,
1307                                          int tail_size,
1308                                          int number_mb_cmds,
1309                                          int first_object,
1310                                          int last_object,
1311                                          int last_slice,
1312                                          int mb_x,
1313                                          int mb_y,
1314                                          int width_in_mbs,
1315                                          int qp)
1316 {
1317     BEGIN_BATCH(batch, 12);
1318     
1319     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1320     OUT_BATCH(batch, index);
1321     OUT_BATCH(batch, 0);
1322     OUT_BATCH(batch, 0);
1323     OUT_BATCH(batch, 0);
1324     OUT_BATCH(batch, 0);
1325    
1326     /*inline data */
1327     OUT_BATCH(batch, head_offset);
1328     OUT_BATCH(batch, batchbuffer_offset);
1329     OUT_BATCH(batch, 
1330               head_size << 16 |
1331               tail_size);
1332     OUT_BATCH(batch,
1333               number_mb_cmds << 16 |
1334               first_object << 2 |
1335               last_object << 1 |
1336               last_slice);
1337     OUT_BATCH(batch,
1338               mb_y << 8 |
1339               mb_x);
1340     OUT_BATCH(batch,
1341               qp << 16 |
1342               width_in_mbs);
1343
1344     ADVANCE_BATCH(batch);
1345 }
1346
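/*
 * Split one slice into MEDIA_OBJECT commands covering at most 128 macroblocks
 * each.  The head (slice state + header) is charged only to the first object
 * and the tail data only to the last, so the destination offset advances by
 * the head/tail size once plus CMD_LEN_IN_OWORD per macroblock command.
 */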
1347 static void
1348 gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1349                                        struct intel_encoder_context *encoder_context,
1350                                        VAEncSliceParameterBufferH264 *slice_param,
1351                                        int head_offset,
1352                                        unsigned short head_size,
1353                                        unsigned short tail_size,
1354                                        int batchbuffer_offset,
1355                                        int qp,
1356                                        int last_slice)
1357 {
1358     struct intel_batchbuffer *batch = encoder_context->base.batch;
1359     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1360     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1361     int total_mbs = slice_param->num_macroblocks;
1362     int number_mb_cmds = 128;
1363     int starting_mb = 0;
1364     int last_object = 0;
1365     int first_object = 1;
1366     int i;
1367     int mb_x, mb_y;
1368     int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1369
1370     for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1371         last_object = (total_mbs - starting_mb) == number_mb_cmds;
1372         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1373         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1374         assert(mb_x <= 255 && mb_y <= 255);
1375
1376         starting_mb += number_mb_cmds;
1377
1378         gen75_mfc_batchbuffer_emit_object_command(batch,
1379                                                  index,
1380                                                  head_offset,
1381                                                  batchbuffer_offset,
1382                                                  head_size,
1383                                                  tail_size,
1384                                                  number_mb_cmds,
1385                                                  first_object,
1386                                                  last_object,
1387                                                  last_slice,
1388                                                  mb_x,
1389                                                  mb_y,
1390                                                  width_in_mbs,
1391                                                  qp);
1392
1393         if (first_object) {
1394             head_offset += head_size;
1395             batchbuffer_offset += head_size;
1396         }
1397
1398         if (last_object) {
1399             head_offset += tail_size;
1400             batchbuffer_offset += tail_size;
1401         }
1402
1403         batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
1404
1405         first_object = 0;
1406     }
1407
1408     if (!last_object) {
1409         last_object = 1;
1410         number_mb_cmds = total_mbs % number_mb_cmds;
1411         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1412         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1413         assert(mb_x <= 255 && mb_y <= 255);
1414         starting_mb += number_mb_cmds;
1415
1416         gen75_mfc_batchbuffer_emit_object_command(batch,
1417                                                  index,
1418                                                  head_offset,
1419                                                  batchbuffer_offset,
1420                                                  head_size,
1421                                                  tail_size,
1422                                                  number_mb_cmds,
1423                                                  first_object,
1424                                                  last_object,
1425                                                  last_slice,
1426                                                  mb_x,
1427                                                  mb_y,
1428                                                  width_in_mbs,
1429                                                  qp);
1430     }
1431 }
1432                           
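/*
 * For a single slice: emit the slice state, any picture-level headers and the
 * packed slice header into the aux batch buffer, measure the resulting head
 * and tail sizes in OWords, then queue the MEDIA_OBJECTs that place them
 * around the per-MB commands in the final MFC batch buffer.
 */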
1433 /*
1434  * Return the size of the slice commands in OWords (16 bytes each).
1435  */
1436 static int
1437 gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1438                                struct encode_state *encode_state,
1439                                struct intel_encoder_context *encoder_context,
1440                                int slice_index,
1441                                int batchbuffer_offset)
1442 {
1443     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1444     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1445     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1446     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1447     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
1448     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1449     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1450     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1451     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1452     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1453     unsigned char *slice_header = NULL;
1454     int slice_header_length_in_bits = 0;
1455     unsigned int tail_data[] = { 0x0, 0x0 };
1456     long head_offset;
1457     int old_used = intel_batchbuffer_used_size(slice_batch), used;
1458     unsigned short head_size, tail_size;
1459     int slice_type = pSliceParameter->slice_type;
1460
1461     if (rate_control_mode == VA_RC_CBR) {
1462         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1463         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1464     }
1465
1466     /* Only 8-bit pixel bit depth is supported */
1467     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1468     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1469     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1470     assert(qp >= 0 && qp < 52);
1471
1472     head_offset = old_used / 16;
1473     gen75_mfc_avc_slice_state(ctx,
1474                              pPicParameter,
1475                              pSliceParameter,
1476                              encode_state,
1477                              encoder_context,
1478                              (rate_control_mode == VA_RC_CBR),
1479                              qp,
1480                              slice_batch);
1481
1482     if (slice_index == 0)
1483         gen75_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1484
1485     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1486
1487     // slice header
1488     mfc_context->insert_object(ctx,
1489                                encoder_context,
1490                                (unsigned int *)slice_header,
1491                                ALIGN(slice_header_length_in_bits, 32) >> 5,
1492                                slice_header_length_in_bits & 0x1f,
1493                                5,  /* first 5 bytes are start code + nal unit type */
1494                                1,
1495                                0,
1496                                1,
1497                                slice_batch);
1498     free(slice_header);
1499
1500     intel_batchbuffer_align(slice_batch, 16); /* align to an OWord boundary */
1501     used = intel_batchbuffer_used_size(slice_batch);
1502     head_size = (used - old_used) / 16;
1503     old_used = used;
1504
1505     /* tail */
1506     if (last_slice) {    
1507         mfc_context->insert_object(ctx,
1508                                    encoder_context,
1509                                    tail_data,
1510                                    2,
1511                                    8,
1512                                    2,
1513                                    1,
1514                                    1,
1515                                    0,
1516                                    slice_batch);
1517     } else {
1518         mfc_context->insert_object(ctx,
1519                                    encoder_context,
1520                                    tail_data,
1521                                    1,
1522                                    8,
1523                                    1,
1524                                    1,
1525                                    1,
1526                                    0,
1527                                    slice_batch);
1528     }
1529
1530     intel_batchbuffer_align(slice_batch, 16); /* align to an OWord boundary */
1531     used = intel_batchbuffer_used_size(slice_batch);
1532     tail_size = (used - old_used) / 16;
1533
1534    
1535     gen75_mfc_avc_batchbuffer_slice_command(ctx,
1536                                            encoder_context,
1537                                            pSliceParameter,
1538                                            head_offset,
1539                                            head_size,
1540                                            tail_size,
1541                                            batchbuffer_offset,
1542                                            qp,
1543                                            last_slice);
1544
1545     return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
1546 }
1547
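/*
 * Drive the GPE pipeline that generates the MFC batch buffer: set up the media
 * pipeline state, then issue one group of MEDIA_OBJECTs per slice, adding each
 * slice's size to the running offset so slices land back to back in the output
 * surface.
 */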
1548 static void
1549 gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1550                                   struct encode_state *encode_state,
1551                                   struct intel_encoder_context *encoder_context)
1552 {
1553     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1554     struct intel_batchbuffer *batch = encoder_context->base.batch;
1555     int i, size, offset = 0;
1556     intel_batchbuffer_start_atomic(batch, 0x4000); 
1557     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1558
1559     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1560         size = gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1561         offset += size;
1562     }
1563
1564     intel_batchbuffer_end_atomic(batch);
1565     intel_batchbuffer_flush(batch);
1566 }
1567
1568 static void
1569 gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx, 
1570                                struct encode_state *encode_state,
1571                                struct intel_encoder_context *encoder_context)
1572 {
1573     gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1574     gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1575     gen75_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1576     gen75_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
1577 }
1578
1579 static dri_bo *
1580 gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1581                                   struct encode_state *encode_state,
1582                                   struct intel_encoder_context *encoder_context)
1583 {
1584     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1585
1586     gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1587     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1588
1589     return mfc_context->mfc_batchbuffer_surface.bo;
1590 }
1591
1592 #endif
1593
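/*
 * Top-level BCS programming for a frame: build the slice-level batch buffer
 * (on the CPU or via the media kernels, depending on MFC_SOFTWARE_HASWELL),
 * program the picture-level MFX state, then chain to the slice batch buffer
 * with MI_BATCH_BUFFER_START.
 */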
1594 static void
1595 gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
1596                                  struct encode_state *encode_state,
1597                                  struct intel_encoder_context *encoder_context)
1598 {
1599     struct intel_batchbuffer *batch = encoder_context->base.batch;
1600     dri_bo *slice_batch_bo;
1601
1602     if (intel_mfc_interlace_check(ctx, encode_state, encoder_context)) {
1603         fprintf(stderr, "The current VA driver doesn't support interlaced mode!\n");
1604         assert(0);
1605         return; 
1606     }
1607
1608 #ifdef MFC_SOFTWARE_HASWELL
1609     slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1610 #else
1611     slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1612 #endif
1613
1614     // begin programming
1615     intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
1616     intel_batchbuffer_emit_mi_flush(batch);
1617     
1618     // picture-level programming
1619     gen75_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1620
1621     BEGIN_BCS_BATCH(batch, 2);
1622     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1623     OUT_BCS_RELOC(batch,
1624                   slice_batch_bo,
1625                   I915_GEM_DOMAIN_COMMAND, 0, 
1626                   0);
1627     ADVANCE_BCS_BATCH(batch);
1628
1629     // end programming
1630     intel_batchbuffer_end_atomic(batch);
1631
1632     dri_bo_unreference(slice_batch_bo);
1633 }
1634
1635
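/*
 * Encode one AVC picture.  Under CBR the frame is re-encoded until the BRC
 * reports no HRD violation (and the HRD context is updated), or until the
 * violation is unrepairable at the QP limits, in which case a warning is
 * printed once and the frame is returned as-is.
 */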
1636 static VAStatus
1637 gen75_mfc_avc_encode_picture(VADriverContextP ctx, 
1638                             struct encode_state *encode_state,
1639                             struct intel_encoder_context *encoder_context)
1640 {
1641     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1642     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1643     int current_frame_bits_size;
1644     int sts;
1645  
1646     for (;;) {
1647         gen75_mfc_init(ctx, encoder_context);
1648         gen75_mfc_avc_prepare(ctx, encode_state, encoder_context);
1649         /* Program the BCS pipeline */
1650         gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);  // fill the pipeline
1651         gen75_mfc_run(ctx, encode_state, encoder_context);
1652         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1653             gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1654             sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1655             if (sts == BRC_NO_HRD_VIOLATION) {
1656                 intel_mfc_hrd_context_update(encode_state, mfc_context);
1657                 break;
1658             } else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1660                 if (!mfc_context->hrd.violation_noted) {
1661                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1662                     mfc_context->hrd.violation_noted = 1;
1663                 }
1664                 return VA_STATUS_SUCCESS;
1665             }
1666         } else {
1667             break;
1668         }
1669     }
1670
1671     return VA_STATUS_SUCCESS;
1672 }
1673
1674
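/*
 * Release every BO owned by the MFC context (deblocking outputs, source
 * picture, PAK/BSE object, DMV buffers, scratch rows, reference surfaces and
 * batch buffer surfaces), tear down the GPE context and the aux batch buffer,
 * then free the structure itself.
 */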
1675 static void
1676 gen75_mfc_context_destroy(void *context)
1677 {
1678     struct gen6_mfc_context *mfc_context = context;
1679     int i;
1680
1681     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
1682     mfc_context->post_deblocking_output.bo = NULL;
1683
1684     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
1685     mfc_context->pre_deblocking_output.bo = NULL;
1686
1687     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
1688     mfc_context->uncompressed_picture_source.bo = NULL;
1689
1690     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); 
1691     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
1692
1693     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
1694         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
1695         mfc_context->direct_mv_buffers[i].bo = NULL;
1696     }
1697
1698     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
1699     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
1700
1701     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
1702     mfc_context->macroblock_status_buffer.bo = NULL;
1703
1704     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
1705     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1706
1707     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
1708     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1709
1710
1711     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
1712         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
1713         mfc_context->reference_surfaces[i].bo = NULL;  
1714     }
1715
1716     i965_gpe_context_destroy(&mfc_context->gpe_context);
1717
1718     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
1719     mfc_context->mfc_batchbuffer_surface.bo = NULL;
1720
1721     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
1722     mfc_context->aux_batchbuffer_surface.bo = NULL;
1723
1724     if (mfc_context->aux_batchbuffer)
1725         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
1726
1727     mfc_context->aux_batchbuffer = NULL;
1728
1729     free(mfc_context);
1730 }
1731
1732 static VAStatus gen75_mfc_pipeline(VADriverContextP ctx,
1733                   VAProfile profile,
1734                   struct encode_state *encode_state,
1735                   struct intel_encoder_context *encoder_context)
1736 {
1737     VAStatus vaStatus;
1738
1739     switch (profile) {
1740     case VAProfileH264Baseline:
1741     case VAProfileH264Main:
1742     case VAProfileH264High:
1743         vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
1744         break;
1745
1746         /* FIXME: add for other profile */
1747     default:
1748         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
1749         break;
1750     }
1751
1752     return vaStatus;
1753 }
1754
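/*
 * Create and register the Haswell MFC context: size the GPE state (binding
 * table, interface descriptors, CURBE, VFE), load the intra/inter batch buffer
 * kernels, and hook the Gen7.5 callbacks into the encoder context.
 */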
1755 Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1756 {
1757     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
1758
1759     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
1760
1761     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
1762     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
1763
1764     mfc_context->gpe_context.curbe.length = 32 * 4;
1765
1766     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
1767     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
1768     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
1769     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
1770     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
1771
1772     i965_gpe_load_kernels(ctx,
1773                           &mfc_context->gpe_context,
1774                           gen75_mfc_kernels,
1775                           NUM_MFC_KERNEL);
1776
1777     mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
1778     mfc_context->set_surface_state = gen75_mfc_surface_state;
1779     mfc_context->ind_obj_base_addr_state = gen75_mfc_ind_obj_base_addr_state;
1780     mfc_context->avc_img_state = gen75_mfc_avc_img_state;
1781     mfc_context->avc_qm_state = gen75_mfc_avc_qm_state;
1782     mfc_context->avc_fqm_state = gen75_mfc_avc_fqm_state;
1783     mfc_context->insert_object = gen75_mfc_avc_insert_object;
1784     mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;
1785
1786     encoder_context->mfc_context = mfc_context;
1787     encoder_context->mfc_context_destroy = gen75_mfc_context_destroy;
1788     encoder_context->mfc_pipeline = gen75_mfc_pipeline;
1789     encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
1790
1791     return True;
1792 }